Skip to content

Commit a1aba11

Browse files
feat: add rag_embedding_model_config to RagCorpus (#5430)
* feat: add rag_embedding_model_config to RagCorpus feat: add max_embedding_requests_per_min to ImportRagFilesConfig PiperOrigin-RevId: 640251019 Source-Link: googleapis/googleapis@ede5e02 Source-Link: googleapis/googleapis-gen@7bb40e0 Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWFpcGxhdGZvcm0vLk93bEJvdC55YW1sIiwiaCI6IjdiYjQwZTAxY2M2MDEzYjA2NmU4MTgyN2E4ZGQxOWFkZTZlNzFiMzkifQ== * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: sofisl <55454395+sofisl@users.noreply.github.com>
1 parent ada6ab8 commit a1aba11

27 files changed

+3076
-1
lines changed

packages/google-cloud-aiplatform/protos/google/cloud/aiplatform/v1beta1/vertex_rag_data.proto

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,48 @@ option java_package = "com.google.cloud.aiplatform.v1beta1";
2929
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
3030
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
3131

32+
// Configuration of the embedding model to use for RAG; see `model_config`.
33+
message RagEmbeddingModelConfig {
34+
// Config representing a model hosted on a Vertex Prediction Endpoint.
35+
message VertexPredictionEndpoint {
36+
// Required. The endpoint resource name.
37+
// Format:
38+
// `projects/{project}/locations/{location}/publishers/{publisher}/models/{model}`
39+
// or
40+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
41+
string endpoint = 1 [
42+
(google.api.field_behavior) = REQUIRED,
43+
(google.api.resource_reference) = {
44+
type: "aiplatform.googleapis.com/Endpoint"
45+
}
46+
];
47+
48+
// Output only. The resource name of the model that is deployed on the
49+
// endpoint. Present only when the endpoint is not a publisher model.
50+
// Format:
51+
// `projects/{project}/locations/{location}/models/{model}`
52+
string model = 2 [
53+
(google.api.field_behavior) = OUTPUT_ONLY,
54+
(google.api.resource_reference) = {
55+
type: "aiplatform.googleapis.com/Model"
56+
}
57+
];
58+
59+
// Output only. Version ID of the model that is deployed on the endpoint.
60+
// Present only when the endpoint is not a publisher model.
61+
string model_version_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
62+
}
63+
64+
// The model config to use. At most one member of this oneof can be set.
65+
oneof model_config {
66+
// The Vertex AI Prediction Endpoint, which refers either to a publisher
67+
// model or to an endpoint hosting a 1P fine-tuned text embedding model.
68+
// Endpoints hosting non-1P fine-tuned text embedding models are
69+
// currently not supported.
70+
VertexPredictionEndpoint vertex_prediction_endpoint = 1;
71+
}
72+
}
73+
3274
// A RagCorpus is a RagFile container and a project can have multiple
3375
// RagCorpora.
3476
message RagCorpus {
@@ -50,6 +92,12 @@ message RagCorpus {
5092
// Optional. The description of the RagCorpus.
5193
string description = 3 [(google.api.field_behavior) = OPTIONAL];
5294

95+
// Optional. Immutable. The embedding model config of the RagCorpus.
96+
RagEmbeddingModelConfig rag_embedding_model_config = 6 [
97+
(google.api.field_behavior) = OPTIONAL,
98+
(google.api.field_behavior) = IMMUTABLE
99+
];
100+
53101
// Output only. Timestamp when this RagCorpus was created.
54102
google.protobuf.Timestamp create_time = 4
55103
[(google.api.field_behavior) = OUTPUT_ONLY];
@@ -155,4 +203,12 @@ message ImportRagFilesConfig {
155203

156204
// Specifies the size and overlap of chunks after importing RagFiles.
157205
RagFileChunkingConfig rag_file_chunking_config = 4;
206+
207+
// Optional. The max number of queries per minute that this job is allowed to
208+
// make to the embedding model specified on the corpus. This value is specific
209+
// to this job and not shared across other import jobs. Consult the Quotas
210+
// page on the project to set an appropriate value here.
211+
// If unspecified, a default value of 1,000 QPM is used.
212+
int32 max_embedding_requests_per_min = 5
213+
[(google.api.field_behavior) = OPTIONAL];
158214
}

packages/google-cloud-aiplatform/protos/protos.d.ts

Lines changed: 224 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)