Skip to content

Commit 79a8512

Browse files
Google APIs, copybara-github
authored and committed
feat: expose RecommendSpecs api to vertex python SDK for Custom Weights Model deployment
PiperOrigin-RevId: 793835739
1 parent 939901a commit 79a8512

File tree

1 file changed

+90
-0
lines changed

1 file changed

+90
-0
lines changed

google/cloud/aiplatform/v1beta1/model_service.proto

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
2424
import "google/cloud/aiplatform/v1beta1/evaluated_annotation.proto";
2525
import "google/cloud/aiplatform/v1beta1/explanation.proto";
2626
import "google/cloud/aiplatform/v1beta1/io.proto";
27+
import "google/cloud/aiplatform/v1beta1/machine_resources.proto";
2728
import "google/cloud/aiplatform/v1beta1/model.proto";
2829
import "google/cloud/aiplatform/v1beta1/model_evaluation.proto";
2930
import "google/cloud/aiplatform/v1beta1/model_evaluation_slice.proto";
@@ -261,6 +262,14 @@ service ModelService {
261262
};
262263
option (google.api.method_signature) = "parent";
263264
}
265+
266+
// Recommends machine and model container specs for a custom weights model.
267+
rpc RecommendSpec(RecommendSpecRequest) returns (RecommendSpecResponse) {
268+
option (google.api.http) = {
269+
post: "/v1beta1/{parent=projects/*/locations/*}:recommendSpec"
270+
body: "*"
271+
};
272+
}
264273
}
265274

266275
// Request message for
@@ -966,3 +975,84 @@ message ListModelEvaluationSlicesResponse {
966975
// to obtain that page.
967976
string next_page_token = 2;
968977
}
978+
979+
// Request message for
980+
// [ModelService.RecommendSpec][google.cloud.aiplatform.v1beta1.ModelService.RecommendSpec].
981+
message RecommendSpecRequest {
982+
// Required. The resource name of the Location from which to recommend specs.
983+
// The caller must have permission to make calls in the project.
984+
// Format:
985+
// `projects/{project}/locations/{location}`.
986+
string parent = 2 [
987+
(google.api.field_behavior) = REQUIRED,
988+
(google.api.resource_reference) = {
989+
type: "locations.googleapis.com/Location"
990+
}
991+
];
992+
993+
// Required. The Cloud Storage URI of the custom model, where the weights and
994+
// config files (which can be used to infer the base model) are stored.
995+
string gcs_uri = 1 [(google.api.field_behavior) = REQUIRED];
996+
997+
// Optional. If true, checks machine availability in the recommended regions
998+
// and only returns machine specs for regions where the machine is available.
999+
bool check_machine_availability = 3 [(google.api.field_behavior) = OPTIONAL];
1000+
1001+
// Optional. If true, checks user quota in the recommended regions.
1002+
// Returns all machine specs in regions where they are available, and also the
1003+
// user quota state for each machine type in each region.
1004+
bool check_user_quota = 4 [(google.api.field_behavior) = OPTIONAL];
1005+
}
1006+
1007+
// Response message for
1008+
// [ModelService.RecommendSpec][google.cloud.aiplatform.v1beta1.ModelService.RecommendSpec].
1009+
message RecommendSpecResponse {
1010+
// A machine spec paired with a model container spec.
1011+
message MachineAndModelContainerSpec {
1012+
// Output only. The machine spec.
1013+
MachineSpec machine_spec = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1014+
1015+
// Output only. The model container spec.
1016+
ModelContainerSpec container_spec = 2
1017+
[(google.api.field_behavior) = OUTPUT_ONLY];
1018+
}
1019+
1020+
// Recommendation of one deployment option for the given custom weights model
1021+
// in one region.
1022+
// Contains the region, the machine and container spec, and the quota state.
1023+
message Recommendation {
1024+
// The user accelerator quota state.
1025+
enum QuotaState {
1026+
// Unspecified quota state. Quota information is not available.
1027+
QUOTA_STATE_UNSPECIFIED = 0;
1028+
1029+
// User has enough accelerator quota for the machine type.
1030+
QUOTA_STATE_USER_HAS_QUOTA = 1;
1031+
1032+
// User does not have enough accelerator quota for the machine type.
1033+
QUOTA_STATE_NO_USER_QUOTA = 2;
1034+
}
1035+
1036+
// The region of the recommended deployment spec (machine).
1037+
string region = 1;
1038+
1039+
// Output only. The machine and model container spec for this recommendation.
1040+
MachineAndModelContainerSpec spec = 2
1041+
[(google.api.field_behavior) = OUTPUT_ONLY];
1042+
1043+
// Output only. The user accelerator quota state.
1044+
QuotaState user_quota_state = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
1045+
}
1046+
1047+
// Output only. The base model that the custom model was fine-tuned from.
1048+
string base_model = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1049+
1050+
// Output only. Recommendations of deployment options for the given custom
1051+
// weights model.
1052+
repeated Recommendation recommendations = 3
1053+
[(google.api.field_behavior) = OUTPUT_ONLY];
1054+
1055+
// Output only. The machine and model container specs.
// NOTE(review): how this list relates to `recommendations` (which also carries
// a spec per entry) is not stated here; confirm and document.
1056+
repeated MachineAndModelContainerSpec specs = 2
1057+
[(google.api.field_behavior) = OUTPUT_ONLY];
1058+
}

0 commit comments

Comments
 (0)