Skip to content

Commit 7f62014

Browse files
Google APIscopybara-github
authored andcommitted
feat: A new field predefined_metric_spec is added to message .google.cloud.aiplatform.v1beta1.Metric
feat: A new field `computation_based_metric_spec` is added to message `.google.cloud.aiplatform.v1beta1.Metric` feat: A new field `llm_based_metric_spec` is added to message `.google.cloud.aiplatform.v1beta1.Metric` feat: A new field `metric_results` is added to message `.google.cloud.aiplatform.v1beta1.EvaluateInstancesResponse` feat: A new message `MetricResult` is added feat: A new message `PredefinedMetricSpec` is added feat: A new message `ComputationBasedMetricSpec` is added feat: A new message `LLMBasedMetricSpec` is added docs: A comment for message `EvaluateDatasetResponse` is changed docs: A comment for field `aggregation_output` in message `.google.cloud.aiplatform.v1beta1.EvaluateDatasetResponse` is changed docs: A comment for field `output_info` in message `.google.cloud.aiplatform.v1beta1.EvaluateDatasetResponse` is changed docs: A comment for message `OutputInfo` is changed docs: A comment for message `Metric` is changed docs: A comment for enum `AggregationMetric` is changed PiperOrigin-RevId: 867799029
1 parent 21faf44 commit 7f62014

File tree

1 file changed

+109
-8
lines changed

1 file changed

+109
-8
lines changed

google/cloud/aiplatform/v1beta1/evaluation_service.proto

Lines changed: 109 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -24,6 +24,8 @@ import "google/cloud/aiplatform/v1beta1/content.proto";
2424
import "google/cloud/aiplatform/v1beta1/io.proto";
2525
import "google/cloud/aiplatform/v1beta1/operation.proto";
2626
import "google/longrunning/operations.proto";
27+
import "google/protobuf/struct.proto";
28+
import "google/rpc/status.proto";
2729

2830
option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
2931
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
@@ -45,6 +47,7 @@ service EvaluationService {
4547
option (google.api.http) = {
4648
post: "/v1beta1/{location=projects/*/locations/*}:evaluateInstances"
4749
body: "*"
50+
additional_bindings { post: "/v1beta1:evaluateInstances" body: "*" }
4851
};
4952
}
5053

@@ -54,6 +57,7 @@ service EvaluationService {
5457
option (google.api.http) = {
5558
post: "/v1beta1/{location=projects/*/locations/*}:evaluateDataset"
5659
body: "*"
60+
additional_bindings { post: "/v1beta1:evaluateDataset" body: "*" }
5761
};
5862
option (google.longrunning.operation_info) = {
5963
response_type: "EvaluateDatasetResponse"
@@ -83,18 +87,18 @@ message EvaluateDatasetOperationMetadata {
8387
GenericOperationMetadata generic_metadata = 1;
8488
}
8589

86-
// Response in LRO for EvaluationService.EvaluateDataset.
90+
// The results from an evaluation run performed by the EvaluationService.
8791
message EvaluateDatasetResponse {
8892
// Output only. Aggregation statistics derived from results of
89-
// EvaluationService.EvaluateDataset.
93+
// EvaluationService.
9094
AggregationOutput aggregation_output = 1
9195
[(google.api.field_behavior) = OUTPUT_ONLY];
9296

93-
// Output only. Output info for EvaluationService.EvaluateDataset.
97+
// Output only. Output info for EvaluationService.
9498
OutputInfo output_info = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
9599
}
96100

97-
// Describes the info for output of EvaluationService.EvaluateDataset.
101+
// Describes the info for output of EvaluationService.
98102
message OutputInfo {
99103
// The output location into which evaluation output is written.
100104
oneof output_location {
@@ -172,9 +176,10 @@ message OutputConfig {
172176
}
173177
}
174178

175-
// The metric used for dataset level evaluation.
179+
// The metric used for running evaluations.
176180
message Metric {
177-
// The aggregation metrics supported by EvaluationService.EvaluateDataset.
181+
// The per-metric statistics on evaluation results supported by
182+
// `EvaluationService.EvaluateDataset`.
178183
enum AggregationMetric {
179184
// Unspecified aggregation metric.
180185
AGGREGATION_METRIC_UNSPECIFIED = 0;
@@ -210,8 +215,18 @@ message Metric {
210215
PERCENTILE_P99 = 10;
211216
}
212217

213-
// The metric spec used for evaluation.
218+
// The spec for the metric.
219+
// It is either a pre-defined metric or an inline metric spec.
214220
oneof metric_spec {
221+
// The spec for a pre-defined metric.
222+
PredefinedMetricSpec predefined_metric_spec = 8;
223+
224+
// Spec for a computation based metric.
225+
ComputationBasedMetricSpec computation_based_metric_spec = 9;
226+
227+
// Spec for an LLM based metric.
228+
LLMBasedMetricSpec llm_based_metric_spec = 10;
229+
215230
// Spec for pointwise metric.
216231
PointwiseMetricSpec pointwise_metric_spec = 2;
217232

@@ -511,6 +526,92 @@ message EvaluateInstancesResponse {
511526
RubricBasedInstructionFollowingResult
512527
rubric_based_instruction_following_result = 38;
513528
}
529+
530+
// Metric results for each instance.
531+
// The order of the metric results is guaranteed to be the same as the order
532+
// of the instances in the request.
533+
repeated MetricResult metric_results = 43;
534+
}
535+
536+
// Result for a single metric on a single instance.
// Returned as the elements of `EvaluateInstancesResponse.metric_results`.
// NOTE(review): field number 2 is not assigned in this message; if it was
// used previously, confirm whether it should be declared `reserved`.
537+
message MetricResult {
538+
// Output only. The score for the metric.
539+
// Please refer to each metric's documentation for the meaning of the score.
// Declared `optional`, so an unset score can be told apart from a score of 0.
540+
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
541+
542+
// Output only. The explanation for the metric result.
543+
optional string explanation = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
544+
545+
// Output only. The error status for the metric result.
// NOTE(review): presumably set when the metric could not be computed for
// this instance - confirm against the service behavior.
546+
optional google.rpc.Status error = 4
547+
[(google.api.field_behavior) = OUTPUT_ONLY];
548+
}
549+
550+
// The spec for a pre-defined metric, identified by name and accompanied by
// optional parameters. Referenced by `Metric.predefined_metric_spec` and by
// `LLMBasedMetricSpec.predefined_rubric_generation_spec`.
551+
message PredefinedMetricSpec {
552+
// Required. The name of a pre-defined metric, such as
553+
// "instruction_following_v1" or "text_quality_v1".
554+
string metric_spec_name = 1 [(google.api.field_behavior) = REQUIRED];
555+
556+
// Optional. The parameters needed to run the pre-defined metric, passed as
// an untyped `google.protobuf.Struct`.
// NOTE(review): the accepted keys presumably depend on the chosen metric -
// confirm and document them.
557+
google.protobuf.Struct metric_spec_parameters = 2
558+
[(google.api.field_behavior) = OPTIONAL];
559+
}
560+
561+
// Specification for a computation based metric: a metric type plus an
// optional, untyped set of parameters for that type.
562+
message ComputationBasedMetricSpec {
563+
// Types of computation based metrics.
// NOTE(review): apart from the zero value, the values are not prefixed with
// the enum type name; they are scoped to ComputationBasedMetricSpec because
// the enum is nested in this message.
564+
enum ComputationBasedMetricType {
565+
// Unspecified computation based metric type.
566+
COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = 0;
567+
568+
// Exact match metric.
569+
EXACT_MATCH = 1;
570+
571+
// BLEU metric.
572+
BLEU = 2;
573+
574+
// ROUGE metric.
575+
ROUGE = 3;
576+
}
577+
578+
// Required. The type of the computation based metric.
// Declared `optional` so that field presence is tracked explicitly.
579+
optional ComputationBasedMetricType type = 1
580+
[(google.api.field_behavior) = REQUIRED];
581+
582+
// Optional. A map of parameters for the metric, e.g.
583+
// {"rouge_type": "rougeL"}.
584+
optional google.protobuf.Struct parameters = 2
585+
[(google.api.field_behavior) = OPTIONAL];
586+
}
587+
588+
// Specification for an LLM based metric, i.e. a metric evaluated by a judge
// model using the prompt template and configuration below.
// NOTE(review): field number 5 is not assigned in this message; if it was
// used previously, confirm whether it should be declared `reserved`.
589+
message LLMBasedMetricSpec {
590+
// Source of the rubrics to be used for evaluation.
// At most one of the members below can be set.
591+
oneof rubrics_source {
592+
// Use a pre-defined group of rubrics associated with the input.
593+
// Refers to a key in the rubric_groups map of EvaluationInstance.
594+
string rubric_group_key = 4;
595+
596+
// Dynamically generate rubrics using a predefined spec.
597+
PredefinedMetricSpec predefined_rubric_generation_spec = 6;
598+
}
599+
600+
// Required. Template for the prompt sent to the judge model.
601+
optional string metric_prompt_template = 1
602+
[(google.api.field_behavior) = REQUIRED];
603+
604+
// Optional. System instructions for the judge model.
605+
optional string system_instruction = 2
606+
[(google.api.field_behavior) = OPTIONAL];
607+
608+
// Optional. Configuration for the judge LLM (Autorater).
609+
optional AutoraterConfig judge_autorater_config = 3
610+
[(google.api.field_behavior) = OPTIONAL];
611+
612+
// Optional. Additional configuration for the metric.
613+
optional google.protobuf.Struct additional_config = 7
614+
[(google.api.field_behavior) = OPTIONAL];
514615
}
515616

516617
// Input for exact match metric.

0 commit comments

Comments
 (0)