// Copyright 2026 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -24,6 +24,8 @@ import "google/cloud/aiplatform/v1beta1/content.proto";
2424import "google/cloud/aiplatform/v1beta1/io.proto" ;
2525import "google/cloud/aiplatform/v1beta1/operation.proto" ;
2626import "google/longrunning/operations.proto" ;
27+ import "google/protobuf/struct.proto" ;
28+ import "google/rpc/status.proto" ;
2729
2830option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1" ;
2931option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb" ;
@@ -45,6 +47,7 @@ service EvaluationService {
4547 option (google.api.http ) = {
4648 post : "/v1beta1/{location=projects/*/locations/*}:evaluateInstances"
4749 body : "*"
50+ additional_bindings { post : "/v1beta1:evaluateInstances" body : "*" }
4851 };
4952 }
5053
@@ -54,6 +57,7 @@ service EvaluationService {
5457 option (google.api.http ) = {
5558 post : "/v1beta1/{location=projects/*/locations/*}:evaluateDataset"
5659 body : "*"
60+ additional_bindings { post : "/v1beta1:evaluateDataset" body : "*" }
5761 };
5862 option (google.longrunning.operation_info ) = {
5963 response_type : "EvaluateDatasetResponse"
@@ -83,18 +87,18 @@ message EvaluateDatasetOperationMetadata {
8387 GenericOperationMetadata generic_metadata = 1 ;
8488}
8589
// The results from an evaluation run performed by the EvaluationService.
// Returned as the response payload of the EvaluateDataset long-running
// operation.
message EvaluateDatasetResponse {
  // Output only. Aggregation statistics derived from results of
  // EvaluationService.
  AggregationOutput aggregation_output = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Output info for EvaluationService.
  // Describes where the per-instance evaluation output was written.
  // NOTE(review): field number 2 is skipped in this message — confirm it is
  // reserved (for a removed field) elsewhere in the file.
  OutputInfo output_info = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}
96100
97- // Describes the info for output of EvaluationService.EvaluateDataset.
101+ // Describes the info for output of EvaluationService.
98102message OutputInfo {
99103 // The output location into which evaluation output is written.
100104 oneof output_location {
@@ -172,9 +176,10 @@ message OutputConfig {
172176 }
173177}
174178
175- // The metric used for dataset level evaluation .
179+ // The metric used for running evaluations .
176180message Metric {
177- // The aggregation metrics supported by EvaluationService.EvaluateDataset.
181+ // The per-metric statistics on evaluation results supported by
182+ // `EvaluationService.EvaluateDataset`.
178183 enum AggregationMetric {
179184 // Unspecified aggregation metric.
180185 AGGREGATION_METRIC_UNSPECIFIED = 0 ;
@@ -210,8 +215,18 @@ message Metric {
210215 PERCENTILE_P99 = 10 ;
211216 }
212217
213- // The metric spec used for evaluation.
218+ // The spec for the metric.
219+ // It would be either a pre-defined metric, or a inline metric spec.
214220 oneof metric_spec {
221+ // The spec for a pre-defined metric.
222+ PredefinedMetricSpec predefined_metric_spec = 8 ;
223+
224+ // Spec for a computation based metric.
225+ ComputationBasedMetricSpec computation_based_metric_spec = 9 ;
226+
227+ // Spec for an LLM based metric.
228+ LLMBasedMetricSpec llm_based_metric_spec = 10 ;
229+
215230 // Spec for pointwise metric.
216231 PointwiseMetricSpec pointwise_metric_spec = 2 ;
217232
@@ -511,6 +526,92 @@ message EvaluateInstancesResponse {
511526 RubricBasedInstructionFollowingResult
512527 rubric_based_instruction_following_result = 38;
513528 }
529+
530+ // Metric results for each instance.
531+ // The order of the metric results is guaranteed to be the same as the order
532+ // of the instances in the request.
533+ repeated MetricResult metric_results = 43 ;
534+ }
535+
// Result for a single metric on a single instance.
message MetricResult {
  // Output only. The score for the metric.
  // Please refer to each metric's documentation for the meaning of the score.
  optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The explanation for the metric result.
  // NOTE(review): field number 2 is skipped in this message — confirm it is
  // reserved elsewhere.
  optional string explanation = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The error status for the metric result.
  // NOTE(review): presumably populated only when computing this metric failed
  // for the instance (in which case `score` is likely unset) — confirm with
  // the service implementation.
  optional google.rpc.Status error = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
549+
// The spec for a pre-defined metric.
// Selects one of the metrics built into the service by name instead of
// defining the metric inline.
message PredefinedMetricSpec {
  // Required. The name of a pre-defined metric, such as
  // "instruction_following_v1" or "text_quality_v1".
  string metric_spec_name = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The parameters needed to run the pre-defined metric.
  // Schema-less JSON-like struct; the accepted keys depend on the metric
  // named in `metric_spec_name` — see that metric's documentation.
  google.protobuf.Struct metric_spec_parameters = 2
      [(google.api.field_behavior) = OPTIONAL];
}
560+
// Specification for a computation based metric.
// Computation based metrics are computed deterministically from the instance
// data (as opposed to LLM-judged metrics; see LLMBasedMetricSpec).
message ComputationBasedMetricSpec {
  // Types of computation based metrics.
  // NOTE(review): values are not prefixed with the enum type name
  // (e.g. COMPUTATION_BASED_METRIC_TYPE_EXACT_MATCH). Unprefixed values risk
  // sibling-scope collisions in generated C++ code, but renaming them now
  // would break the published API surface.
  enum ComputationBasedMetricType {
    // Unspecified computation based metric type.
    COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = 0;

    // Exact match metric.
    EXACT_MATCH = 1;

    // BLEU metric.
    BLEU = 2;

    // ROUGE metric.
    ROUGE = 3;
  }

  // Required. The type of the computation based metric.
  // NOTE(review): declared `optional` yet annotated REQUIRED — presence is
  // explicit but the server is expected to reject requests without it;
  // confirm intended validation behavior. (`type` also generates awkward
  // accessors in some languages, but is frozen on the wire.)
  optional ComputationBasedMetricType type = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. A map of parameters for the metric, e.g. {"rouge_type":
  // "rougeL"}.
  optional google.protobuf.Struct parameters = 2
      [(google.api.field_behavior) = OPTIONAL];
}
587+
// Specification for an LLM based metric.
// The metric is scored by a judge model ("autorater") driven by a prompt
// template, optionally guided by rubrics.
message LLMBasedMetricSpec {
  // Source of the rubrics to be used for evaluation.
  // If neither field is set, the metric is evaluated without rubrics.
  oneof rubrics_source {
    // Use a pre-defined group of rubrics associated with the input.
    // Refers to a key in the rubric_groups map of EvaluationInstance.
    string rubric_group_key = 4;

    // Dynamically generate rubrics using a predefined spec.
    PredefinedMetricSpec predefined_rubric_generation_spec = 6;
  }

  // Required. Template for the prompt sent to the judge model.
  // NOTE(review): declared `optional` yet annotated REQUIRED — confirm the
  // server rejects requests that leave it unset.
  optional string metric_prompt_template = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. System instructions for the judge model.
  optional string system_instruction = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Optional configuration for the judge LLM (Autorater).
  optional AutoraterConfig judge_autorater_config = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Optional additional configuration for the metric.
  // Schema-less JSON-like struct; accepted keys are metric-specific.
  // NOTE(review): field number 5 is skipped in this message — confirm it is
  // reserved elsewhere.
  optional google.protobuf.Struct additional_config = 7
      [(google.api.field_behavior) = OPTIONAL];
}
515616
516617// Input for exact match metric.
0 commit comments