feat: Add traffic_type, tool_use_prompt_tokens_details to message GenerateContentResponse.UsageMetadata

Google APIs · copybara-github · commit 7d37a81f68f8 · 2026-02-17T14:30:51.000-08:00
PiperOrigin-RevId: 871498028
diff --git a/google/cloud/aiplatform/v1/prediction_service.proto b/google/cloud/aiplatform/v1/prediction_service.proto
@@ -804,6 +804,25 @@ message GenerateContentResponse {
 
   // Usage metadata about response(s).
   message UsageMetadata {
+    // The type of traffic that this request was processed with, indicating
+    // which quota is consumed.
+    enum TrafficType {
+      // Unspecified request traffic type.
+      TRAFFIC_TYPE_UNSPECIFIED = 0;
+
+      // The request was processed using Pay-As-You-Go quota.
+      ON_DEMAND = 1;
+
+      // The request was processed as Priority Pay-As-You-Go traffic.
+      ON_DEMAND_PRIORITY = 3;
+
+      // The request was processed as Flex traffic.
+      ON_DEMAND_FLEX = 4;
+
+      // The request was processed as Provisioned Throughput traffic.
+      PROVISIONED_THROUGHPUT = 2;
+    }
+
     // Number of tokens in the request. When `cached_content` is set, this is
     // still the total effective prompt size meaning this includes the number of
     // tokens in the cached content.
@@ -835,6 +854,15 @@ message GenerateContentResponse {
     // Output only. List of modalities that were returned in the response.
     repeated ModalityTokenCount candidates_tokens_details = 11
         [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. Per-modality breakdown of the token counts from the
+    // results of tool executions, which are provided back to the model as
+    // input.
+    repeated ModalityTokenCount tool_use_prompt_tokens_details = 12
+        [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. The traffic type that this request was processed with.
+    TrafficType traffic_type = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
   }
 
   // Output only. Generated candidates.