feat: add support for FunctionResponsePart

Google APIs · copybara-github · commit fd84be8a8b7b · 2025-10-15T10:43:31.000-07:00
feat: add support for raw media bytes for function response
feat: add Computer Use tool type
feat: add support for image generation features
feat: add support for additional reasons to stop image generation
feat: add URL_RETRIEVAL_STATUS_UNSAFE and URL_RETRIEVAL_STATUS_PAYWALL statuses for url retrieval
feat: add log_probability_sum
fix!: replaced GenerateVideoResponse by PredictLongRunningGeneratedVideoResponse field

PiperOrigin-RevId: 819821787
diff --git a/google/ai/generativelanguage/v1beta/content.proto b/google/ai/generativelanguage/v1beta/content.proto
@@ -141,6 +141,29 @@ message Part {
   // Optional. An opaque signature for the thought so it can be reused in
   // subsequent requests.
   bytes thought_signature = 13 [(google.api.field_behavior) = OPTIONAL];
+
+  // Custom metadata associated with the Part.
+  // Agents using genai.Part as content representation may need to keep track
+  // of additional information, for example the name of the file/source from
+  // which the Part originates or a way to multiplex multiple Part streams.
+  google.protobuf.Struct part_metadata = 8;
+}
+
+// A datatype containing media that is part of a `FunctionResponse` message.
+//
+// A `FunctionResponsePart` consists of data which has an associated datatype. A
+// `FunctionResponsePart` can only contain one of the accepted types in
+// `FunctionResponsePart.data`.
+//
+// A `FunctionResponsePart` must have a fixed IANA MIME type identifying the
+// type and subtype of the media if the `inline_data` field is filled with raw
+// bytes.
+message FunctionResponsePart {
+  // The data of the function response part.
+  oneof data {
+    // Inline media bytes.
+    FunctionResponseBlob inline_data = 1;
+  }
 }
 
 // Raw media bytes.
@@ -160,6 +183,24 @@ message Blob {
   bytes data = 2;
 }
 
+// Raw media bytes for function response.
+//
+// Text should not be sent as raw bytes; use the 'FunctionResponse.response'
+// field instead.
+message FunctionResponseBlob {
+  // The IANA standard MIME type of the source data.
+  // Examples:
+  //   - image/png
+  //   - image/jpeg
+  // If an unsupported MIME type is provided, an error will be returned. For a
+  // complete list of supported types, see [Supported file
+  // formats](https://ai.google.dev/gemini-api/docs/prompting_with_media#supported_file_formats).
+  string mime_type = 1;
+
+  // Raw bytes for media formats.
+  bytes data = 2;
+}
+
 // URI based data.
 message FileData {
   // Optional. The IANA standard MIME type of the source data.
@@ -242,6 +283,8 @@ message CodeExecutionResult {
 // A `Tool` is a piece of code that enables the system to interact with
 // external systems to perform an action, or set of actions, outside of
 // knowledge and scope of the model.
+//
+// Next ID: 12
 message Tool {
   // GoogleSearch tool type.
   // Tool to support Google Search in Model. Powered by Google.
@@ -253,6 +296,29 @@ message Tool {
         [(google.api.field_behavior) = OPTIONAL];
   }
 
+  // Computer Use tool type.
+  message ComputerUse {
+    // Represents the environment being operated, such as a web browser.
+    enum Environment {
+      // Defaults to browser.
+      ENVIRONMENT_UNSPECIFIED = 0;
+
+      // Operates in a web browser.
+      ENVIRONMENT_BROWSER = 1;
+    }
+
+    // Required. The environment being operated.
+    Environment environment = 3 [(google.api.field_behavior) = REQUIRED];
+
+    // Optional. By default, predefined functions are included in the final
+    // model call. Some of them can be explicitly excluded from being
+    // automatically included. This can serve two purposes:
+    // 1. Using a more restricted / different action space.
+    // 2. Improving the definitions / instructions of predefined functions.
+    repeated string excluded_predefined_functions = 5
+        [(google.api.field_behavior) = OPTIONAL];
+  }
+
   // Optional. A list of `FunctionDeclarations` available to the model that can
   // be used for function calling.
   //
@@ -280,6 +346,11 @@ message Tool {
   // Tool to support Google Search in Model. Powered by Google.
   GoogleSearch google_search = 4 [(google.api.field_behavior) = OPTIONAL];
 
+  // Optional. Tool to support the model interacting directly with the computer.
+  // If enabled, it automatically populates computer-use specific Function
+  // Declarations.
+  ComputerUse computer_use = 6 [(google.api.field_behavior) = OPTIONAL];
+
   // Optional. Tool to support URL context retrieval.
   UrlContext url_context = 8 [(google.api.field_behavior) = OPTIONAL];
 }
@@ -352,6 +423,9 @@ message FunctionCallingConfig {
     // Model decides to predict either a function call
     // or a natural language response, but will validate function calls with
     // constrained decoding.
+    // If "allowed_function_names" are set, the predicted function call will be
+    // limited to any one of "allowed_function_names", else the predicted
+    // function call will be any one of the provided "function_declarations".
     VALIDATED = 4;
   }
 
@@ -362,9 +436,9 @@ message FunctionCallingConfig {
   // Optional. A set of function names that, when provided, limits the functions
   // the model will call.
   //
-  // This should only be set when the Mode is ANY. Function names
-  // should match [FunctionDeclaration.name]. With mode set to ANY, model will
-  // predict a function call from the set of function names provided.
+  // This should only be set when the Mode is ANY or VALIDATED. Function names
+  // should match [FunctionDeclaration.name]. When set, the model will
+  // predict a function call only from the allowed function names.
   repeated string allowed_function_names = 2
       [(google.api.field_behavior) = OPTIONAL];
 }
@@ -392,8 +466,8 @@ message FunctionDeclaration {
   }
 
   // Required. The name of the function.
-  // Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum
-  // length of 63.
+  // Must be a-z, A-Z, 0-9, or contain underscores, colons, dots, and dashes,
+  // with a maximum length of 64.
   string name = 1 [(google.api.field_behavior) = REQUIRED];
 
   // Required. A brief description of the function.
@@ -453,7 +527,7 @@ message FunctionCall {
 
   // Required. The name of the function to call.
   // Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum
-  // length of 63.
+  // length of 64.
   string name = 1 [(google.api.field_behavior) = REQUIRED];
 
   // Optional. The function parameters and values in JSON object format.
@@ -491,12 +565,21 @@ message FunctionResponse {
 
   // Required. The name of the function to call.
   // Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum
-  // length of 63.
+  // length of 64.
   string name = 1 [(google.api.field_behavior) = REQUIRED];
 
   // Required. The function response in JSON object format.
+  // Callers can use any keys of their choice that fit the function's syntax
+  // to return the function output, e.g. "output", "result", etc.
+  // In particular, if the function call failed to execute, the response can
+  // have an "error" key to return error details to the model.
   google.protobuf.Struct response = 2 [(google.api.field_behavior) = REQUIRED];
 
+  // Optional. Ordered `Parts` that constitute a function response. Parts may
+  // have different IANA MIME types.
+  repeated FunctionResponsePart parts = 8
+      [(google.api.field_behavior) = OPTIONAL];
+
   // Optional. Signals that function call continues, and more responses will be
   // returned, turning the function call into a generator.
   // Is only applicable to NON_BLOCKING function calls, is ignored otherwise.
@@ -521,11 +604,8 @@ message Schema {
   // Required. Data type.
   Type type = 1 [(google.api.field_behavior) = REQUIRED];
 
-  // Optional. The format of the data. This is used only for primitive
-  // datatypes. Supported formats:
-  //  for NUMBER type: float, double
-  //  for INTEGER type: int32, int64
-  //  for STRING type: enum, date-time
+  // Optional. The format of the data. Any value is allowed, but most do not
+  // trigger any special functionality.
   string format = 2 [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. The title of the schema.
diff --git a/google/ai/generativelanguage/v1beta/file.proto b/google/ai/generativelanguage/v1beta/file.proto
@@ -61,6 +61,9 @@ message File {
 
     // Indicates the file is generated by Google.
     GENERATED = 2;
+
+    // Indicates the file is registered, i.e. a Google Cloud Storage file.
+    REGISTERED = 3;
   }
 
   // Metadata for the File.
diff --git a/google/ai/generativelanguage/v1beta/generative_service.proto b/google/ai/generativelanguage/v1beta/generative_service.proto
@@ -163,6 +163,7 @@ enum TaskType {
 }
 
 // Request to generate a completion from the model.
+// NEXT ID: 18
 message GenerateContentRequest {
   // Required. The name of the `Model` to use for generating the completion.
   //
@@ -303,8 +304,19 @@ message ThinkingConfig {
   optional int32 thinking_budget = 2;
 }
 
+// Config for image generation features.
+message ImageConfig {
+  // Optional. The aspect ratio of the image to generate. Supported aspect
+  // ratios: 1:1, 2:3, 3:2, 3:4, 4:3, 9:16, 16:9, 21:9.
+  //
+  // If not specified, the model will choose a default aspect ratio based on any
+  // reference images provided.
+  optional string aspect_ratio = 1 [(google.api.field_behavior) = OPTIONAL];
+}
+
 // Configuration options for model generation and outputs. Not all parameters
 // are configurable for every model.
+// Next ID: 29
 message GenerationConfig {
   // Supported modalities of the response.
   enum Modality {
@@ -450,8 +462,17 @@ message GenerationConfig {
   // be used within non-required properties. (Nullable properties are not
   // sufficient.) If `$ref` is set on a sub-schema, no other properties, except
   // for than those starting as a `$`, may be set.
-  google.protobuf.Value response_json_schema = 24
-      [(google.api.field_behavior) = OPTIONAL];
+  google.protobuf.Value response_json_schema = 24 [
+    json_name = "_responseJsonSchema",
+    (google.api.field_behavior) = OPTIONAL
+  ];
+
+  // Optional. An internal detail. Use `responseJsonSchema` rather than this
+  // field.
+  google.protobuf.Value response_json_schema_ordered = 28 [
+    json_name = "responseJsonSchema",
+    (google.api.field_behavior) = OPTIONAL
+  ];
 
   // Optional. Presence penalty applied to the next token's logprobs if the
   // token has already been seen in the response.
@@ -494,6 +515,7 @@ message GenerationConfig {
   // [response_logprobs=True][google.ai.generativelanguage.v1beta.GenerationConfig.response_logprobs].
   // This sets the number of top logprobs to return at each decoding step in the
   // [Candidate.logprobs_result][google.ai.generativelanguage.v1beta.Candidate.logprobs_result].
+  // The number must be in the range of [0, 20].
   optional int32 logprobs = 18 [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. Enables enhanced civic answers. It may not be available for all
@@ -523,6 +545,12 @@ message GenerationConfig {
   optional ThinkingConfig thinking_config = 22
       [(google.api.field_behavior) = OPTIONAL];
 
+  // Optional. Config for image generation.
+  // An error will be returned if this field is set for models that don't
+  // support these config options.
+  optional ImageConfig image_config = 27
+      [(google.api.field_behavior) = OPTIONAL];
+
   // Optional. If specified, the media resolution specified will be used.
   optional MediaResolution media_resolution = 23
       [(google.api.field_behavior) = OPTIONAL];
@@ -700,8 +728,25 @@ message Candidate {
     // violations.
     IMAGE_SAFETY = 11;
 
+    // Image generation stopped because the generated images have other
+    // prohibited content.
+    IMAGE_PROHIBITED_CONTENT = 14;
+
+    // Image generation stopped because of other miscellaneous issues.
+    IMAGE_OTHER = 15;
+
+    // The model was expected to generate an image, but none was generated.
+    NO_IMAGE = 16;
+
+    // Image generation stopped due to recitation.
+    IMAGE_RECITATION = 17;
+
     // Model generated a tool call but no tools were enabled in the request.
     UNEXPECTED_TOOL_CALL = 12;
+
+    // Model called too many tools consecutively, thus the system exited
+    // execution.
+    TOO_MANY_TOOL_CALLS = 13;
   }
 
   // Output only. Index of the candidate in the list of response candidates.
@@ -718,6 +763,13 @@ message Candidate {
     (google.api.field_behavior) = OUTPUT_ONLY
   ];
 
+  // Optional. Output only. Details the reason why the model stopped generating
+  // tokens. This is populated only when `finish_reason` is set.
+  optional string finish_message = 4 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = OUTPUT_ONLY
+  ];
+
   // List of ratings for the safety of a response candidate.
   //
   // There is at most one rating per category.
@@ -777,6 +829,12 @@ message UrlMetadata {
 
     // Url retrieval is failed due to error.
     URL_RETRIEVAL_STATUS_ERROR = 2;
+
+    // Url retrieval is failed because the content is behind a paywall.
+    URL_RETRIEVAL_STATUS_PAYWALL = 3;
+
+    // Url retrieval is failed because the content is unsafe.
+    URL_RETRIEVAL_STATUS_UNSAFE = 4;
   }
 
   // Retrieved url by the tool.
@@ -806,6 +864,9 @@ message LogprobsResult {
     repeated Candidate candidates = 1;
   }
 
+  // Sum of log probabilities for all tokens.
+  optional float log_probability_sum = 3;
+
   // Length = total number of decoding steps.
   repeated TopCandidates top_candidates = 1;
 
@@ -1552,8 +1613,7 @@ message BidiGenerateContentToolResponse {
 message BidiGenerateContentClientMessage {
   // The type of the message.
   oneof message_type {
-    // Optional. Session configuration sent in the first and only first client
-    // message.
+    // Optional. Session configuration sent only in the first client message.
     BidiGenerateContentSetup setup = 1 [(google.api.field_behavior) = OPTIONAL];
 
     // Optional. Incremental update of the current conversation delivered from
@@ -1625,6 +1685,11 @@ message BidiGenerateContentServerContent {
 
   UrlContextMetadata url_context_metadata = 9
       [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. If true, indicates that the model is not generating content
+  // because it is waiting for more input from the user, e.g. because it expects
+  // the user to continue talking.
+  bool waiting_for_input = 10 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // Request for the client to execute the `function_calls` and return the
diff --git a/google/ai/generativelanguage/v1beta/prediction_service.proto b/google/ai/generativelanguage/v1beta/prediction_service.proto
@@ -106,7 +106,7 @@ message PredictLongRunningResponse {
   // The response of the long running operation.
   oneof response {
     // The response of the video generation prediction.
-    GenerateVideoResponse generate_video_response = 1;
+    PredictLongRunningGeneratedVideoResponse generate_video_response = 1;
   }
 }
 
@@ -135,7 +135,7 @@ message Video {
 }
 
 // Veo response.
-message GenerateVideoResponse {
+message PredictLongRunningGeneratedVideoResponse {
   // The generated samples.
   repeated Media generated_samples = 1;
 
diff --git a/google/ai/generativelanguage/v1beta/safety.proto b/google/ai/generativelanguage/v1beta/safety.proto
@@ -65,7 +65,8 @@ enum HarmCategory {
   HARM_CATEGORY_DANGEROUS_CONTENT = 10;
 
   // **Gemini** - Content that may be used to harm civic integrity.
-  HARM_CATEGORY_CIVIC_INTEGRITY = 11;
+  // DEPRECATED: use enable_enhanced_civic_answers instead.
+  HARM_CATEGORY_CIVIC_INTEGRITY = 11 [deprecated = true];
 }
 
 // Content filtering metadata associated with processing a single request.

Original file line number	Diff line number	Diff line change
`@@ -61,6 +61,9 @@ message File {`
`61`	`61`
`62`	`62`	`// Indicates the file is generated by Google.`
`63`	`63`	`GENERATED = 2;`
	`64`	`+`
	`65`	`+ // Indicates the file is registered, i.e. a Google Cloud Storage file.`
	`66`	`+ REGISTERED = 3;`
`64`	`67`	`}`
`65`	`68`
`66`	`69`	`// Metadata for the File.`
Original file line number	Diff line number	Diff line change
`@@ -106,7 +106,7 @@ message PredictLongRunningResponse {`
`106`	`106`	`// The response of the long running operation.`
`107`	`107`	`oneof response {`
`108`	`108`	`// The response of the video generation prediction.`
`109`		`- GenerateVideoResponse generate_video_response = 1;`
	`109`	`+ PredictLongRunningGeneratedVideoResponse generate_video_response = 1;`
`110`	`110`	`}`
`111`	`111`	`}`
`112`	`112`
`@@ -135,7 +135,7 @@ message Video {`
`135`	`135`	`}`
`136`	`136`
`137`	`137`	`// Veo response.`
`138`		`-message GenerateVideoResponse {`
	`138`	`+message PredictLongRunningGeneratedVideoResponse {`
`139`	`139`	`// The generated samples.`
`140`	`140`	`repeated Media generated_samples = 1;`
`141`	`141`
Original file line number	Diff line number	Diff line change
`@@ -65,7 +65,8 @@ enum HarmCategory {`
`65`	`65`	`HARM_CATEGORY_DANGEROUS_CONTENT = 10;`
`66`	`66`
`67`	`67`	`// Gemini - Content that may be used to harm civic integrity.`
`68`		`- HARM_CATEGORY_CIVIC_INTEGRITY = 11;`
	`68`	`+ // DEPRECATED: use enable_enhanced_civic_answers instead.`
	`69`	`+ HARM_CATEGORY_CIVIC_INTEGRITY = 11 [deprecated = true];`
`69`	`70`	`}`
`70`	`71`
`71`	`72`	`// Content filtering metadata associated with processing a single request.`