feat: add custom prompt config in the request and return prompt in the response

Google APIs · copybara-github · commit 1a1415300798 · 2026-01-05T11:23:08.000-08:00
feat: update min_speaker_count and max_speaker_count to be optional
docs: Clarify that min_speaker_count and max_speaker_count in SpeakerDiarizationConfig are not currently used
docs: Update guidance on how to enable speaker diarization; to enable, set the diarization_config field to an empty SpeakerDiarizationConfig message

PiperOrigin-RevId: 852383212
diff --git a/google/cloud/speech/v2/BUILD.bazel b/google/cloud/speech/v2/BUILD.bazel
@@ -347,7 +347,6 @@ load(
 
 csharp_proto_library(
     name = "speech_csharp_proto",
-    extra_opts = [],
     deps = [":speech_proto"],
 )
 
diff --git a/google/cloud/speech/v2/cloud_speech.proto b/google/cloud/speech/v2/cloud_speech.proto
@@ -779,19 +779,20 @@ message ExplicitDecodingConfig {
 
 // Configuration to enable speaker diarization.
 message SpeakerDiarizationConfig {
-  // Required. Minimum number of speakers in the conversation. This range gives
-  // you more flexibility by allowing the system to automatically determine the
-  // correct number of speakers.
-  //
-  // To fix the number of speakers detected in the audio, set
-  // `min_speaker_count` = `max_speaker_count`.
-  int32 min_speaker_count = 2 [(google.api.field_behavior) = REQUIRED];
-
-  // Required. Maximum number of speakers in the conversation. Valid values are:
-  // 1-6. Must be >= `min_speaker_count`. This range gives you more flexibility
-  // by allowing the system to automatically determine the correct number of
-  // speakers.
-  int32 max_speaker_count = 3 [(google.api.field_behavior) = REQUIRED];
+  // Optional. The system automatically determines the number of speakers. This
+  // value is not currently used.
+  int32 min_speaker_count = 2 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The system automatically determines the number of speakers. This
+  // value is not currently used.
+  int32 max_speaker_count = 3 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Configuration to enable custom prompt in chirp3.
+message CustomPromptConfig {
+  // Optional. The custom instructions to override the existing instructions for
+  // chirp3.
+  string custom_prompt = 1 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Available recognition features.
@@ -846,21 +847,19 @@ message RecognitionFeatures {
   // Mode for recognizing multi-channel audio.
   MultiChannelMode multi_channel_mode = 17;
 
-  // Configuration to enable speaker diarization and set additional
-  // parameters to make diarization better suited for your application.
-  // When this is enabled, we send all the words from the beginning of the
-  // audio for the top alternative in every consecutive STREAMING responses.
-  // This is done in order to improve our speaker tags as our models learn to
-  // identify the speakers in the conversation over time.
-  // For non-streaming requests, the diarization results will be provided only
-  // in the top alternative of the FINAL SpeechRecognitionResult.
+  // Configuration to enable speaker diarization. To enable diarization, set
+  // this field to an empty SpeakerDiarizationConfig message.
   SpeakerDiarizationConfig diarization_config = 9;
 
   // Maximum number of recognition hypotheses to be returned.
   // The server may return fewer than `max_alternatives`.
   // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
   // one. If omitted, will return a maximum of one.
   int32 max_alternatives = 16;
+
+  // Optional. Configuration to enable custom prompt for chirp3.
+  CustomPromptConfig custom_prompt_config = 18
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Transcription normalization configuration. Use transcription normalization
@@ -1066,6 +1065,13 @@ message RecognitionResponseMetadata {
 
   // When available, billed audio seconds for the corresponding request.
   google.protobuf.Duration total_billed_duration = 6;
+
+  // Optional. Output only. Provides the prompt used for the recognition
+  // request.
+  optional string prompt = 10 [
+    (google.api.field_behavior) = OUTPUT_ONLY,
+    (google.api.field_behavior) = OPTIONAL
+  ];
 }
 
 // Alternative hypotheses (a.k.a. n-best list).

Original file line number	Diff line number	Diff line change
`@@ -347,7 +347,6 @@ load(`
`347`	`347`
`348`	`348`	`csharp_proto_library(`
`349`	`349`	`name = "speech_csharp_proto",`
`350`		`- extra_opts = [],`
`351`	`350`	`deps = [":speech_proto"],`
`352`	`351`	`)`
`353`	`352`