@@ -779,19 +779,20 @@ message ExplicitDecodingConfig {
779779
780780// Configuration to enable speaker diarization.
781781message SpeakerDiarizationConfig {
782- // Required. Minimum number of speakers in the conversation. This range gives
783- // you more flexibility by allowing the system to automatically determine the
784- // correct number of speakers.
785- //
786- // To fix the number of speakers detected in the audio, set
787- // `min_speaker_count` = `max_speaker_count`.
788- int32 min_speaker_count = 2 [(google.api.field_behavior ) = REQUIRED ];
789-
790- // Required. Maximum number of speakers in the conversation. Valid values are:
791- // 1-6. Must be >= `min_speaker_count`. This range gives you more flexibility
792- // by allowing the system to automatically determine the correct number of
793- // speakers.
794- int32 max_speaker_count = 3 [(google.api.field_behavior ) = REQUIRED ];
782+ // Optional. Minimum number of speakers in the conversation. Not currently
783+ // used: the system automatically determines the number of speakers.
784+ int32 min_speaker_count = 2 [(google.api.field_behavior ) = OPTIONAL ];
785+
786+ // Optional. Maximum number of speakers in the conversation. Not currently
787+ // used: the system automatically determines the number of speakers.
788+ int32 max_speaker_count = 3 [(google.api.field_behavior ) = OPTIONAL ];
789+ }
790+
791+ // Configuration to enable a custom prompt in chirp3.
792+ message CustomPromptConfig {
793+ // Optional. Custom instructions that override the existing instructions for
794+ // chirp3.
795+ string custom_prompt = 1 [(google.api.field_behavior ) = OPTIONAL ];
795796}
796797
797798// Available recognition features.
@@ -846,21 +847,19 @@ message RecognitionFeatures {
846847 // Mode for recognizing multi-channel audio.
847848 MultiChannelMode multi_channel_mode = 17 ;
848849
849- // Configuration to enable speaker diarization and set additional
850- // parameters to make diarization better suited for your application.
851- // When this is enabled, we send all the words from the beginning of the
852- // audio for the top alternative in every consecutive STREAMING responses.
853- // This is done in order to improve our speaker tags as our models learn to
854- // identify the speakers in the conversation over time.
855- // For non-streaming requests, the diarization results will be provided only
856- // in the top alternative of the FINAL SpeechRecognitionResult.
850+ // Configuration to enable speaker diarization. To enable diarization, set
851+ // this field to an empty SpeakerDiarizationConfig message.
857852 SpeakerDiarizationConfig diarization_config = 9 ;
858853
859854 // Maximum number of recognition hypotheses to be returned.
860855 // The server may return fewer than `max_alternatives`.
861856 // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
862857 // one. If omitted, will return a maximum of one.
863858 int32 max_alternatives = 16 ;
859+
860+ // Optional. Configuration to enable a custom prompt for chirp3.
861+ CustomPromptConfig custom_prompt_config = 18
862+ [(google.api.field_behavior ) = OPTIONAL ];
864863}
865864
866865// Transcription normalization configuration. Use transcription normalization
@@ -1066,6 +1065,13 @@ message RecognitionResponseMetadata {
10661065
10671066 // When available, billed audio seconds for the corresponding request.
10681067 google.protobuf.Duration total_billed_duration = 6 ;
1068+
1069+ // Optional. Output only. Provides the prompt used for the recognition
1070+ // request.
1071+ optional string prompt = 10 [
1072+ (google.api.field_behavior ) = OUTPUT_ONLY ,
1073+ (google.api.field_behavior ) = OPTIONAL
1074+ ];
10691075}
10701076
10711077// Alternative hypotheses (a.k.a. n-best list).
0 commit comments