feat: Support promptable voices by specifying a model name and a prompt

Google APIs · copybara-github · commit a92cee399e0f · 2025-08-26T08:23:26.000-07:00
feat: Add enum value M4A to enum AudioEncoding
docs: A comment for enum value `AUDIO_ENCODING_UNSPECIFIED` in enum `AudioEncoding` is changed

PiperOrigin-RevId: 799573824
diff --git a/google/cloud/texttospeech/v1/cloud_tts.proto b/google/cloud/texttospeech/v1/cloud_tts.proto
@@ -88,7 +88,8 @@ enum SsmlVoiceGender {
 // Configuration to set up audio encoder. The encoding determines the output
 // audio format that we'd like.
 enum AudioEncoding {
-  // Not specified. Will return result
+  // Not specified. Only used by GenerateVoiceCloningKey. Otherwise, the service
+  // returns
   // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
   AUDIO_ENCODING_UNSPECIFIED = 0;
 
@@ -117,6 +118,9 @@ enum AudioEncoding {
   // Note that as opposed to LINEAR16, audio won't be wrapped in a WAV (or
   // any other) header.
   PCM = 7;
+
+  // M4A audio.
+  M4A = 8;
 }
 
 // The top-level message sent by the client for the `ListVoices` method.
@@ -327,6 +331,10 @@ message VoiceSelectionParams {
   // [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
   // clone matching the specified configuration.
   VoiceCloneParams voice_clone = 5 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The name of the model. If set, the service will choose the model
+  // matching the specified configuration.
+  string model_name = 6 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Description of audio data to be synthesized.
@@ -485,6 +493,9 @@ message StreamingSynthesisInput {
     // other voices.
     string markup = 5;
   }
+
+  // This is a system instruction supported only for controllable voice models.
+  optional string prompt = 6;
 }
 
 // Request message for the `StreamingSynthesize` method. Multiple