Skip to content

Commit a92cee3

Browse files
Google APIs authored and copybara-github committed
feat: Support promptable voices by specifying a model name and a prompt
feat: Add enum value M4A to enum AudioEncoding
docs: A comment for enum value `AUDIO_ENCODING_UNSPECIFIED` in enum `AudioEncoding` is changed
PiperOrigin-RevId: 799573824
1 parent 41f615c commit a92cee3

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

google/cloud/texttospeech/v1/cloud_tts.proto

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ enum SsmlVoiceGender {
8888
// Configuration to set up audio encoder. The encoding determines the output
8989
// audio format that we'd like.
9090
enum AudioEncoding {
91-
// Not specified. Will return result
91+
// Not specified. Only used by GenerateVoiceCloningKey. Otherwise, will return
92+
// result
9293
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
9394
AUDIO_ENCODING_UNSPECIFIED = 0;
9495

@@ -117,6 +118,9 @@ enum AudioEncoding {
117118
// Note that as opposed to LINEAR16, audio won't be wrapped in a WAV (or
118119
// any other) header.
119120
PCM = 7;
121+
122+
// M4A audio.
123+
M4A = 8;
120124
}
121125

122126
// The top-level message sent by the client for the `ListVoices` method.
@@ -327,6 +331,10 @@ message VoiceSelectionParams {
327331
// [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
328332
// clone matching the specified configuration.
329333
VoiceCloneParams voice_clone = 5 [(google.api.field_behavior) = OPTIONAL];
334+
335+
// Optional. The name of the model. If set, the service will choose the model
336+
// matching the specified configuration.
337+
string model_name = 6 [(google.api.field_behavior) = OPTIONAL];
330338
}
331339

332340
// Description of audio data to be synthesized.
@@ -485,6 +493,9 @@ message StreamingSynthesisInput {
485493
// other voices.
486494
string markup = 5;
487495
}
496+
497+
// This is system instruction supported only for controllable voice models.
498+
optional string prompt = 6;
488499
}
489500

490501
// Request message for the `StreamingSynthesize` method. Multiple

0 commit comments

Comments
 (0)