Skip to content

Commit c970f9f

Browse files
feat: [google-cloud-speech] expose google.cloud.location.Locations API (#13734)
- [ ] Regenerate this pull request now. BEGIN_COMMIT_OVERRIDE feat: expose google.cloud.location.Locations API feat: add new fields to CustomClass and PhraseSet.Phrase messages feat: add ALAW support to RecognitionConfig feat: make transcript_normalization field optional feat: deprecating speaker_tag (int) for speaker_label (string) docs: miscellaneous clarifications END_COMMIT_OVERRIDE PiperOrigin-RevId: 743615436 Source-Link: googleapis/googleapis@3891337 Source-Link: googleapis/googleapis-gen@a36bdfd Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXNwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiYTM2YmRmZGNjZTFiMTZkNGU3ZTc4OWJhNTkyYzM3MzA0ZGFhMTZjMCJ9 --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent c46b81a commit c970f9f

File tree

8 files changed

+856
-53
lines changed

8 files changed

+856
-53
lines changed

packages/google-cloud-speech/google/cloud/speech_v1p1beta1/services/adaptation/async_client.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545

4646
from google.longrunning import operations_pb2 # type: ignore
4747
from google.protobuf import field_mask_pb2 # type: ignore
48+
from google.protobuf import timestamp_pb2 # type: ignore
4849

4950
from google.cloud.speech_v1p1beta1.services.adaptation import pagers
5051
from google.cloud.speech_v1p1beta1.types import cloud_speech_adaptation, resource
@@ -75,6 +76,12 @@ class AdaptationAsyncClient:
7576
_DEFAULT_ENDPOINT_TEMPLATE = AdaptationClient._DEFAULT_ENDPOINT_TEMPLATE
7677
_DEFAULT_UNIVERSE = AdaptationClient._DEFAULT_UNIVERSE
7778

79+
crypto_key_path = staticmethod(AdaptationClient.crypto_key_path)
80+
parse_crypto_key_path = staticmethod(AdaptationClient.parse_crypto_key_path)
81+
crypto_key_version_path = staticmethod(AdaptationClient.crypto_key_version_path)
82+
parse_crypto_key_version_path = staticmethod(
83+
AdaptationClient.parse_crypto_key_version_path
84+
)
7885
custom_class_path = staticmethod(AdaptationClient.custom_class_path)
7986
parse_custom_class_path = staticmethod(AdaptationClient.parse_custom_class_path)
8087
phrase_set_path = staticmethod(AdaptationClient.phrase_set_path)

packages/google-cloud-speech/google/cloud/speech_v1p1beta1/services/adaptation/client.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262

6363
from google.longrunning import operations_pb2 # type: ignore
6464
from google.protobuf import field_mask_pb2 # type: ignore
65+
from google.protobuf import timestamp_pb2 # type: ignore
6566

6667
from google.cloud.speech_v1p1beta1.services.adaptation import pagers
6768
from google.cloud.speech_v1p1beta1.types import cloud_speech_adaptation, resource
@@ -196,6 +197,56 @@ def transport(self) -> AdaptationTransport:
196197
"""
197198
return self._transport
198199

200+
@staticmethod
201+
def crypto_key_path(
202+
project: str,
203+
location: str,
204+
key_ring: str,
205+
crypto_key: str,
206+
) -> str:
207+
"""Returns a fully-qualified crypto_key string."""
208+
return "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}".format(
209+
project=project,
210+
location=location,
211+
key_ring=key_ring,
212+
crypto_key=crypto_key,
213+
)
214+
215+
@staticmethod
216+
def parse_crypto_key_path(path: str) -> Dict[str, str]:
217+
"""Parses a crypto_key path into its component segments."""
218+
m = re.match(
219+
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/keyRings/(?P<key_ring>.+?)/cryptoKeys/(?P<crypto_key>.+?)$",
220+
path,
221+
)
222+
return m.groupdict() if m else {}
223+
224+
@staticmethod
225+
def crypto_key_version_path(
226+
project: str,
227+
location: str,
228+
key_ring: str,
229+
crypto_key: str,
230+
crypto_key_version: str,
231+
) -> str:
232+
"""Returns a fully-qualified crypto_key_version string."""
233+
return "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}".format(
234+
project=project,
235+
location=location,
236+
key_ring=key_ring,
237+
crypto_key=crypto_key,
238+
crypto_key_version=crypto_key_version,
239+
)
240+
241+
@staticmethod
242+
def parse_crypto_key_version_path(path: str) -> Dict[str, str]:
243+
"""Parses a crypto_key_version path into its component segments."""
244+
m = re.match(
245+
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/keyRings/(?P<key_ring>.+?)/cryptoKeys/(?P<crypto_key>.+?)/cryptoKeyVersions/(?P<crypto_key_version>.+?)$",
246+
path,
247+
)
248+
return m.groupdict() if m else {}
249+
199250
@staticmethod
200251
def custom_class_path(
201252
project: str,

packages/google-cloud-speech/google/cloud/speech_v1p1beta1/services/speech/async_client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ class SpeechAsyncClient:
8080
_DEFAULT_ENDPOINT_TEMPLATE = SpeechClient._DEFAULT_ENDPOINT_TEMPLATE
8181
_DEFAULT_UNIVERSE = SpeechClient._DEFAULT_UNIVERSE
8282

83+
crypto_key_path = staticmethod(SpeechClient.crypto_key_path)
84+
parse_crypto_key_path = staticmethod(SpeechClient.parse_crypto_key_path)
85+
crypto_key_version_path = staticmethod(SpeechClient.crypto_key_version_path)
86+
parse_crypto_key_version_path = staticmethod(
87+
SpeechClient.parse_crypto_key_version_path
88+
)
8389
custom_class_path = staticmethod(SpeechClient.custom_class_path)
8490
parse_custom_class_path = staticmethod(SpeechClient.parse_custom_class_path)
8591
phrase_set_path = staticmethod(SpeechClient.phrase_set_path)

packages/google-cloud-speech/google/cloud/speech_v1p1beta1/services/speech/client.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,56 @@ def transport(self) -> SpeechTransport:
200200
"""
201201
return self._transport
202202

203+
@staticmethod
204+
def crypto_key_path(
205+
project: str,
206+
location: str,
207+
key_ring: str,
208+
crypto_key: str,
209+
) -> str:
210+
"""Returns a fully-qualified crypto_key string."""
211+
return "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}".format(
212+
project=project,
213+
location=location,
214+
key_ring=key_ring,
215+
crypto_key=crypto_key,
216+
)
217+
218+
@staticmethod
219+
def parse_crypto_key_path(path: str) -> Dict[str, str]:
220+
"""Parses a crypto_key path into its component segments."""
221+
m = re.match(
222+
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/keyRings/(?P<key_ring>.+?)/cryptoKeys/(?P<crypto_key>.+?)$",
223+
path,
224+
)
225+
return m.groupdict() if m else {}
226+
227+
@staticmethod
228+
def crypto_key_version_path(
229+
project: str,
230+
location: str,
231+
key_ring: str,
232+
crypto_key: str,
233+
crypto_key_version: str,
234+
) -> str:
235+
"""Returns a fully-qualified crypto_key_version string."""
236+
return "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}".format(
237+
project=project,
238+
location=location,
239+
key_ring=key_ring,
240+
crypto_key=crypto_key,
241+
crypto_key_version=crypto_key_version,
242+
)
243+
244+
@staticmethod
245+
def parse_crypto_key_version_path(path: str) -> Dict[str, str]:
246+
"""Parses a crypto_key_version path into its component segments."""
247+
m = re.match(
248+
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/keyRings/(?P<key_ring>.+?)/cryptoKeys/(?P<crypto_key>.+?)/cryptoKeyVersions/(?P<crypto_key_version>.+?)$",
249+
path,
250+
)
251+
return m.groupdict() if m else {}
252+
203253
@staticmethod
204254
def custom_class_path(
205255
project: str,

packages/google-cloud-speech/google/cloud/speech_v1p1beta1/types/cloud_speech.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,9 @@ class StreamingRecognitionConfig(proto.Message):
211211
212212
The ``single_utterance`` field can only be used with
213213
specified models, otherwise an error is thrown. The
214-
``model`` field in [``RecognitionConfig``][] must be set to:
214+
``model`` field in
215+
[RecognitionConfig][google.cloud.speech.v1p1beta1.RecognitionConfig]
216+
must be set to:
215217
216218
- ``command_and_search``
217219
- ``phone_call`` AND additional field
@@ -360,7 +362,7 @@ class RecognitionConfig(proto.Message):
360362
documentation. When speech adaptation is set it supersedes
361363
the ``speech_contexts`` field.
362364
transcript_normalization (google.cloud.speech_v1p1beta1.types.TranscriptNormalization):
363-
Use transcription normalization to
365+
Optional. Use transcription normalization to
364366
automatically replace parts of the transcript
365367
with phrases of your choosing. For
366368
StreamingRecognize, this normalization only
@@ -409,7 +411,7 @@ class RecognitionConfig(proto.Message):
409411
enable_speaker_diarization (bool):
410412
If 'true', enables speaker detection for each recognized
411413
word in the top alternative of the recognition result using
412-
a speaker_tag provided in the WordInfo. Note: Use
414+
a speaker_label provided in the WordInfo. Note: Use
413415
diarization_config instead.
414416
diarization_speaker_count (int):
415417
If set, specifies the estimated number of speakers in the
@@ -576,9 +578,12 @@ class AudioEncoding(proto.Enum):
576578
file being used.
577579
WEBM_OPUS (9):
578580
Opus encoded audio frames in WebM container
579-
(`OggOpus <https://wiki.xiph.org/OggOpus>`__).
581+
(`WebM <https://www.webmproject.org/docs/container/>`__).
580582
``sample_rate_hertz`` must be one of 8000, 12000, 16000,
581583
24000, or 48000.
584+
ALAW (10):
585+
8-bit samples that compand 13-bit audio
586+
samples using G.711 PCMA/a-law.
582587
"""
583588
ENCODING_UNSPECIFIED = 0
584589
LINEAR16 = 1
@@ -590,6 +595,7 @@ class AudioEncoding(proto.Enum):
590595
SPEEX_WITH_HEADER_BYTE = 7
591596
MP3 = 8
592597
WEBM_OPUS = 9
598+
ALAW = 10
593599

594600
encoding: AudioEncoding = proto.Field(
595601
proto.ENUM,
@@ -696,7 +702,7 @@ class SpeakerDiarizationConfig(proto.Message):
696702
enable_speaker_diarization (bool):
697703
If 'true', enables speaker detection for each recognized
698704
word in the top alternative of the recognition result using
699-
a speaker_tag provided in the WordInfo.
705+
a speaker_label provided in the WordInfo.
700706
min_speaker_count (int):
701707
Minimum number of speakers in the
702708
conversation. This range gives you more
@@ -1042,6 +1048,10 @@ class RecognizeResponse(proto.Message):
10421048
request_id (int):
10431049
The ID associated with the request. This is a
10441050
unique ID specific only to the given request.
1051+
using_legacy_models (bool):
1052+
Whether the request used legacy ASR models (was
1053+
not automatically migrated to use conformer
1054+
models).
10451055
"""
10461056

10471057
results: MutableSequence["SpeechRecognitionResult"] = proto.RepeatedField(
@@ -1063,6 +1073,10 @@ class RecognizeResponse(proto.Message):
10631073
proto.INT64,
10641074
number=8,
10651075
)
1076+
using_legacy_models: bool = proto.Field(
1077+
proto.BOOL,
1078+
number=9,
1079+
)
10661080

10671081

10681082
class LongRunningRecognizeResponse(proto.Message):
@@ -1523,8 +1537,17 @@ class WordInfo(proto.Message):
15231537
speaker within the audio. This field specifies which one of
15241538
those speakers was detected to have spoken this word. Value
15251539
ranges from '1' to diarization_speaker_count. speaker_tag is
1526-
set if enable_speaker_diarization = 'true' and only in the
1527-
top alternative.
1540+
set if enable_speaker_diarization = 'true' and only for the
1541+
top alternative. Note: Use speaker_label instead.
1542+
speaker_label (str):
1543+
Output only. A label value assigned for every unique speaker
1544+
within the audio. This field specifies which speaker was
1545+
detected to have spoken this word. For some models, like
1546+
medical_conversation, this can be the actual speaker role, for
1547+
example "patient" or "provider", but generally this would be
1548+
a number identifying a speaker. This field is only set if
1549+
enable_speaker_diarization = 'true' and only for the top
1550+
alternative.
15281551
"""
15291552

15301553
start_time: duration_pb2.Duration = proto.Field(
@@ -1549,6 +1572,10 @@ class WordInfo(proto.Message):
15491572
proto.INT32,
15501573
number=5,
15511574
)
1575+
speaker_label: str = proto.Field(
1576+
proto.STRING,
1577+
number=6,
1578+
)
15521579

15531580

15541581
class SpeechAdaptationInfo(proto.Message):

0 commit comments

Comments
 (0)