@@ -50,6 +50,11 @@ const MAX_PENDING_AUDIO_CHUNKS = 320;
5050const DEFAULT_AUDIO_STREAM_END_SILENCE_MS = 500 ;
5151const GOOGLE_REALTIME_BROWSER_SESSION_TTL_MS = 30 * 60 * 1000 ;
5252const GOOGLE_REALTIME_BROWSER_NEW_SESSION_TTL_MS = 60 * 1000 ;
// Lookup table with one entry per possible mu-law byte value (0-255). Each entry
// holds the linear sample produced by decodeMulawSample for that byte, so decoding
// audio becomes a single indexed read. Built once when the module is loaded.
const MULAW_LINEAR_SAMPLES = Int16Array.from({ length: 256 }, (_unused, code) =>
  decodeMulawSample(code),
);
5358
5459type GoogleRealtimeSensitivity = "low" | "high" ;
5560type GoogleRealtimeThinkingLevel = "minimal" | "low" | "medium" | "high" ;
@@ -330,6 +335,8 @@ function buildFunctionDeclarations(tools: RealtimeVoiceTool[] | undefined): Func
330335
331336function buildGoogleLiveConnectConfig ( config : GoogleRealtimeLiveConfig ) : LiveConnectConfig {
332337 const functionDeclarations = buildFunctionDeclarations ( config . tools ) ;
338+ const realtimeInputConfig = buildRealtimeInputConfig ( config ) ;
339+ const thinkingConfig = buildThinkingConfig ( config ) ;
333340 return {
334341 responseModalities : [ "AUDIO" as Modality ] ,
335342 ...( typeof config . temperature === "number" && config . temperature > 0
@@ -344,15 +351,13 @@ function buildGoogleLiveConnectConfig(config: GoogleRealtimeLiveConfig): LiveCon
344351 } ,
345352 systemInstruction : config . instructions ,
346353 ...( functionDeclarations . length > 0 ? { tools : [ { functionDeclarations } ] } : { } ) ,
347- ...( buildRealtimeInputConfig ( config )
348- ? { realtimeInputConfig : buildRealtimeInputConfig ( config ) }
349- : { } ) ,
354+ ...( realtimeInputConfig ? { realtimeInputConfig } : { } ) ,
350355 inputAudioTranscription : { } ,
351356 outputAudioTranscription : { } ,
352357 ...( typeof config . enableAffectiveDialog === "boolean"
353358 ? { enableAffectiveDialog : config . enableAffectiveDialog }
354359 : { } ) ,
355- ...( buildThinkingConfig ( config ) ? { thinkingConfig : buildThinkingConfig ( config ) } : { } ) ,
360+ ...( thinkingConfig ? { thinkingConfig } : { } ) ,
356361 } ;
357362}
358363
@@ -487,12 +492,7 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
487492 this . audioStreamEnded = false ;
488493 }
489494
490- const pcm = this . toInputPcm ( audio ) ;
491- const pcm16k = resamplePcm (
492- pcm ,
493- this . audioFormat . sampleRateHz ,
494- GOOGLE_REALTIME_INPUT_SAMPLE_RATE ,
495- ) ;
495+ const pcm16k = this . toGoogleInputPcm16k ( audio ) ;
496496 this . session . sendRealtimeInput ( {
497497 audio : {
498498 data : pcm16k . toString ( "base64" ) ,
@@ -617,6 +617,21 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
617617 return this . audioFormat . encoding === "pcm16" ? audio : mulawToPcm ( audio ) ;
618618 }
619619
/**
 * Converts one chunk of incoming audio into PCM at GOOGLE_REALTIME_INPUT_SAMPLE_RATE.
 *
 * When the configured format is 8 kHz g711_ulaw and the target rate is 16 kHz, the
 * chunk goes through convertMulaw8kToPcm16k, which decodes and upsamples in a single
 * pass. Every other combination is first turned into PCM by toInputPcm and then
 * passed to resamplePcm.
 *
 * @param audio - Raw audio bytes in the bridge's configured audio format.
 * @returns PCM bytes at the Google realtime input sample rate.
 */
private toGoogleInputPcm16k(audio: Buffer): Buffer {
  const { encoding, sampleRateHz } = this.audioFormat;
  const canUseMulawFastPath =
    encoding === "g711_ulaw" &&
    sampleRateHz === 8_000 &&
    GOOGLE_REALTIME_INPUT_SAMPLE_RATE === 16_000;

  if (!canUseMulawFastPath) {
    // General path: decode to PCM (if needed), then resample to the target rate.
    const pcm = this.toInputPcm(audio);
    return resamplePcm(pcm, this.audioFormat.sampleRateHz, GOOGLE_REALTIME_INPUT_SAMPLE_RATE);
  }

  return convertMulaw8kToPcm16k(audio);
}
634+
620635 private toOutputAudio ( pcm : Buffer , sampleRate : number ) : Buffer {
621636 return this . audioFormat . encoding === "pcm16"
622637 ? resamplePcm ( pcm , sampleRate , this . audioFormat . sampleRateHz )
@@ -726,6 +741,30 @@ class GoogleRealtimeVoiceBridge implements RealtimeVoiceBridge {
726741 }
727742}
728743
/**
 * Decodes mu-law bytes and doubles the sample count in one pass, producing signed
 * 16-bit little-endian PCM (used by the caller for 8 kHz mu-law to 16 kHz PCM).
 *
 * Each input byte yields two output samples (4 bytes): the decoded sample itself,
 * followed by the rounded midpoint between it and the next decoded sample. The last
 * input byte has no successor, so its midpoint equals the sample itself.
 *
 * @param muLaw - Buffer of mu-law encoded bytes, one byte per sample.
 * @returns A buffer of `muLaw.length * 4` bytes; empty when the input is empty.
 */
function convertMulaw8kToPcm16k(muLaw: Buffer): Buffer {
  const sampleCount = muLaw.length;
  if (sampleCount === 0) {
    return Buffer.alloc(0);
  }

  const out = Buffer.alloc(sampleCount * 4);
  for (let index = 0; index < sampleCount; index += 1) {
    const here = muLaw[index] ?? 0;
    // Past the end of the input there is no successor; reuse the current byte.
    const ahead = muLaw[index + 1] ?? here;

    const sample = MULAW_LINEAR_SAMPLES[here] ?? 0;
    const following = MULAW_LINEAR_SAMPLES[ahead] ?? sample;
    const midpoint = Math.round((sample + following) / 2);

    const offset = index * 4;
    out.writeInt16LE(sample, offset);
    out.writeInt16LE(midpoint, offset + 2);
  }
  return out;
}
757+
/**
 * Expands a single mu-law encoded byte into a signed linear sample.
 *
 * The byte is bit-inverted and then split into a sign bit (0x80), a 3-bit exponent
 * and a 4-bit mantissa. The magnitude is `((mantissa << 3) + 132) << exponent`,
 * minus the same 132 bias, and is negated when the sign bit is set.
 *
 * @param value - Encoded byte; only the low 8 bits are considered.
 * @returns The decoded sample, between -32124 and 32124 inclusive.
 */
function decodeMulawSample(value: number): number {
  const BIAS = 132;
  const inverted = ~value & 0xff;

  const isNegative = (inverted & 0x80) !== 0;
  const segment = (inverted >> 4) & 0x07;
  const step = inverted & 0x0f;

  const magnitude = (((step << 3) + BIAS) << segment) - BIAS;
  return isNegative ? -magnitude : magnitude;
}
767+
729768async function createGoogleRealtimeBrowserSession (
730769 req : RealtimeVoiceBrowserSessionCreateRequest ,
731770) : Promise < RealtimeVoiceBrowserSession > {
0 commit comments