@@ -13,6 +13,7 @@ import {
1313import {
1414 createRealtimeVoiceAgentTalkbackQueue ,
1515 createRealtimeVoiceBridgeSession ,
16+ createRealtimeVoiceOutputActivityTracker ,
1617 createTalkSessionController ,
1718 convertPcmToMulaw8k ,
1819 extendRealtimeVoiceOutputEchoSuppression ,
@@ -30,6 +31,7 @@ import {
3031 type RealtimeVoiceAgentTalkbackQueue ,
3132 type RealtimeVoiceBridgeEventLogEntry ,
3233 type RealtimeVoiceBridgeSession ,
34+ type RealtimeVoiceOutputActivityTracker ,
3335 type RealtimeVoiceProviderConfig ,
3436 type RealtimeVoiceProviderPlugin ,
3537 type RealtimeVoiceTranscriptEntry ,
@@ -163,6 +165,24 @@ export function extendGoogleMeetOutputEchoSuppression(params: {
163165 } ) ;
164166}
165167
/**
 * Extends the output echo-suppression window for one outgoing audio chunk and
 * reports that same chunk to the output activity tracker.
 *
 * The whole `params` object is forwarded to
 * `extendGoogleMeetOutputEchoSuppression`; its result is returned untouched so
 * callers can keep updating their own suppression/playback bookkeeping.
 *
 * @param params.tracker Tracker that receives `markPlaybackStarted()` followed
 *   by `markAudio()` for this chunk.
 * @param params.audio Outgoing audio chunk; its `byteLength` is reported as
 *   both the source and the sink byte count.
 * @param params.audioFormat Chrome audio format from the Google Meet config,
 *   forwarded to the echo-suppression helper.
 * @param params.nowMs Current time, forwarded to the echo-suppression helper
 *   (callers in this file pass `Date.now()`).
 * @param params.lastOutputPlayableUntilMs Previous playable-until value,
 *   forwarded to the echo-suppression helper.
 * @param params.suppressInputUntilMs Previous suppress-input-until value,
 *   forwarded to the echo-suppression helper.
 * @returns The echo-suppression result (`lastOutputPlayableUntilMs`,
 *   `suppressInputUntilMs`, `durationMs`) exactly as produced by the helper.
 */
export function recordGoogleMeetOutputActivity(params: {
  tracker: RealtimeVoiceOutputActivityTracker;
  audio: Buffer;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
  nowMs: number;
  lastOutputPlayableUntilMs: number;
  suppressInputUntilMs: number;
}): { lastOutputPlayableUntilMs: number; suppressInputUntilMs: number; durationMs: number } {
  // Compute the suppression window first: the tracker entry reuses its duration.
  const echoWindow = extendGoogleMeetOutputEchoSuppression(params);
  const { tracker, audio } = params;

  tracker.markPlaybackStarted();

  // The chunk is reported with identical source and sink sizes.
  const chunkBytes = audio.byteLength;
  tracker.markAudio({
    audioMs: echoWindow.durationMs,
    sourceAudioBytes: chunkBytes,
    sinkAudioBytes: chunkBytes,
  });

  return echoWindow;
}
185+
166186export function resolveGoogleMeetRealtimeAudioFormat ( config : GoogleMeetConfig ) {
167187 return config . chrome . audioFormat === "g711-ulaw-8khz"
168188 ? REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ
@@ -477,7 +497,7 @@ export async function startCommandAgentAudioBridge(params: {
477497 let lastInputAt : string | undefined ;
478498 let lastOutputAt : string | undefined ;
479499 let lastInputBytes = 0 ;
480- let lastOutputBytes = 0 ;
500+ const outputActivity = createRealtimeVoiceOutputActivityTracker ( ) ;
481501 let suppressedInputBytes = 0 ;
482502 let lastSuppressedInputAt : string | undefined ;
483503 let suppressInputUntil = 0 ;
@@ -606,7 +626,8 @@ export async function startCommandAgentAudioBridge(params: {
606626 } ) ;
607627
608628 const writeOutputAudio = ( audio : Buffer ) => {
609- const suppression = extendGoogleMeetOutputEchoSuppression ( {
629+ const suppression = recordGoogleMeetOutputActivity ( {
630+ tracker : outputActivity ,
610631 audio,
611632 audioFormat : params . config . chrome . audioFormat ,
612633 nowMs : Date . now ( ) ,
@@ -616,7 +637,6 @@ export async function startCommandAgentAudioBridge(params: {
616637 suppressInputUntil = suppression . suppressInputUntilMs ;
617638 lastOutputPlayableUntilMs = suppression . lastOutputPlayableUntilMs ;
618639 lastOutputAt = new Date ( ) . toISOString ( ) ;
619- lastOutputBytes += audio . byteLength ;
620640 emitTalkEvent ( {
621641 type : "output.audio.delta" ,
622642 turnId : ensureTalkTurn ( ) ,
@@ -787,12 +807,12 @@ export async function startCommandAgentAudioBridge(params: {
787807 providerConnected : sttSession ?. isConnected ( ) ?? false ,
788808 realtimeReady,
789809 audioInputActive : lastInputBytes > 0 ,
790- audioOutputActive : lastOutputBytes > 0 ,
810+ audioOutputActive : outputActivity . isActive ( ) ,
791811 lastInputAt,
792812 lastOutputAt,
793813 lastSuppressedInputAt,
794814 lastInputBytes,
795- lastOutputBytes,
815+ lastOutputBytes : outputActivity . snapshot ( ) . sinkAudioBytes ,
796816 suppressedInputBytes,
797817 ...getGoogleMeetRealtimeTranscriptHealth ( transcript ) ,
798818 recentTalkEvents : summarizeGoogleMeetTalkEvents ( recentTalkEvents ) ,
@@ -833,19 +853,19 @@ export async function startCommandRealtimeAudioBridge(params: {
833853 let lastInputAt : string | undefined ;
834854 let lastOutputAt : string | undefined ;
835855 let lastInputBytes = 0 ;
836- let lastOutputBytes = 0 ;
856+ const outputActivity = createRealtimeVoiceOutputActivityTracker ( ) ;
837857 let lastClearAt : string | undefined ;
838858 let clearCount = 0 ;
839859 let suppressedInputBytes = 0 ;
840860 let lastSuppressedInputAt : string | undefined ;
841861 let suppressInputUntil = 0 ;
842- let lastOutputAtMs = 0 ;
843862 let lastOutputPlayableUntilMs = 0 ;
844863 let bargeInInputProcess : BridgeProcess | undefined ;
845864 let agentTalkback : RealtimeVoiceAgentTalkbackQueue | undefined ;
846865
847866 const suppressInputForOutput = ( audio : Buffer ) => {
848- const suppression = extendGoogleMeetOutputEchoSuppression ( {
867+ const suppression = recordGoogleMeetOutputActivity ( {
868+ tracker : outputActivity ,
849869 audio,
850870 audioFormat : params . config . chrome . audioFormat ,
851871 nowMs : Date . now ( ) ,
@@ -970,12 +990,13 @@ export async function startCommandRealtimeAudioBridge(params: {
970990 stdio : [ "ignore" , "pipe" , "pipe" ] ,
971991 } ) ;
972992 bargeInInputProcess . stdout ?. on ( "data" , ( chunk ) => {
973- if ( stopped || lastOutputAtMs === 0 ) {
993+ if ( stopped || ! outputActivity . isInterruptible ( ) ) {
974994 return ;
975995 }
976996 const now = Date . now ( ) ;
977997 const playbackActive = now <= Math . max ( lastOutputPlayableUntilMs , suppressInputUntil ) ;
978- if ( ! playbackActive && now - lastOutputAtMs > 1000 ) {
998+ const lastOutputAudioAt = outputActivity . snapshot ( ) . lastAudioAt ;
999+ if ( ! playbackActive && ( lastOutputAudioAt === undefined || now - lastOutputAudioAt > 1_000 ) ) {
9791000 return ;
9801001 }
9811002 if ( now - lastBargeInAt < params . config . chrome . bargeInCooldownMs ) {
@@ -1141,9 +1162,7 @@ export async function startCommandRealtimeAudioBridge(params: {
11411162 turnId,
11421163 payload : { byteLength : audio . byteLength } ,
11431164 } ) ;
1144- lastOutputAtMs = Date . now ( ) ;
11451165 lastOutputAt = new Date ( ) . toISOString ( ) ;
1146- lastOutputBytes += audio . byteLength ;
11471166 suppressInputForOutput ( audio ) ;
11481167 writeOutputAudio ( audio ) ;
11491168 } ,
@@ -1315,12 +1334,12 @@ export async function startCommandRealtimeAudioBridge(params: {
13151334 providerConnected : bridge ?. bridge . isConnected ( ) ?? false ,
13161335 realtimeReady,
13171336 audioInputActive : lastInputBytes > 0 ,
1318- audioOutputActive : lastOutputBytes > 0 ,
1337+ audioOutputActive : outputActivity . isActive ( ) ,
13191338 lastInputAt,
13201339 lastOutputAt,
13211340 lastSuppressedInputAt,
13221341 lastInputBytes,
1323- lastOutputBytes,
1342+ lastOutputBytes : outputActivity . snapshot ( ) . sinkAudioBytes ,
13241343 suppressedInputBytes,
13251344 ...getGoogleMeetRealtimeTranscriptHealth ( transcript ) ,
13261345 ...getGoogleMeetRealtimeEventHealth ( realtimeEvents ) ,
0 commit comments