Skip to content

Commit 029472c

Browse files
committed
fix: keep discord realtime audio playback alive
1 parent 069c7b8 commit 029472c

4 files changed

Lines changed: 114 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ Docs: https://docs.openclaw.ai
6464

6565
### Fixes
6666

67+
- Discord/voice: keep realtime playback running when meeting notes attaches to an existing voice session or a realtime consult starts, and route realtime user transcripts into meeting notes.
6768
- WebChat: keep the run-complete indicator in progress until deferred history replay renders the assistant reply, so Done no longer appears before response text. (#85374) Thanks @neeravmakwana.
6869
- Agents/compaction: skip agent-harness preflight for provider-owned CLI runtime sessions so over-threshold Claude CLI sessions continue through normal compaction instead of failing on a missing harness. Fixes #84857. (#84878) Thanks @zhangguiping-xydt.
6970
- Control UI/config: save form-mode edits from the source config snapshot so runtime-only provider defaults like empty `models.providers.<id>.baseUrl` are not written back and rejected. Fixes #85831. Thanks @garyd9.

extensions/discord/src/voice/manager.e2e.test.ts

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,79 @@ describe("DiscordVoiceManager", () => {
637637
});
638638
});
639639

640+
// Regression test: joining the same guild/channel a second time with a
// `meetingNotes` option must attach meeting notes to the existing session entry
// without closing the realtime session or stopping the audio player, and a final
// user transcript must then be delivered to the meeting-notes `onUtterance` callback.
it("keeps realtime playback alive when meeting notes attaches to an existing voice session", async () => {
641+
const manager = createManager({
642+
groupPolicy: "open",
643+
voice: {
644+
enabled: true,
645+
mode: "agent-proxy",
646+
realtime: { provider: "openai", consultPolicy: "auto" },
647+
},
648+
});
649+
650+
// First join is made without options, so the session starts with no meeting notes attached.
await manager.join({ guildId: "g1", channelId: "1001" });
651+
const player = getLastAudioPlayer();
652+
// The casts below expose only the members this test touches.
const entry = getSessionEntry(manager) as {
653+
meetingNotes?: { sessionId: string; onUtterance: (event: unknown) => Promise<void> };
654+
realtime?: {
655+
beginSpeakerTurn: (
656+
context: { extraSystemPrompt?: string; senderIsOwner: boolean; speakerLabel: string },
657+
userId: string,
658+
) => { close: () => void; sendInputAudio: (audio: Buffer) => void };
659+
};
660+
};
661+
const bridgeParams = lastRealtimeBridgeParams() as
662+
| {
663+
audioSink?: { sendAudio: (audio: Buffer) => void };
664+
onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
665+
}
666+
| undefined;
667+
668+
// Feed output audio through the bridge's audio sink before attaching meeting notes.
// NOTE(review): presumably this puts the player into an active playback state — confirm against the mocks.
bridgeParams?.audioSink?.sendAudio(Buffer.alloc(24_000));
669+
// Baseline for the "player was not stopped by the second join" assertion below.
const stopCallsBeforeMeetingNotes = player.stop.mock.calls.length;
670+
const onUtterance = vi.fn(async () => undefined);
671+
672+
// Second join targets the same guild/channel, this time carrying the meeting-notes option.
const result = await manager.join(
673+
{ guildId: "g1", channelId: "1001" },
674+
{
675+
meetingNotes: {
676+
sessionId: "notes-1",
677+
onUtterance,
678+
},
679+
},
680+
);
681+
682+
// Meeting notes is attached, the realtime session was not closed, and no extra player.stop call happened.
expect(result.ok).toBe(true);
683+
expect(entry.meetingNotes?.sessionId).toBe("notes-1");
684+
expect(realtimeSessionMock.close).not.toHaveBeenCalled();
685+
expect(player.stop).toHaveBeenCalledTimes(stopCallsBeforeMeetingNotes);
686+
687+
// Open a speaker turn and send input audio before emitting the transcript.
// NOTE(review): presumably sendInputAudio is what marks the turn as having audio, which the
// meeting-notes path requires when it looks up the pending turn — confirm.
const turn = entry.realtime?.beginSpeakerTurn(
688+
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
689+
"u-owner",
690+
);
691+
turn?.sendInputAudio(Buffer.alloc(3840));
692+
// Emit a final ("isFinal" = true) user transcript through the bridge callback.
bridgeParams?.onTranscript?.("user", "meeting note transcript", true);
693+
694+
// The callback is polled for with vi.waitFor rather than asserted immediately.
await vi.waitFor(() =>
695+
expect(onUtterance).toHaveBeenCalledWith(
696+
expect.objectContaining({
697+
final: true,
698+
sessionId: "notes-1",
699+
speaker: { id: "u-owner", label: "Owner" },
700+
text: "meeting note transcript",
701+
metadata: expect.objectContaining({
702+
channel: "discord",
703+
channelId: "1001",
704+
guildId: "g1",
705+
// NOTE(review): the expected key ends in "c1" although the channel id is "1001";
// presumably it comes from the test's mocked route/session setup — confirm.
voiceSessionKey: "discord:g1:c1",
706+
}),
707+
}),
708+
),
709+
);
710+
turn?.close();
711+
});
712+
640713
it("destroys stale tracked voice connections before joining", async () => {
641714
const staleConnection = createConnectionMock();
642715
const connection = createConnectionMock();
@@ -1770,8 +1843,7 @@ describe("DiscordVoiceManager", () => {
17701843
},
17711844
realtimeSessionMock,
17721845
);
1773-
expect(player.stop).toHaveBeenCalledTimes(stopCallsBeforeConsult + 1);
1774-
expect(player.stop).toHaveBeenLastCalledWith(true);
1846+
expect(player.stop).toHaveBeenCalledTimes(stopCallsBeforeConsult);
17751847
await vi.waitFor(() =>
17761848
expect(realtimeSessionMock.submitToolResult).toHaveBeenCalledWith("call-1", {
17771849
text: "agent proxy answer",

extensions/discord/src/voice/manager.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -448,8 +448,6 @@ export class DiscordVoiceManager {
448448
const existing = this.sessions.get(guildId);
449449
if (existing && existing.channelId === channelId) {
450450
if (options?.meetingNotes) {
451-
existing.realtime?.close();
452-
existing.realtime = undefined;
453451
existing.meetingNotes = options.meetingNotes;
454452
}
455453
logVoiceVerbose(`join: already connected to guild ${guildId} channel ${channelId}`);

extensions/discord/src/voice/realtime.ts

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,6 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
979979
session.submitToolResult(callId, { text: exactSpeechText });
980980
return;
981981
}
982-
this.clearConsultPreambleAudio(callId);
983982
const consultMessage = buildRealtimeVoiceAgentConsultChatMessage(event.args);
984983
logger.info(
985984
`discord voice: realtime consult requested call=${callId || "unknown"} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} question=${formatRealtimeLogPreview(consultMessage)}`,
@@ -1042,16 +1041,6 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
10421041
});
10431042
}
10441043

1045-
/**
 * Deleted by this commit: every line of this method carries a `-` marker in the diff gutter,
 * and its only visible call site (in the consult-request path) is deleted as well.
 *
 * Does nothing when `hasInterruptibleOutputAudio()` is false. Otherwise it logs the call id,
 * guild and channel, then calls `clearOutputAudio("agent-consult-start")`.
 *
 * @param callId Identifier of the tool call; used only in the log line.
 */
private clearConsultPreambleAudio(callId: string): void {
1046-
if (!this.hasInterruptibleOutputAudio()) {
1047-
return;
1048-
}
1049-
logger.info(
1050-
`discord voice: realtime consult preamble audio cleared call=${callId} guild=${this.params.entry.guildId} channel=${this.params.entry.channelId}`,
1051-
);
1052-
this.clearOutputAudio("agent-consult-start");
1053-
}
1054-
10551044
private async handleAgentControlToolCall(
10561045
event: RealtimeVoiceToolCallEvent,
10571046
session: RealtimeVoiceBridgeSession,
@@ -1095,6 +1084,8 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
10951084
if (!trimmed) {
10961085
return;
10971086
}
1087+
const meetingNotesTurn = this.peekPendingSpeakerTurn();
1088+
this.recordMeetingNotesUtterance(trimmed, meetingNotesTurn);
10981089
const usesAgentProxy = isDiscordAgentProxyVoiceMode(this.params.mode);
10991090
const pendingForcedConsult =
11001091
usesAgentProxy && params.usesRealtimeAgentHandoff
@@ -1133,6 +1124,37 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
11331124
this.talkback.enqueue(trimmed, this.consumePendingSpeakerContext());
11341125
}
11351126

1127+
/**
 * Forwards a transcript to the meeting-notes callback attached to the session entry,
 * attributed to the given pending speaker turn.
 *
 * Does nothing when the entry has no `meetingNotes` or when no turn is supplied.
 * The callback is invoked fire-and-forget: its failures are logged at warn level and
 * never propagate to the caller.
 *
 * @param text Utterance text; the visible call site passes the trimmed transcript.
 * @param turn Speaker turn that supplies the speaker identity and start time, or
 *   `undefined` when no turn is available.
 */
private recordMeetingNotesUtterance(text: string, turn: PendingSpeakerTurn | undefined): void {
1128+
const meetingNotes = this.params.entry.meetingNotes;
1129+
if (!meetingNotes || !turn) {
1130+
return;
1131+
}
1132+
const context = turn.context;
1133+
const utterance = {
1134+
sessionId: meetingNotes.sessionId,
1135+
// NOTE(review): this runs synchronously, outside the guarded promise chain below;
// toISOString() throws a RangeError for an invalid time value — confirm startedAt is always a valid timestamp.
startedAt: new Date(turn.startedAt).toISOString(),
1136+
// NOTE(review): always reported as final — confirm the caller only reaches this for final transcripts.
final: true,
1137+
speaker: {
1138+
id: context.userId,
1139+
label: context.speakerLabel,
1140+
},
1141+
text,
1142+
metadata: {
1143+
channel: "discord",
1144+
guildId: this.params.entry.guildId,
1145+
channelId: this.params.entry.channelId,
1146+
voiceSessionKey: this.params.entry.voiceSessionKey,
1147+
},
1148+
};
1149+
// Starting from Promise.resolve() defers the callback to a microtask and routes both a
// synchronous throw and a rejected promise from onUtterance into the same catch handler.
void Promise.resolve()
1150+
.then(() => meetingNotes.onUtterance(utterance))
1151+
.catch((error: unknown) => {
1152+
logger.warn(
1153+
`discord voice: realtime meeting notes utterance failed: ${formatErrorMessage(error)}`,
1154+
);
1155+
});
1156+
}
1157+
11361158
private logAgentControlResult(result: RealtimeVoiceAgentControlResult): void {
11371159
logger.info(
11381160
`discord voice: realtime active-run control handled mode=${result.mode} ok=${result.ok} active=${result.active} reason=${result.reason ?? "none"} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId}`,
@@ -1290,6 +1312,12 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
12901312
return turn?.context;
12911313
}
12921314

1315+
/**
 * Returns the first pending speaker turn whose `hasAudio` is truthy, or `undefined`
 * when there is none. `find` itself leaves the list untouched.
 *
 * The two maintenance helpers are called first, in the same order that
 * `hasPendingSpeakerAudioContext` calls them.
 * NOTE(review): despite the "peek" name, those helpers presumably drop stale or closed
 * turns from the list — confirm that callers tolerate this side effect.
 */
private peekPendingSpeakerTurn(): PendingSpeakerTurn | undefined {
1316+
this.prunePendingSpeakerTurns();
1317+
this.expireClosedSpeakerTurnsBeforeLaterAudio();
1318+
return this.pendingSpeakerTurns.find((turn) => turn.hasAudio);
1319+
}
1320+
12931321
private hasPendingSpeakerAudioContext(): boolean {
12941322
this.prunePendingSpeakerTurns();
12951323
this.expireClosedSpeakerTurnsBeforeLaterAudio();

0 commit comments

Comments
 (0)