Skip to content

Commit dc2c4aa

Browse files
authored
fix: rotate realtime voice sessions on max duration
- Rotate OpenAI Realtime voice sessions on provider max-duration events without surfacing the expected expiry as a Discord voice error.
- Add lifecycle logging for Realtime rotation/reconnect and regression coverage for max-duration reconnect.
- Allowlist the existing Control UI chunking helper for the optional Knip unused-file guard so the dependency shard stays green on the current base.
1 parent fc3cd49 commit dc2c4aa

5 files changed

Lines changed: 151 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
1414

1515
### Fixes
1616

17+
- Discord/OpenAI voice: rotate Realtime sessions at provider max duration without logging the expected session-expiry event as an error.
1718
- Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz.
1819
- Gateway: clear the runtime config snapshot before `SIGUSR1` in-process restarts so config changes survive the next gateway loop. (#86388) Thanks @XuZehan-iCenter.
1920
- Models: show OAuth delegation markers as configured `models.json` auth while keeping runtime route usability checks strict. (#86378) Thanks @rohitjavvadi.

extensions/discord/src/voice/realtime.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,14 @@ function formatRealtimeInterruptionLog(event: RealtimeVoiceBridgeEvent): string
177177
return undefined;
178178
}
179179

180+
function formatRealtimeLifecycleLog(event: RealtimeVoiceBridgeEvent): string | undefined {
181+
if (!event.type.startsWith("session.")) {
182+
return undefined;
183+
}
184+
const detail = event.detail ? ` ${event.detail}` : "";
185+
return `discord voice: realtime lifecycle ${event.direction}:${event.type}${detail}`;
186+
}
187+
180188
function isRealtimeResponseCancelled(event: RealtimeVoiceBridgeEvent): boolean {
181189
return (
182190
event.direction === "server" &&
@@ -591,6 +599,10 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
591599
if (interruptionLog) {
592600
logger.info(interruptionLog);
593601
}
602+
const lifecycleLog = formatRealtimeLifecycleLog(event);
603+
if (lifecycleLog) {
604+
logger.info(lifecycleLog);
605+
}
594606
},
595607
onError: (error) => this.logRealtimeError(formatErrorMessage(error)),
596608
onClose: (reason) => {

extensions/openai/realtime-voice-provider.test.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,81 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
778778
expect(bridge.isConnected()).toBe(true);
779779
});
780780

781+
it("rotates realtime bridges on provider max-duration events without reporting an error", async () => {
782+
vi.useFakeTimers();
783+
const provider = buildOpenAIRealtimeVoiceProvider();
784+
const onError = vi.fn();
785+
const onEvent = vi.fn();
786+
const bridge = provider.createBridge({
787+
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
788+
onAudio: vi.fn(),
789+
onClearAudio: vi.fn(),
790+
onError,
791+
onEvent,
792+
});
793+
const connecting = bridge.connect();
794+
const firstSocket = FakeWebSocket.instances[0];
795+
if (!firstSocket) {
796+
throw new Error("expected bridge to create a websocket");
797+
}
798+
799+
firstSocket.readyState = FakeWebSocket.OPEN;
800+
firstSocket.emit("open");
801+
firstSocket.emit("message", Buffer.from(JSON.stringify({ type: "session.updated" })));
802+
await connecting;
803+
804+
firstSocket.emit(
805+
"message",
806+
Buffer.from(
807+
JSON.stringify({
808+
type: "error",
809+
error: { message: "Your session hit the maximum duration of 60 minutes." },
810+
}),
811+
),
812+
);
813+
814+
expect(onError).not.toHaveBeenCalled();
815+
expect(firstSocket.closed).toBe(true);
816+
expect(onEvent).toHaveBeenCalledWith({
817+
direction: "server",
818+
type: "session.rotation",
819+
detail: "reason=max-duration",
820+
});
821+
expect(onEvent).toHaveBeenCalledWith({
822+
direction: "client",
823+
type: "session.reconnect.scheduled",
824+
detail: "reason=max-duration attempt=1 delayMs=1000",
825+
});
826+
827+
await vi.advanceTimersByTimeAsync(1000);
828+
await vi.waitFor(() => expect(FakeWebSocket.instances).toHaveLength(2));
829+
const secondSocket = FakeWebSocket.instances[1];
830+
if (!secondSocket) {
831+
throw new Error("expected bridge to reconnect");
832+
}
833+
secondSocket.readyState = FakeWebSocket.OPEN;
834+
secondSocket.emit("open");
835+
secondSocket.emit("message", Buffer.from(JSON.stringify({ type: "session.updated" })));
836+
837+
await vi.waitFor(() =>
838+
expect(onEvent).toHaveBeenCalledWith({
839+
direction: "server",
840+
type: "session.rotation.ready",
841+
detail: "reason=max-duration",
842+
}),
843+
);
844+
await vi.waitFor(() =>
845+
expect(onEvent).toHaveBeenCalledWith({
846+
direction: "client",
847+
type: "session.reconnect.ready",
848+
detail: "reason=max-duration attempt=1",
849+
}),
850+
);
851+
expect(bridge.isConnected()).toBe(true);
852+
853+
bridge.close();
854+
});
855+
781856
it("keeps Azure deployment bridges on deployment-compatible session payloads", async () => {
782857
const provider = buildOpenAIRealtimeVoiceProvider();
783858
const bridge = provider.createBridge({

extensions/openai/realtime-voice-provider.ts

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ const OPENAI_REALTIME_ACTIVE_RESPONSE_ERROR_PREFIX =
9090
"Conversation already has an active response in progress:";
9191
const OPENAI_REALTIME_NO_ACTIVE_RESPONSE_CANCEL_ERROR =
9292
"Cancellation failed: no active response found";
93+
const OPENAI_REALTIME_MAX_SESSION_DURATION_FRAGMENT = "maximum duration";
9394
const OPENAI_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
9495
const OPENAI_REALTIME_VOICES = [
9596
"alloy",
@@ -330,6 +331,14 @@ function prefersCodexOAuthForRealtimeModel(model: string | undefined): boolean {
330331
return (model ?? OPENAI_REALTIME_DEFAULT_MODEL).trim().toLowerCase().startsWith("gpt-");
331332
}
332333

334+
function isOpenAIRealtimeMaxSessionDurationError(detail: string): boolean {
335+
const normalized = detail.toLowerCase();
336+
return (
337+
normalized.includes("session") &&
338+
normalized.includes(OPENAI_REALTIME_MAX_SESSION_DURATION_FRAGMENT)
339+
);
340+
}
341+
333342
async function resolveOpenAIRealtimeDefaultAuth(params: {
334343
configuredApiKey: string | undefined;
335344
cfg: RealtimeVoiceBrowserSessionCreateRequest["cfg"] | undefined;
@@ -426,6 +435,8 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
426435
private deliveredToolCallKeys = new Set<string>();
427436
private readonly flowId = randomUUID();
428437
private sessionReadyFired = false;
438+
private reconnectReason: string | undefined;
439+
private activeConnectionReason: string | undefined;
429440
private readonly audioFormat: RealtimeVoiceAudioFormat;
430441

431442
constructor(private readonly config: OpenAIRealtimeVoiceBridgeConfig) {
@@ -658,7 +669,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
658669
settleReject(new Error("OpenAI realtime connection closed before ready"));
659670
return;
660671
}
661-
void this.attemptReconnect();
672+
const reason = this.reconnectReason ?? "websocket-close";
673+
this.reconnectReason = undefined;
674+
void this.attemptReconnect(reason);
662675
});
663676
};
664677

@@ -833,26 +846,41 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
833846
};
834847
}
835848

836-
private async attemptReconnect(): Promise<void> {
849+
private async attemptReconnect(reason: string): Promise<void> {
837850
if (this.intentionallyClosed) {
838851
return;
839852
}
840853
if (this.reconnectAttempts >= OpenAIRealtimeVoiceBridge.MAX_RECONNECT_ATTEMPTS) {
854+
this.config.onEvent?.({
855+
direction: "client",
856+
type: "session.reconnect.exhausted",
857+
detail: `reason=${reason} attempts=${this.reconnectAttempts}`,
858+
});
841859
this.config.onClose?.("error");
842860
return;
843861
}
844862
this.reconnectAttempts += 1;
845-
const delay =
846-
OpenAIRealtimeVoiceBridge.BASE_RECONNECT_DELAY_MS * 2 ** (this.reconnectAttempts - 1);
863+
const attempt = this.reconnectAttempts;
864+
const delay = OpenAIRealtimeVoiceBridge.BASE_RECONNECT_DELAY_MS * 2 ** (attempt - 1);
865+
this.config.onEvent?.({
866+
direction: "client",
867+
type: "session.reconnect.scheduled",
868+
detail: `reason=${reason} attempt=${attempt} delayMs=${delay}`,
869+
});
847870
await new Promise((resolve) => setTimeout(resolve, delay));
848871
if (this.intentionallyClosed) {
849872
return;
850873
}
851874
try {
852875
await this.doConnect();
876+
this.config.onEvent?.({
877+
direction: "client",
878+
type: "session.reconnect.ready",
879+
detail: `reason=${reason} attempt=${attempt}`,
880+
});
853881
} catch (error) {
854882
this.config.onError?.(error instanceof Error ? error : new Error(String(error)));
855-
await this.attemptReconnect();
883+
await this.attemptReconnect(reason);
856884
}
857885
}
858886

@@ -951,11 +979,27 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
951979
}
952980

953981
private handleEvent(event: RealtimeEvent): void {
954-
this.config.onEvent?.({
955-
direction: "server",
956-
type: event.type,
957-
detail: this.describeServerEvent(event),
958-
});
982+
const emitServerEvent = () =>
983+
this.config.onEvent?.({
984+
direction: "server",
985+
type: event.type,
986+
detail: this.describeServerEvent(event),
987+
});
988+
if (
989+
event.type === "error" &&
990+
isOpenAIRealtimeMaxSessionDurationError(readRealtimeErrorDetail(event.error))
991+
) {
992+
this.reconnectReason = "max-duration";
993+
this.activeConnectionReason = "max-duration";
994+
this.config.onEvent?.({
995+
direction: "server",
996+
type: "session.rotation",
997+
detail: "reason=max-duration",
998+
});
999+
this.ws?.close(1000, "max-duration rotation");
1000+
return;
1001+
}
1002+
emitServerEvent();
9591003
switch (event.type) {
9601004
case "session.created":
9611005
return;
@@ -965,6 +1009,14 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
9651009
for (const chunk of this.pendingAudio.splice(0)) {
9661010
this.sendAudio(chunk);
9671011
}
1012+
if (this.activeConnectionReason) {
1013+
this.config.onEvent?.({
1014+
direction: "server",
1015+
type: "session.rotation.ready",
1016+
detail: `reason=${this.activeConnectionReason}`,
1017+
});
1018+
this.activeConnectionReason = undefined;
1019+
}
9681020
if (!this.sessionReadyFired) {
9691021
this.sessionReadyFired = true;
9701022
this.config.onReady?.();

scripts/deadcode-unused-files.allowlist.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,5 @@ export const KNIP_OPTIONAL_UNUSED_FILE_ALLOWLIST = [
4242
"extensions/qa-lab/src/auth-profile.fixture.ts",
4343
"extensions/qa-lab/src/codex-plugin.fixture.ts",
4444
"src/gateway/test/server-sessions-helpers.ts",
45+
"ui/src/ui/control-ui-chunking.ts",
4546
];

0 commit comments

Comments
 (0)