Skip to content

Commit 4532e5d

Browse files
authored
fix(control-ui): preserve Stop after reconnect
Fixes #70991. Adds authenticated Gateway WebSocket protocol pings, exposes active session-run state to the Control UI, and keeps the session-scoped Stop available after a reconnect or reload in which the browser has lost the local run id.

Validation:
- pnpm test ui/src/ui/app-chat.test.ts ui/src/ui/app-gateway.node.test.ts src/gateway/server.sessions.list-changed.test.ts src/gateway/server/ws-connection.test.ts
- OPENCLAW_LOCAL_CHECK=1 OPENCLAW_LOCAL_CHECK_MODE=throttled pnpm check:changed
- GitHub CI and high-signal security checks passed on head 1f4c872
1 parent: aaa19fb; commit: 4532e5d

13 files changed

Lines changed: 230 additions & 15 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
1919
- Plugins/doctor: repair missing configured provider and channel plugins from ClawHub before npm fallback, preserving ClawPack metadata in the install record. Thanks @vincentkoc.
2020
- Gateway/channels: cap startup fanout at four channel/account handoffs and recover from Bonjour ciao self-probe races, reducing Windows startup stalls with many Telegram accounts. Fixes #75687.
2121
- Gateway/sessions: keep `sessions.list` polling responsive on large session stores by reusing list-safe session cache/indexes and returning a lightweight compaction checkpoint preview instead of heavyweight summaries. Thanks @rolandrscheel.
22+
- Control UI/Gateway: keep long-running dashboard WebSocket sessions alive with protocol pings and keep Stop available after reconnect or reload by recovering session-scoped active-run abort state. Fixes #70991. Thanks @alexandre-leng.
2223
- CLI/update: treat inherited Gateway service markers as origin hints and only block package replacement when the managed Gateway is still live, so self-updates can stop the service and continue safely. (#75729) Thanks @hxy91819.
2324
- Agents/failover: exempt run-level timeouts that fire during tool execution from model fallback, timeout-triggered compaction, and generic timeout payload synthesis. Long `process(poll)`, browser, or `exec` tool calls that exceed `agents.defaults.timeoutSeconds` previously rotated auth profiles, switched to a fallback model, and surfaced a misleading "LLM request timed out" error even though the primary model had already responded. Mirrors the existing `timedOutDuringCompaction` precedent (#46889). Fixes #52147. (#75873) Thanks @simonusa.
2425
- Docker: copy Bun 1.3.13 from a digest-pinned image and keep CI on the same version. Fixes #74356. Thanks @fede-kamel and @sallyom.

src/gateway/server-methods/sessions.ts

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,10 @@ function shouldAttachPendingMessageSeq(params: { payload: unknown; cached?: bool
224224
}
225225

226226
function emitSessionsChanged(
227-
context: Pick<GatewayRequestContext, "broadcastToConnIds" | "getSessionEventSubscriberConnIds">,
227+
context: Pick<
228+
GatewayRequestContext,
229+
"broadcastToConnIds" | "chatAbortControllers" | "getSessionEventSubscriberConnIds"
230+
>,
228231
payload: { sessionKey?: string; reason: string; compacted?: boolean },
229232
) {
230233
const connIds = context.getSessionEventSubscriberConnIds();
@@ -282,6 +285,11 @@ function emitSessionsChanged(
282285
modelProvider: sessionRow.modelProvider,
283286
model: sessionRow.model,
284287
status: sessionRow.status,
288+
hasActiveRun: hasTrackedActiveSessionRun({
289+
context,
290+
requestedKey: payload.sessionKey ?? sessionRow.key,
291+
canonicalKey: sessionRow.key,
292+
}),
285293
startedAt: sessionRow.startedAt,
286294
endedAt: sessionRow.endedAt,
287295
runtimeMs: sessionRow.runtimeMs,
@@ -427,10 +435,13 @@ function resolveAbortSessionKey(params: {
427435
}
428436

429437
function hasTrackedActiveSessionRun(params: {
430-
context: Pick<GatewayRequestContext, "chatAbortControllers">;
438+
context: Partial<Pick<GatewayRequestContext, "chatAbortControllers">>;
431439
requestedKey: string;
432440
canonicalKey: string;
433441
}): boolean {
442+
if (!(params.context.chatAbortControllers instanceof Map)) {
443+
return false;
444+
}
434445
for (const active of params.context.chatAbortControllers.values()) {
435446
if (active.sessionKey === params.canonicalKey || active.sessionKey === params.requestedKey) {
436447
return true;
@@ -666,7 +677,22 @@ export const sessionsHandlers: GatewayRequestHandlers = {
666677
modelCatalog,
667678
opts: p,
668679
});
669-
respond(true, result, undefined);
680+
respond(
681+
true,
682+
{
683+
...result,
684+
sessions: result.sessions.map((session) =>
685+
Object.assign({}, session, {
686+
hasActiveRun: hasTrackedActiveSessionRun({
687+
context,
688+
requestedKey: session.key,
689+
canonicalKey: session.key,
690+
}),
691+
}),
692+
),
693+
},
694+
undefined,
695+
);
670696
},
671697
"sessions.cleanup": async ({ params, respond, context }) => {
672698
if (!assertValidParams(params, validateSessionsCleanupParams, "sessions.cleanup", respond)) {

src/gateway/server.sessions.list-changed.test.ts

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,49 @@ test("sessions.list uses the gateway model catalog for effective thinking defaul
157157
);
158158
});
159159

160+
test("sessions.list marks sessions with active abortable runs", async () => {
161+
await createSessionStoreDir();
162+
await writeSessionStore({
163+
entries: {
164+
main: sessionStoreEntry("sess-main"),
165+
},
166+
});
167+
168+
const respond = vi.fn();
169+
const sessionsHandlers = await getSessionsHandlers();
170+
const { getRuntimeConfig } = await getGatewayConfigModule();
171+
await sessionsHandlers["sessions.list"]({
172+
req: {
173+
type: "req",
174+
id: "req-sessions-list-active-run",
175+
method: "sessions.list",
176+
params: {},
177+
},
178+
params: {},
179+
respond,
180+
client: null,
181+
isWebchatConnect: () => false,
182+
context: {
183+
getRuntimeConfig,
184+
loadGatewayModelCatalog: async () => [],
185+
chatAbortControllers: new Map([["run-1", { sessionKey: "agent:main:main" }]]),
186+
} as never,
187+
});
188+
189+
expect(respond).toHaveBeenCalledWith(
190+
true,
191+
expect.objectContaining({
192+
sessions: expect.arrayContaining([
193+
expect.objectContaining({
194+
key: "agent:main:main",
195+
hasActiveRun: true,
196+
}),
197+
]),
198+
}),
199+
undefined,
200+
);
201+
});
202+
160203
test("sessions.list yields before responding during bulk transcript hydration", async () => {
161204
const { dir } = await createSessionStoreDir();
162205
const entries: Record<string, ReturnType<typeof sessionStoreEntry>> = {};

src/gateway/server/ws-connection.test.ts

Lines changed: 78 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { EventEmitter } from "node:events";
2-
import { beforeEach, describe, expect, it, vi } from "vitest";
2+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
33
import type { WebSocketServer } from "ws";
44
import type { ResolvedGatewayAuth } from "../auth.js";
55

@@ -36,6 +36,10 @@ describe("attachGatewayWsConnectionHandler", () => {
3636
attachGatewayWsMessageHandlerMock.mockReset();
3737
});
3838

39+
afterEach(() => {
40+
vi.useRealTimers();
41+
});
42+
3943
it("threads current auth getters into the handshake handler instead of a stale snapshot", () => {
4044
const listeners = new Map<string, (...args: unknown[]) => void>();
4145
const wss = {
@@ -132,6 +136,7 @@ describe("attachGatewayWsConnectionHandler", () => {
132136
port: 19001,
133137
canvasHostEnabled: false,
134138
resolvedAuth: createResolvedAuth("token"),
139+
preauthHandshakeTimeoutMs: 60_000,
135140
gatewayMethods: [],
136141
events: [],
137142
refreshHealthSnapshot: vi.fn(),
@@ -167,4 +172,76 @@ describe("attachGatewayWsConnectionHandler", () => {
167172
expect(registered).toBe(false);
168173
expect(clients.size).toBe(0);
169174
});
175+
176+
it("sends protocol pings until the connection closes", () => {
177+
vi.useFakeTimers();
178+
const listeners = new Map<string, (...args: unknown[]) => void>();
179+
const wss = {
180+
on: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
181+
listeners.set(event, handler);
182+
}),
183+
} as unknown as WebSocketServer;
184+
const socket = Object.assign(new EventEmitter(), {
185+
_socket: {
186+
remoteAddress: "127.0.0.1",
187+
remotePort: 1234,
188+
localAddress: "127.0.0.1",
189+
localPort: 5678,
190+
},
191+
send: vi.fn(),
192+
ping: vi.fn(),
193+
close: vi.fn(),
194+
});
195+
const upgradeReq = {
196+
headers: { host: "127.0.0.1:19001" },
197+
socket: { localAddress: "127.0.0.1" },
198+
};
199+
200+
attachGatewayWsConnectionHandler({
201+
wss,
202+
clients: new Set(),
203+
preauthConnectionBudget: { release: vi.fn() } as never,
204+
port: 19001,
205+
canvasHostEnabled: false,
206+
resolvedAuth: createResolvedAuth("token"),
207+
preauthHandshakeTimeoutMs: 60_000,
208+
gatewayMethods: [],
209+
events: [],
210+
refreshHealthSnapshot: vi.fn(),
211+
logGateway: createLogger() as never,
212+
logHealth: createLogger() as never,
213+
logWsControl: createLogger() as never,
214+
extraHandlers: {},
215+
broadcast: vi.fn(),
216+
buildRequestContext: () =>
217+
({
218+
unsubscribeAllSessionEvents: vi.fn(),
219+
nodeRegistry: { unregister: vi.fn() },
220+
nodeUnsubscribeAll: vi.fn(),
221+
}) as never,
222+
});
223+
224+
const onConnection = listeners.get("connection");
225+
expect(onConnection).toBeTypeOf("function");
226+
onConnection?.(socket, upgradeReq);
227+
228+
const passed = attachGatewayWsMessageHandlerMock.mock.calls[0]?.[0] as {
229+
setClient: (client: unknown) => boolean;
230+
};
231+
expect(
232+
passed.setClient({
233+
socket,
234+
connect: { client: { id: "openclaw-control-ui", mode: "webchat" } },
235+
connId: "ping-client",
236+
usesSharedGatewayAuth: false,
237+
}),
238+
).toBe(true);
239+
240+
vi.advanceTimersByTime(25_000);
241+
expect(socket.ping).toHaveBeenCalledTimes(1);
242+
243+
socket.emit("close", 1000, Buffer.from("done"));
244+
vi.advanceTimersByTime(25_000);
245+
expect(socket.ping).toHaveBeenCalledTimes(1);
246+
});
170247
});

src/gateway/server/ws-connection.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,12 +267,17 @@ export function attachGatewayWsConnectionHandler(params: AttachGatewayWsConnecti
267267
payload: { nonce: connectNonce, ts: Date.now() },
268268
});
269269

270+
let pingTimer: ReturnType<typeof setInterval> | undefined;
271+
270272
const close = (code = 1000, reason?: string) => {
271273
if (closed) {
272274
return;
273275
}
274276
closed = true;
275277
clearTimeout(handshakeTimer);
278+
if (pingTimer !== undefined) {
279+
clearInterval(pingTimer);
280+
}
276281
releasePreauthBudget();
277282
if (client) {
278283
clients.delete(client);
@@ -423,6 +428,13 @@ export function attachGatewayWsConnectionHandler(params: AttachGatewayWsConnecti
423428
releasePreauthBudget();
424429
client = next;
425430
clients.add(next);
431+
pingTimer = setInterval(() => {
432+
try {
433+
socket.ping();
434+
} catch {
435+
// close() clears the timer; ping can race with a socket already entering CLOSING.
436+
}
437+
}, 25_000);
426438
return true;
427439
},
428440
setHandshakeState: (next) => {

src/gateway/session-utils.types.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ export type GatewaySessionRow = {
7171
totalTokensFresh?: boolean;
7272
estimatedCostUsd?: number;
7373
status?: SessionRunStatus;
74+
hasActiveRun?: boolean;
7475
subagentRunState?: SubagentRunState;
7576
hasActiveSubagentRun?: boolean;
7677
startedAt?: number;

ui/src/ui/app-chat.test.ts

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1024,6 +1024,24 @@ describe("handleAbortChat", () => {
10241024
expect(host.chatRunId).toBe("run-main");
10251025
});
10261026

1027+
it("queues a session-scoped abort while disconnected after active run state is recovered", async () => {
1028+
const host = makeHost({
1029+
connected: false,
1030+
chatRunId: null,
1031+
chatMessage: "draft",
1032+
sessionKey: "agent:main",
1033+
sessionsResult: createSessionsResult([
1034+
row("agent:main", { hasActiveRun: true }),
1035+
row("agent:other", { hasActiveRun: true }),
1036+
]),
1037+
});
1038+
1039+
await handleAbortChat(host);
1040+
1041+
expect(host.pendingAbort).toEqual({ runId: null, sessionKey: "agent:main" });
1042+
expect(host.chatMessage).toBe("");
1043+
});
1044+
10271045
it("keeps the draft when disconnected without an active run", async () => {
10281046
const host = makeHost({
10291047
connected: false,

ui/src/ui/app-chat.ts

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ export type ChatHost = ChatInputHistoryState & {
6666
sessionsResult?: SessionsListResult | null;
6767
updateComplete?: Promise<unknown>;
6868
refreshSessionsAfterChat: Set<string>;
69-
pendingAbort?: { runId: string; sessionKey: string } | null;
69+
pendingAbort?: { runId?: string | null; sessionKey: string } | null;
7070
chatSubmitGuards?: Map<string, Promise<void>>;
7171
/** Callback for slash-command side effects that need app-level access. */
7272
onSlashAction?: (action: string) => void | Promise<void>;
@@ -90,6 +90,21 @@ export function isChatBusy(host: ChatHost) {
9090
return host.chatSending || Boolean(host.chatRunId);
9191
}
9292

93+
export function hasAbortableSessionRun(host: {
94+
chatRunId?: string | null;
95+
sessionKey: string;
96+
sessionsResult?: SessionsListResult | null;
97+
}): boolean {
98+
if (host.chatRunId) {
99+
return true;
100+
}
101+
return Boolean(
102+
host.sessionsResult?.sessions.some(
103+
(session) => session.key === host.sessionKey && session.hasActiveRun === true,
104+
),
105+
);
106+
}
107+
93108
export function isChatStopCommand(text: string) {
94109
const trimmed = text.trim();
95110
if (!trimmed) {
@@ -135,11 +150,12 @@ function isBtwCommand(text: string) {
135150
}
136151

137152
export async function handleAbortChat(host: ChatHost) {
138-
// If disconnected but we have an active runId, queue the abort for when we reconnect
139-
if (!host.connected && host.chatRunId) {
153+
const activeRunId = host.chatRunId;
154+
// If disconnected but this session is abortable, queue the abort for when we reconnect.
155+
if (!host.connected && hasAbortableSessionRun(host)) {
140156
host.chatMessage = "";
141157
resetChatInputHistoryNavigation(host);
142-
host.pendingAbort = { runId: host.chatRunId, sessionKey: host.sessionKey };
158+
host.pendingAbort = { runId: activeRunId, sessionKey: host.sessionKey };
143159
return;
144160
}
145161
if (!host.connected) {

ui/src/ui/app-gateway.node.test.ts

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -679,6 +679,23 @@ describe("connectGateway", () => {
679679
expect(host.chatStream).toBeNull();
680680
});
681681

682+
it("sends queued session-scoped chat aborts after reconnect", async () => {
683+
const host = createHost();
684+
host.pendingAbort = { sessionKey: "main" };
685+
686+
connectGateway(host);
687+
const client = gatewayClientInstances[0];
688+
expect(client).toBeDefined();
689+
690+
client.emitHello();
691+
await Promise.resolve();
692+
693+
expect(client.request).toHaveBeenCalledWith("chat.abort", {
694+
sessionKey: "main",
695+
});
696+
expect(host.pendingAbort).toBeNull();
697+
});
698+
682699
it("logs and drops stale queued chat abort failures after reconnect", async () => {
683700
const host = createHost();
684701
host.pendingAbort = { runId: "run-stale", sessionKey: "main" };

ui/src/ui/app-gateway.ts

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ type GatewayHost = {
102102
updateStatusBanner: { tone: "danger" | "warn" | "info"; text: string } | null;
103103
sessionKey: string;
104104
chatRunId: string | null;
105-
pendingAbort?: { runId: string; sessionKey: string } | null;
105+
pendingAbort?: { runId?: string | null; sessionKey: string } | null;
106106
refreshSessionsAfterChat: Set<string>;
107107
execApprovalQueue: ExecApprovalRequest[];
108108
execApprovalError: string | null;
@@ -439,10 +439,12 @@ export function connectGateway(host: GatewayHost, options?: ConnectGatewayOption
439439
const abort = host.pendingAbort;
440440
host.pendingAbort = null;
441441
void host.client
442-
.request("chat.abort", {
443-
sessionKey: abort.sessionKey,
444-
runId: abort.runId,
445-
})
442+
.request(
443+
"chat.abort",
444+
abort.runId
445+
? { sessionKey: abort.sessionKey, runId: abort.runId }
446+
: { sessionKey: abort.sessionKey },
447+
)
446448
.catch((err) => {
447449
// Log to console for diagnostics; user sees no feedback for a stale abort
448450
// since the run likely completed during the disconnect window anyway.

0 commit comments

Comments
 (0)