Skip to content

Commit 7ddd815

Browse files
Sathvik-1007 and mcaxtr authored
fix(whatsapp): report transport activity so stale-socket health detection works (#72656)
Merged via squash.

Prepared head SHA: 1b19207
Co-authored-by: Sathvik-1007 <195685832+Sathvik-1007@users.noreply.github.com>
Co-authored-by: mcaxtr <7562095+mcaxtr@users.noreply.github.com>
Reviewed-by: @mcaxtr
1 parent 8edb99f commit 7ddd815

10 files changed

Lines changed: 289 additions & 8 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ Docs: https://docs.openclaw.ai
9191
- Channels/WhatsApp: restrict pairing verification replies to real inbound user content, preventing unsolicited prompts from receipts, typing indicators, presence updates, and other non-message Baileys upserts. Fixes #73797. (#73823) Thanks @hclsys.
9292
- Configure/Ollama: show the configured Ollama model allowlist after Cloud only or Cloud + Local setup and skip slow per-model cloud metadata fetches. (#73995) Thanks @obviyus.
9393
- Channels/WhatsApp: detect explicit group `@mentions` again when the bot's own E.164 is in `allowFrom`, so shared-number setups no longer skip group pings that directly mention the bot. Fixes #49317. (#73453) Thanks @juan-flores077.
94+
- WhatsApp/reliability: publish real transport-liveness into WhatsApp channel status and force earlier reconnects on silent transport stalls, so quiet healthy sessions stay connected while wedged sockets recover before the later remote 408 path. (#72656) Thanks @Sathvik-1007.
9495

9596
## 2026.4.27
9697

docs/channels/whatsapp.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ OpenClaw recommends running WhatsApp on a separate number when possible. (The ch
150150
- Baileys socket timings are explicit under `web.whatsapp.*`: `keepAliveIntervalMs` controls WhatsApp Web application pings, `connectTimeoutMs` controls the opening handshake timeout, and `defaultQueryTimeoutMs` controls Baileys query timeouts.
151151
- Outbound sends require an active WhatsApp listener for the target account.
152152
- Status and broadcast chats are ignored (`@status`, `@broadcast`).
153+
- The reconnect watchdog follows WhatsApp Web transport activity, not only inbound app-message volume: quiet linked-device sessions stay up while transport frames continue, but a transport stall forces reconnect well before the later remote disconnect path.
153154
- Direct chats use DM session rules (`session.dmScope`; default `main` collapses DMs to the agent main session).
154155
- Group sessions are isolated (`agent:<agentId>:whatsapp:group:<jid>`).
155156
- WhatsApp Web transport honors standard proxy environment variables on the gateway host (`HTTPS_PROXY`, `HTTP_PROXY`, `NO_PROXY` / lowercase variants). Prefer host-level proxy config over channel-specific WhatsApp proxy settings.

extensions/whatsapp/src/auto-reply.test-harness.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,13 @@ type WebAutoReplyMonitorHarness = {
4545
run: Promise<unknown>;
4646
};
4747
type MockSessionSocket = {
48-
ev: { on: ReturnType<typeof vi.fn>; off: ReturnType<typeof vi.fn> };
49-
ws: EventEmitter & { close: ReturnType<typeof vi.fn> };
48+
ev: {
49+
on: ReturnType<typeof vi.fn>;
50+
off: ReturnType<typeof vi.fn>;
51+
};
52+
ws: EventEmitter & {
53+
close: ReturnType<typeof vi.fn>;
54+
};
5055
user: { id: string };
5156
};
5257

@@ -68,16 +73,16 @@ vi.mock("./session.js", async () => {
6873
createWaSocket: vi.fn(async () => {
6974
const ws = new EventEmitter() as MockSessionSocket["ws"];
7075
ws.close = vi.fn();
71-
const sock: MockSessionSocket = {
76+
const socket: MockSessionSocket = {
7277
ev: {
7378
on: vi.fn(),
7479
off: vi.fn(),
7580
},
7681
ws,
7782
user: { id: "123@s.whatsapp.net" },
7883
};
79-
getSessionSockets().push(sock);
80-
return sock;
84+
getSessionSockets().push(socket);
85+
return socket;
8186
}),
8287
waitForWaConnection: vi.fn().mockResolvedValue(undefined),
8388
};
@@ -309,6 +314,7 @@ export function startWebAutoReplyMonitor(params: {
309314
sleep: UnknownMock | AsyncUnknownMock;
310315
signal?: AbortSignal;
311316
heartbeatSeconds?: number;
317+
transportTimeoutMs?: number;
312318
messageTimeoutMs?: number;
313319
watchdogCheckMs?: number;
314320
reconnect?: { initialMs: number; maxMs: number; maxAttempts: number; factor: number };
@@ -326,6 +332,7 @@ export function startWebAutoReplyMonitor(params: {
326332
params.signal ?? controller.signal,
327333
{
328334
heartbeatSeconds: params.heartbeatSeconds ?? 1,
335+
transportTimeoutMs: params.transportTimeoutMs,
329336
messageTimeoutMs: params.messageTimeoutMs,
330337
watchdogCheckMs: params.watchdogCheckMs,
331338
reconnect: params.reconnect ?? { initialMs: 10, maxMs: 10, maxAttempts: 3, factor: 1.1 },

extensions/whatsapp/src/auto-reply.web-auto-reply.connection-and-logging.e2e.test.ts

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,92 @@ describe("web auto-reply connection", () => {
407407
socket.ws.emit("frame");
408408
await vi.advanceTimersByTimeAsync(20);
409409
}
410+
await vi.waitFor(
411+
() => {
412+
expect(scripted.getListenerCount()).toBeGreaterThanOrEqual(2);
413+
},
414+
{ timeout: 250, interval: 2 },
415+
);
416+
417+
controller.abort();
418+
scripted.resolveClose(scripted.getListenerCount() - 1, {
419+
status: 499,
420+
isLoggedOut: false,
421+
error: "aborted",
422+
});
423+
await Promise.resolve();
424+
await run;
425+
} finally {
426+
vi.useRealTimers();
427+
}
428+
});
429+
430+
it("publishes frame-driven transport activity for quiet sessions", async () => {
431+
vi.useFakeTimers();
432+
try {
433+
const sleep = vi.fn(async () => {});
434+
const statuses: Array<Record<string, unknown>> = [];
435+
const scripted = createScriptedWebListenerFactory();
436+
const { controller, run } = startWebAutoReplyMonitor({
437+
monitorWebChannelFn: monitorWebChannel as never,
438+
listenerFactory: scripted.listenerFactory,
439+
sleep,
440+
heartbeatSeconds: 1,
441+
transportTimeoutMs: 60_000,
442+
messageTimeoutMs: 60_000,
443+
watchdogCheckMs: 5,
444+
statusSink: (next) => statuses.push({ ...next }),
445+
});
446+
447+
await vi.waitFor(
448+
() => {
449+
expect(scripted.getListenerCount()).toBe(1);
450+
},
451+
{ timeout: 250, interval: 2 },
452+
);
453+
454+
const initialTransportAt = Number(statuses.at(-1)?.lastTransportActivityAt ?? 0);
455+
const socket = getLastWebAutoReplySessionSocket();
456+
await vi.advanceTimersByTimeAsync(250);
457+
socket.ws.emit("frame");
458+
await vi.advanceTimersByTimeAsync(1_000);
459+
460+
const lastTransportAt = Number(statuses.at(-1)?.lastTransportActivityAt ?? 0);
461+
expect(lastTransportAt).toBeGreaterThan(initialTransportAt);
462+
463+
controller.abort();
464+
scripted.resolveClose(0, { status: 499, isLoggedOut: false, error: "aborted" });
465+
await Promise.resolve();
466+
await run;
467+
} finally {
468+
vi.useRealTimers();
469+
}
470+
});
410471

472+
it("reconnects on transport stall before the long app-silence window", async () => {
473+
vi.useFakeTimers();
474+
try {
475+
const sleep = vi.fn(async () => {});
476+
const scripted = createScriptedWebListenerFactory();
477+
const { controller, run } = startWebAutoReplyMonitor({
478+
monitorWebChannelFn: monitorWebChannel as never,
479+
listenerFactory: scripted.listenerFactory,
480+
sleep,
481+
heartbeatSeconds: 1,
482+
transportTimeoutMs: 30,
483+
messageTimeoutMs: 3_000,
484+
watchdogCheckMs: 5,
485+
});
486+
487+
await vi.waitFor(
488+
() => {
489+
expect(scripted.getListenerCount()).toBe(1);
490+
},
491+
{ timeout: 250, interval: 2 },
492+
);
493+
494+
await vi.advanceTimersByTimeAsync(36);
495+
await Promise.resolve();
411496
await vi.waitFor(
412497
() => {
413498
expect(scripted.getListenerCount()).toBeGreaterThanOrEqual(2);
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { describe, expect, it } from "vitest";
2+
import { createWebChannelStatusController } from "./monitor-state.js";
3+
4+
describe("createWebChannelStatusController", () => {
5+
it("sets lastTransportActivityAt on noteConnected", () => {
6+
const patches: Record<string, unknown>[] = [];
7+
const controller = createWebChannelStatusController((s) => patches.push({ ...s }));
8+
9+
controller.noteConnected(1000);
10+
11+
const last = patches.at(-1)!;
12+
expect(last.connected).toBe(true);
13+
expect(last.lastTransportActivityAt).toBe(1000);
14+
});
15+
16+
it("updates lastTransportActivityAt on noteInbound", () => {
17+
const patches: Record<string, unknown>[] = [];
18+
const controller = createWebChannelStatusController((s) => patches.push({ ...s }));
19+
20+
controller.noteConnected(1000);
21+
controller.noteInbound(2000);
22+
23+
const last = patches.at(-1)!;
24+
expect(last.lastTransportActivityAt).toBe(2000);
25+
});
26+
27+
it("updates lastTransportActivityAt from explicit transport activity", () => {
28+
const patches: Record<string, unknown>[] = [];
29+
const controller = createWebChannelStatusController((s) => patches.push({ ...s }));
30+
31+
controller.noteConnected(1000);
32+
controller.noteTransportActivity(3000);
33+
34+
const last = patches.at(-1)!;
35+
expect(last.lastTransportActivityAt).toBe(3000);
36+
});
37+
38+
it("does not set lastTransportActivityAt on noteWatchdogStale", () => {
39+
const patches: Record<string, unknown>[] = [];
40+
const controller = createWebChannelStatusController((s) => patches.push({ ...s }));
41+
42+
controller.noteConnected(1000);
43+
controller.noteWatchdogStale(5000);
44+
45+
const last = patches.at(-1)!;
46+
// Watchdog staleness should not refresh transport activity — it means
47+
// the check loop is running but the socket itself is idle/stale.
48+
expect(last.lastTransportActivityAt).toBe(1000);
49+
});
50+
51+
it("produces snapshots that enable stale-socket health detection", () => {
52+
const patches: Record<string, unknown>[] = [];
53+
const controller = createWebChannelStatusController((s) => patches.push({ ...s }));
54+
55+
controller.noteConnected(1000);
56+
57+
const last = patches.at(-1)!;
58+
// The gateway health policy checks `connected === true && lastTransportActivityAt != null`
59+
// to decide whether to run stale-socket detection. Both must be present.
60+
expect(last.connected).toBe(true);
61+
expect(last.lastTransportActivityAt).not.toBeNull();
62+
expect(typeof last.lastTransportActivityAt).toBe("number");
63+
});
64+
});

extensions/whatsapp/src/auto-reply/monitor-state.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
import { createConnectedChannelStatusPatch } from "openclaw/plugin-sdk/gateway-runtime";
1+
import {
2+
createConnectedChannelStatusPatch,
3+
createTransportActivityStatusPatch,
4+
} from "openclaw/plugin-sdk/gateway-runtime";
25
import type { WebChannelHealthState, WebChannelStatus } from "./types.js";
36

47
function cloneStatus(status: WebChannelStatus): WebChannelStatus {
@@ -35,6 +38,7 @@ export function createWebChannelStatusController(statusSink?: (status: WebChanne
3538
snapshot: () => status,
3639
noteConnected(at = Date.now()) {
3740
Object.assign(status, createConnectedChannelStatusPatch(at));
41+
Object.assign(status, createTransportActivityStatusPatch(at));
3842
status.lastError = null;
3943
status.healthState = "healthy";
4044
emit();
@@ -43,11 +47,19 @@ export function createWebChannelStatusController(statusSink?: (status: WebChanne
4347
status.lastInboundAt = at;
4448
status.lastMessageAt = at;
4549
status.lastEventAt = at;
50+
Object.assign(status, createTransportActivityStatusPatch(at));
4651
if (status.connected) {
4752
status.healthState = "healthy";
4853
}
4954
emit();
5055
},
56+
noteTransportActivity(at = Date.now()) {
57+
if (status.lastTransportActivityAt === at) {
58+
return;
59+
}
60+
Object.assign(status, createTransportActivityStatusPatch(at));
61+
emit();
62+
},
5163
noteWatchdogStale(at = Date.now()) {
5264
status.lastEventAt = at;
5365
if (status.connected) {

extensions/whatsapp/src/auto-reply/monitor.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ async function clearTerminalWebAuthState(params: {
134134
);
135135
}
136136
}
137+
const DEFAULT_TRANSPORT_TIMEOUT_MS = 5 * 60 * 1000;
137138

138139
export async function monitorWebChannel(
139140
verbose: boolean,
@@ -220,6 +221,7 @@ export async function monitorWebChannel(
220221
};
221222
process.once("SIGINT", handleSigint);
222223

224+
const transportTimeoutMs = tuning.transportTimeoutMs ?? DEFAULT_TRANSPORT_TIMEOUT_MS;
223225
const messageTimeoutMs = tuning.messageTimeoutMs ?? 30 * 60 * 1000;
224226
const watchdogCheckMs = tuning.watchdogCheckMs ?? 60 * 1000;
225227
const controller = new WhatsAppConnectionController({
@@ -228,6 +230,7 @@ export async function monitorWebChannel(
228230
verbose,
229231
keepAlive,
230232
heartbeatSeconds,
233+
transportTimeoutMs,
231234
messageTimeoutMs,
232235
watchdogCheckMs,
233236
reconnectPolicy,
@@ -328,6 +331,7 @@ export async function monitorWebChannel(
328331
? { minutesSinceLastMessage }
329332
: {}),
330333
};
334+
statusController.noteTransportActivity(snapshot.lastTransportActivityAt);
331335

332336
if (minutesSinceLastMessage && minutesSinceLastMessage > 30) {
333337
heartbeatLogger.warn(
@@ -345,7 +349,7 @@ export async function monitorWebChannel(
345349
const minutesSinceTransportActivity = Math.floor(transportSilentMs / 60000);
346350
const minutesSinceAppActivity = Math.floor((now - appBaselineAt) / 60000);
347351
const watchdogReason =
348-
transportSilentMs > messageTimeoutMs ? "transport-inactive" : "app-silent";
352+
transportSilentMs > transportTimeoutMs ? "transport-inactive" : "app-silent";
349353
statusController.noteWatchdogStale();
350354
heartbeatLogger.warn(
351355
{

extensions/whatsapp/src/auto-reply/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export type WebChannelStatus = {
2727
lastInboundAt?: number | null;
2828
lastMessageAt?: number | null;
2929
lastEventAt?: number | null;
30+
lastTransportActivityAt?: number | null;
3031
lastError?: string | null;
3132
healthState?: WebChannelHealthState;
3233
};
@@ -35,6 +36,7 @@ export type WebMonitorTuning = {
3536
reconnect?: Partial<ReconnectPolicy>;
3637
socketTiming?: WhatsAppSocketTimingOptions;
3738
heartbeatSeconds?: number;
39+
transportTimeoutMs?: number;
3840
messageTimeoutMs?: number;
3941
watchdogCheckMs?: number;
4042
sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;

0 commit comments

Comments
 (0)