Skip to content

Commit 7dfabab

Browse files
committed
fix(feishu): repair WebSocket reconnect and heartbeat config
1 parent 199d5f7 commit 7dfabab

5 files changed

Lines changed: 140 additions & 41 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
66

77
### Fixes
88

9+
- Feishu: retry WebSocket startup failures with monitor-owned backoff while preserving SDK-local heartbeat defaults, so persistent-connection startup failures no longer leave the monitor hung. Fixes #68766; related #42354 and #55532. Thanks @alex-xuweilong, @120106835, @sirfengyu, and @tianhaocui.
910
- Feishu: extract quoted/replied interactive-card text across schema 1.0, schema 2.0, i18n, template-variable, and post-format fallback shapes without carrying broad generated/config churn from related parser experiments. (#38776, #60383, #42218, #45936) Thanks @lishuaigit, @lskun, @just2gooo, and @Br1an67.
1011
- Exec approvals: accept a symlinked `OPENCLAW_HOME` as the trusted approvals root while still rejecting symlinked `.openclaw` path components below it. (#64663) Thanks @FunJim.
1112
- Logging: add top-level `hostname`, flattened `message`, and available `agent_id`, `session_id`, and `channel` fields to file-log JSONL records for multi-agent filtering without removing existing structured log arguments. Fixes #51075. Thanks @stevengonsalvez.

extensions/feishu/src/client.test.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ function readCallOptions(
124124
return isRecord(call) ? call : {};
125125
}
126126

127-
function firstWsClientOptions(): { agent?: unknown } {
127+
function firstWsClientOptions(): { agent?: unknown; wsConfig?: unknown } {
128128
const options = readCallOptions(wsClientCtorMock, 0);
129-
return { agent: options.agent };
129+
return { agent: options.agent, wsConfig: options.wsConfig };
130130
}
131131

132132
beforeAll(async () => {
@@ -350,6 +350,16 @@ describe("createFeishuClient HTTP timeout", () => {
350350
});
351351

352352
describe("createFeishuWSClient proxy handling", () => {
// Verifies that the first WSClient constructor call carries the heartbeat config.
it("passes heartbeat wsConfig defaults to Lark.WSClient", async () => {
  await createFeishuWSClient(baseAccount);

  const { wsConfig } = firstWsClientOptions();
  const expectedWsConfig = { PingInterval: 30, PingTimeout: 3 };
  expect(wsConfig).toEqual(expectedWsConfig);
});
362+
353363
it("does not set a ws proxy agent when proxy env is absent", async () => {
354364
await createFeishuWSClient(baseAccount);
355365

extensions/feishu/src/client.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ export { pluginVersion };
1515
const FEISHU_USER_AGENT = `openclaw-feishu-builtin/${pluginVersion}/${process.platform}`;
1616
export { FEISHU_USER_AGENT };
1717

18+
/**
 * Heartbeat settings passed as `wsConfig` when constructing the Feishu SDK
 * WebSocket client.
 *
 * NOTE(review): the units of `PingInterval` / `PingTimeout` are not visible
 * here — presumably seconds; confirm against the SDK before changing them.
 */
const FEISHU_WS_CONFIG = {
19+
PingInterval: 30,
20+
PingTimeout: 3,
21+
} as const;
22+
1823
/** User-Agent header value for all Feishu API requests. */
1924
export function getFeishuUserAgent(): string {
2025
return FEISHU_USER_AGENT;
@@ -232,7 +237,10 @@ export async function createFeishuWSClient(account: ResolvedFeishuAccount): Prom
232237
appSecret,
233238
domain: resolveDomain(domain),
234239
loggerLevel: feishuClientSdk.LoggerLevel.info,
240+
wsConfig: FEISHU_WS_CONFIG,
235241
...(agent ? { agent } : {}),
242+
} as ConstructorParameters<typeof feishuClientSdk.WSClient>[0] & {
243+
wsConfig: typeof FEISHU_WS_CONFIG;
236244
});
237245
}
238246

extensions/feishu/src/monitor.cleanup.test.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ function createWsClient(): MockWsClient {
3838
}
3939

4040
afterEach(() => {
41+
vi.useRealTimers();
4142
stopFeishuMonitorState();
4243
vi.clearAllMocks();
4344
});
@@ -79,6 +80,54 @@ describe("feishu websocket cleanup", () => {
7980
expect(botNames.has(accountId)).toBe(false);
8081
});
8182

// Exercises the monitor's retry path: the first client's start() rejects, the
// monitor cleans it up, waits out the backoff, then starts a second client.
it("retries with backoff after websocket start rejects", async () => {
  vi.useFakeTimers();

  // First client fails to start; the second one is left with its default mock.
  const rejectingClient = createWsClient();
  rejectingClient.start.mockRejectedValueOnce(new Error("connect failed"));
  const healthyClient = createWsClient();
  createFeishuWSClientMock.mockResolvedValueOnce(rejectingClient);
  createFeishuWSClientMock.mockResolvedValueOnce(healthyClient);

  const stopController = new AbortController();
  const runtime = { log: vi.fn(), error: vi.fn(), exit: vi.fn() };
  const accountId = "retry";

  const monitorDone = monitorWebSocket({
    account: createAccount(accountId),
    accountId,
    runtime,
    abortSignal: stopController.signal,
    eventDispatcher: {} as never,
  });

  // The failed client must be closed and unregistered before any retry.
  await vi.waitFor(() => {
    expect(rejectingClient.start).toHaveBeenCalledTimes(1);
    expect(rejectingClient.close).toHaveBeenCalledTimes(1);
    expect(wsClients.has(accountId)).toBe(false);
  });

  // Let the first backoff delay elapse.
  await vi.advanceTimersByTimeAsync(1_000);

  await vi.waitFor(() => {
    expect(healthyClient.start).toHaveBeenCalledTimes(1);
    expect(wsClients.get(accountId)).toBe(healthyClient);
  });

  // Aborting ends the monitor and closes the live client.
  stopController.abort();
  await monitorDone;

  expect(createFeishuWSClientMock).toHaveBeenCalledTimes(2);
  expect(healthyClient.close).toHaveBeenCalledTimes(1);
  const retryMessage = expect.stringContaining("WebSocket start failed, retrying in 1000ms");
  expect(runtime.error).toHaveBeenCalledWith(retryMessage);
});
130+
82131
it("closes targeted websocket clients during stop cleanup", () => {
83132
const alphaClient = createWsClient();
84133
const betaClient = createWsClient();

extensions/feishu/src/monitor.transport.ts

Lines changed: 70 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import crypto from "node:crypto";
22
import * as http from "node:http";
33
import * as Lark from "@larksuiteoapi/node-sdk";
4+
import { waitForAbortableDelay } from "./async.js";
45
import { createFeishuWSClient } from "./client.js";
56
import {
67
applyBasicWebhookRequestGuards,
@@ -29,6 +30,9 @@ export type MonitorTransportParams = {
2930
eventDispatcher: Lark.EventDispatcher;
3031
};
3132

33+
const FEISHU_WS_RECONNECT_INITIAL_DELAY_MS = 1_000;
34+
const FEISHU_WS_RECONNECT_MAX_DELAY_MS = 30_000;
35+
3236
function isFeishuWebhookPayload(value: unknown): value is Record<string, unknown> {
3337
return !!value && typeof value === "object" && !Array.isArray(value);
3438
}
@@ -82,6 +86,40 @@ function respondText(res: http.ServerResponse, statusCode: number, body: string)
8286
res.end(body);
8387
}
8488

/**
 * Computes how long to wait before the next WebSocket start attempt.
 *
 * The delay starts at `FEISHU_WS_RECONNECT_INITIAL_DELAY_MS`, doubles for each
 * attempt after the first, and never exceeds `FEISHU_WS_RECONNECT_MAX_DELAY_MS`.
 * Attempt numbers below 1 are treated like the first attempt.
 *
 * @param attempt 1-based count of consecutive failed start attempts.
 * @returns The backoff delay in milliseconds.
 */
function getFeishuWsReconnectDelayMs(attempt: number): number {
  const doublings = Math.max(0, attempt - 1);
  const uncappedDelayMs = FEISHU_WS_RECONNECT_INITIAL_DELAY_MS * Math.pow(2, doublings);
  return Math.min(uncappedDelayMs, FEISHU_WS_RECONNECT_MAX_DELAY_MS);
}
95+
/**
 * Closes a Feishu WebSocket client (if one was created) and removes the
 * account's entries from the `wsClients`, `botOpenIds`, and `botNames`
 * registries.
 *
 * An exception thrown by `wsClient.close()` is reported through `error` rather
 * than propagated.
 *
 * @param params.accountId Account whose registry entries are removed.
 * @param params.wsClient Client to close; omitted when creation itself failed.
 * @param params.error Sink for the close-failure message.
 */
function cleanupFeishuWsClient(params: {
  accountId: string;
  wsClient?: Lark.WSClient;
  error: (message: string) => void;
}): void {
  const { accountId, wsClient, error } = params;
  try {
    wsClient?.close();
  } catch (err) {
    error(`feishu[${accountId}]: error closing WebSocket client: ${String(err)}`);
  } finally {
    // Runs even if the `error` callback throws, so a failing logger cannot
    // leave stale per-account entries behind.
    wsClients.delete(accountId);
    botOpenIds.delete(accountId);
    botNames.delete(accountId);
  }
}
113+
/**
 * Returns a promise that resolves once `abortSignal` is aborted.
 *
 * - If the signal is already aborted, the promise is resolved right away.
 * - If no signal is supplied, the promise never resolves.
 *
 * @param abortSignal Optional signal to wait on.
 */
function waitForFeishuWsAbort(abortSignal?: AbortSignal): Promise<void> {
  return new Promise<void>((done) => {
    if (abortSignal?.aborted) {
      done();
      return;
    }
    abortSignal?.addEventListener("abort", () => done(), { once: true });
  });
}
122+
85123
export async function monitorWebSocket({
86124
account,
87125
accountId,
@@ -91,53 +129,46 @@ export async function monitorWebSocket({
91129
}: MonitorTransportParams): Promise<void> {
92130
const log = runtime?.log ?? console.log;
93131
const error = runtime?.error ?? console.error;
94-
log(`feishu[${accountId}]: starting WebSocket connection...`);
95-
96-
const wsClient = await createFeishuWSClient(account);
97-
wsClients.set(accountId, wsClient);
98-
99-
return new Promise((resolve, reject) => {
100-
let cleanedUp = false;
101-
102-
const cleanup = () => {
103-
if (cleanedUp) {
104-
return;
105-
}
106-
cleanedUp = true;
107-
abortSignal?.removeEventListener("abort", handleAbort);
108-
try {
109-
wsClient.close();
110-
} catch (err) {
111-
error(`feishu[${accountId}]: error closing WebSocket client: ${String(err)}`);
112-
} finally {
113-
wsClients.delete(accountId);
114-
botOpenIds.delete(accountId);
115-
botNames.delete(accountId);
116-
}
117-
};
118-
119-
function handleAbort() {
120-
log(`feishu[${accountId}]: abort signal received, stopping`);
121-
cleanup();
122-
resolve();
123-
}
124132

133+
let attempt = 0;
134+
while (true) {
125135
if (abortSignal?.aborted) {
126-
cleanup();
127-
resolve();
128-
return;
136+
break;
129137
}
130138

131-
abortSignal?.addEventListener("abort", handleAbort, { once: true });
132-
139+
let wsClient: Lark.WSClient | undefined;
133140
try {
134-
void wsClient.start({ eventDispatcher });
141+
log(`feishu[${accountId}]: starting WebSocket connection...`);
142+
wsClient = await createFeishuWSClient(account);
143+
if (abortSignal?.aborted) {
144+
cleanupFeishuWsClient({ accountId, wsClient, error });
145+
break;
146+
}
147+
wsClients.set(accountId, wsClient);
148+
await wsClient.start({ eventDispatcher });
149+
attempt = 0;
135150
log(`feishu[${accountId}]: WebSocket client started`);
151+
await waitForFeishuWsAbort(abortSignal);
152+
log(`feishu[${accountId}]: abort signal received, stopping`);
153+
cleanupFeishuWsClient({ accountId, wsClient, error });
154+
return;
136155
} catch (err) {
137-
cleanup();
138-
reject(err);
156+
cleanupFeishuWsClient({ accountId, wsClient, error });
157+
if (abortSignal?.aborted) {
158+
break;
159+
}
160+
161+
attempt += 1;
162+
const delayMs = getFeishuWsReconnectDelayMs(attempt);
163+
error(
164+
`feishu[${accountId}]: WebSocket start failed, retrying in ${delayMs}ms: ${String(err)}`,
165+
);
166+
const shouldRetry = await waitForAbortableDelay(delayMs, abortSignal);
167+
if (!shouldRetry) {
168+
break;
169+
}
139170
}
140-
});
171+
}
141172
}
142173

143174
export async function monitorWebhook({

0 commit comments

Comments
 (0)