Skip to content

Commit 255e126

Browse files
fix(feishu): reconcile WebSocket reconnect backoff
1 parent 43da089 commit 255e126

5 files changed

Lines changed: 392 additions & 37 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
1313

1414
### Fixes
1515

16+
- Channels/Feishu: keep WebSocket reconnect supervision aligned with the Lark SDK reconnect loop, retry only after app-layer startup failure or SDK reconnect exhaustion, and guard invalid heartbeat config so recovery no longer stalls or spins. Fixes #55532; carries forward #55619. Thanks @sirfengyu and @alex-xuweilong.
1617
- Gateway/shutdown: report structured shutdown warnings and HTTP close timeout warnings through `ShutdownResult` while preserving lifecycle hook hardening. Carries forward #41296. Thanks @edenfunf.
1718
- Plugins/QA: prebuild the private QA channel runtime before plugin gauntlet source runs so wrapper CPU/RSS measurements are not polluted by private QA dist rebuild work. Thanks @vincentkoc.
1819
- Gateway/reload: bound default restart deferral and SIGUSR1 restart drain to five minutes while preserving explicit `deferralTimeoutMs: 0` indefinite waits, so stale active work accounting cannot block config reloads forever. Thanks @vincentkoc.

extensions/feishu/src/client.test.ts

Lines changed: 134 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ function isRecord(value: unknown): value is Record<string, unknown> {
107107
}
108108

109109
type HttpInstanceLike = {
110+
request: (options?: Record<string, unknown>) => Promise<unknown>;
110111
get: (url: string, options?: Record<string, unknown>) => Promise<unknown>;
111112
post: (url: string, body?: unknown, options?: Record<string, unknown>) => Promise<unknown>;
112113
};
@@ -119,9 +120,36 @@ function readCallOptions(
119120
return isRecord(call) ? call : {};
120121
}
121122

122-
function firstWsClientOptions(): { agent?: unknown; wsConfig?: unknown } {
123+
function firstWsClientOptions(): {
124+
agent?: unknown;
125+
autoReconnect?: unknown;
126+
httpInstance?: HttpInstanceLike;
127+
onReady?: unknown;
128+
onError?: unknown;
129+
onReconnecting?: unknown;
130+
onReconnected?: unknown;
131+
} {
123132
const options = readCallOptions(wsClientCtorMock, 0);
124-
return { agent: options.agent, wsConfig: options.wsConfig };
133+
const httpInstance = options.httpInstance;
134+
return {
135+
agent: options.agent,
136+
autoReconnect: options.autoReconnect,
137+
httpInstance:
138+
isRecord(httpInstance) &&
139+
typeof httpInstance.request === "function" &&
140+
typeof httpInstance.get === "function" &&
141+
typeof httpInstance.post === "function"
142+
? {
143+
request: httpInstance.request as HttpInstanceLike["request"],
144+
get: httpInstance.get as HttpInstanceLike["get"],
145+
post: httpInstance.post as HttpInstanceLike["post"],
146+
}
147+
: undefined,
148+
onReady: options.onReady,
149+
onError: options.onError,
150+
onReconnecting: options.onReconnecting,
151+
onReconnected: options.onReconnected,
152+
};
125153
}
126154

127155
beforeAll(async () => {
@@ -197,10 +225,12 @@ describe("createFeishuClient HTTP timeout", () => {
197225
const httpInstance = readCallOptions(clientCtorMock).httpInstance;
198226
if (
199227
isRecord(httpInstance) &&
228+
typeof httpInstance.request === "function" &&
200229
typeof httpInstance.get === "function" &&
201230
typeof httpInstance.post === "function"
202231
) {
203232
return {
233+
request: httpInstance.request as HttpInstanceLike["request"],
204234
get: httpInstance.get as HttpInstanceLike["get"],
205235
post: httpInstance.post as HttpInstanceLike["post"],
206236
};
@@ -345,13 +375,111 @@ describe("createFeishuClient HTTP timeout", () => {
345375
});
346376

347377
describe("createFeishuWSClient proxy handling", () => {
348-
it("passes heartbeat wsConfig defaults to Lark.WSClient", async () => {
378+
it("passes a guarded HTTP instance and lifecycle hooks to Lark.WSClient", async () => {
379+
const lifecycleHooks = {
380+
onReady: vi.fn(),
381+
onError: vi.fn(),
382+
onReconnecting: vi.fn(),
383+
onReconnected: vi.fn(),
384+
};
385+
386+
await createFeishuWSClient(baseAccount, lifecycleHooks);
387+
388+
const options = firstWsClientOptions();
389+
expect(options.autoReconnect).toBe(true);
390+
expect(options.httpInstance).toBeDefined();
391+
expect(options.onReady).toBe(lifecycleHooks.onReady);
392+
expect(options.onError).toBe(lifecycleHooks.onError);
393+
expect(options.onReconnecting).toBe(lifecycleHooks.onReconnecting);
394+
expect(options.onReconnected).toBe(lifecycleHooks.onReconnected);
395+
});
396+
397+
it("guards invalid websocket heartbeat and reconnect endpoint config", async () => {
398+
mockBaseHttpInstance.request.mockResolvedValueOnce({
399+
code: 0,
400+
data: {
401+
URL: "wss://example.test/ws",
402+
ClientConfig: {
403+
PingInterval: 0,
404+
ReconnectCount: -2,
405+
ReconnectInterval: Number.NaN,
406+
ReconnectNonce: -1,
407+
ExtraField: "kept",
408+
},
409+
},
410+
});
411+
412+
await createFeishuWSClient(baseAccount);
413+
414+
const options = firstWsClientOptions();
415+
const response = await options.httpInstance?.request({ url: "https://example.test/ws-config" });
416+
expect(mockBaseHttpInstance.request).toHaveBeenCalledWith(
417+
expect.objectContaining({ timeout: FEISHU_HTTP_TIMEOUT_MS }),
418+
);
419+
expect(response).toEqual({
420+
code: 0,
421+
data: {
422+
URL: "wss://example.test/ws",
423+
ClientConfig: {
424+
ExtraField: "kept",
425+
PingInterval: 30,
426+
ReconnectCount: -1,
427+
ReconnectInterval: 120,
428+
ReconnectNonce: 30,
429+
},
430+
},
431+
});
432+
});
433+
434+
it("preserves valid websocket endpoint reconnect config", async () => {
435+
mockBaseHttpInstance.request.mockResolvedValueOnce({
436+
code: 0,
437+
data: {
438+
URL: "wss://example.test/ws",
439+
ClientConfig: {
440+
PingInterval: 45,
441+
ReconnectCount: 2.7,
442+
ReconnectInterval: 15,
443+
ReconnectNonce: 0,
444+
},
445+
},
446+
});
447+
448+
await createFeishuWSClient(baseAccount);
449+
450+
const options = firstWsClientOptions();
451+
const response = await options.httpInstance?.request({ url: "https://example.test/ws-config" });
452+
expect(response).toEqual({
453+
code: 0,
454+
data: {
455+
URL: "wss://example.test/ws",
456+
ClientConfig: {
457+
PingInterval: 45,
458+
ReconnectCount: 2,
459+
ReconnectInterval: 15,
460+
ReconnectNonce: 0,
461+
},
462+
},
463+
});
464+
});
465+
466+
it("leaves non-websocket endpoint responses unchanged", async () => {
467+
mockBaseHttpInstance.request.mockResolvedValueOnce({
468+
code: 0,
469+
data: {
470+
value: true,
471+
},
472+
});
473+
349474
await createFeishuWSClient(baseAccount);
350475

351476
const options = firstWsClientOptions();
352-
expect(options.wsConfig).toEqual({
353-
PingInterval: 30,
354-
PingTimeout: 3,
477+
const response = await options.httpInstance?.request({ url: "https://example.test/other" });
478+
expect(response).toEqual({
479+
code: 0,
480+
data: {
481+
value: true,
482+
},
355483
});
356484
});
357485

extensions/feishu/src/client.ts

Lines changed: 97 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ export { pluginVersion };
1515
const FEISHU_USER_AGENT = `openclaw-feishu-builtin/${pluginVersion}/${process.platform}`;
1616
export { FEISHU_USER_AGENT };
1717

18-
const FEISHU_WS_CONFIG = {
18+
const FEISHU_WS_CLIENT_CONFIG_DEFAULTS = {
1919
PingInterval: 30,
20-
PingTimeout: 3,
20+
ReconnectCount: -1,
21+
ReconnectInterval: 120,
22+
ReconnectNonce: 30,
2123
} as const;
2224

2325
/** User-Agent header value for all Feishu API requests. */
@@ -87,6 +89,71 @@ type FeishuHttpInstanceLike = Pick<
8789
"request" | "get" | "post" | "put" | "patch" | "delete" | "head" | "options"
8890
>;
8991

92+
/**
 * Optional callbacks a caller may supply when creating a Feishu WebSocket
 * client. Every hook may be omitted; `onError` is the only one that receives
 * an argument (the `Error` being reported).
 */
export type FeishuWsLifecycleHooks = Partial<{
  onReady: () => void;
  onError: (err: Error) => void;
  onReconnecting: () => void;
  onReconnected: () => void;
}>;
98+
99+
/**
 * Generic hook that receives a resolved HTTP response and returns a value of
 * the same type.
 */
type FeishuHttpResponseTransform = {
  <R>(response: R): R;
};
100+
101+
/**
 * Type guard for keyed objects.
 *
 * Accepts any non-null value whose `typeof` is "object", except arrays: an
 * array is not a keyed record, and spreading one into an object literal would
 * silently turn it into an index-keyed object.
 *
 * @param value - Value of unknown shape to test.
 * @returns `true` when `value` is a non-null, non-array object.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
104+
105+
/**
 * Returns `value` when it is a finite number strictly greater than zero;
 * otherwise returns `fallback`.
 *
 * @param value - Candidate value of unknown type.
 * @param fallback - Result used when `value` is not a finite positive number.
 */
function coercePositiveNumber(value: unknown, fallback: number): number {
  if (typeof value !== "number") {
    return fallback;
  }
  const usable = Number.isFinite(value) && value > 0;
  return usable ? value : fallback;
}
108+
109+
/**
 * Returns `value` when it is a finite number greater than or equal to zero;
 * otherwise returns `fallback`.
 *
 * @param value - Candidate value of unknown type.
 * @param fallback - Result used when `value` is not a finite non-negative number.
 */
function coerceNonNegativeNumber(value: unknown, fallback: number): number {
  if (typeof value !== "number") {
    return fallback;
  }
  const usable = Number.isFinite(value) && value >= 0;
  return usable ? value : fallback;
}
112+
113+
/**
 * Normalizes a reconnect-count setting to a whole number.
 *
 * Accepted inputs:
 * - exactly `-1`, returned as-is (presumably the SDK's "retry without limit"
 *   sentinel; confirm against the Lark SDK);
 * - any finite number `>= 0`, rounded down to a whole count.
 *
 * Everything else yields `fallback`: non-numbers, `NaN`, infinities, and every
 * other negative value. The range is checked before rounding so that a
 * fractional negative such as `-0.5` is rejected instead of being floored
 * into the `-1` sentinel.
 *
 * @param value - Candidate value of unknown type.
 * @param fallback - Result used when `value` is not an acceptable count.
 * @returns The normalized count, or `fallback`.
 */
function coerceReconnectCount(value: unknown, fallback: number): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return fallback;
  }
  if (value === -1) {
    return -1;
  }
  return value >= 0 ? Math.floor(value) : fallback;
}
120+
121+
/**
 * Guards the `data.ClientConfig` section of an HTTP response.
 *
 * When `response`, `response.data`, and `response.data.ClientConfig` are all
 * objects, returns a shallow copy in which `PingInterval`, `ReconnectCount`,
 * `ReconnectInterval`, and `ReconnectNonce` are replaced by validated values,
 * falling back to `FEISHU_WS_CLIENT_CONFIG_DEFAULTS` for invalid ones. All
 * other fields are carried over unchanged. Any response without that shape is
 * returned as-is (same reference).
 *
 * @param response - Resolved HTTP response of any shape.
 * @returns The original response, or a copy with a guarded `ClientConfig`.
 */
function sanitizeFeishuWsEndpointResponse<R>(response: R): R {
  if (!isRecord(response)) {
    return response;
  }
  const payload = response.data;
  if (!isRecord(payload)) {
    return response;
  }
  const rawConfig = payload.ClientConfig;
  if (!isRecord(rawConfig)) {
    return response;
  }

  const defaults = FEISHU_WS_CLIENT_CONFIG_DEFAULTS;
  // Spread first so the guarded values below override whatever was received.
  const guardedConfig = {
    ...rawConfig,
    PingInterval: coercePositiveNumber(rawConfig.PingInterval, defaults.PingInterval),
    ReconnectCount: coerceReconnectCount(rawConfig.ReconnectCount, defaults.ReconnectCount),
    ReconnectInterval: coercePositiveNumber(
      rawConfig.ReconnectInterval,
      defaults.ReconnectInterval,
    ),
    ReconnectNonce: coerceNonNegativeNumber(rawConfig.ReconnectNonce, defaults.ReconnectNonce),
  };

  return {
    ...response,
    data: { ...payload, ClientConfig: guardedConfig },
  } as R;
}
156+
90157
async function getWsProxyAgent() {
91158
return resolveAmbientNodeProxyAgent<Agent>();
92159
}
@@ -115,22 +182,30 @@ function resolveDomain(domain: FeishuDomain | undefined): Lark.Domain | string {
115182
* but injects a default request timeout and User-Agent header to prevent
116183
* indefinite hangs and set a standardized User-Agent per OAPI best practices.
117184
*/
118-
function createTimeoutHttpInstance(defaultTimeoutMs: number): Lark.HttpInstance {
185+
function createTimeoutHttpInstance(
186+
defaultTimeoutMs: number,
187+
transformResponse?: FeishuHttpResponseTransform,
188+
): Lark.HttpInstance {
119189
const base: FeishuHttpInstanceLike = feishuClientSdk.defaultHttpInstance;
120190

121191
function injectTimeout<D>(opts?: Lark.HttpRequestOptions<D>): Lark.HttpRequestOptions<D> {
122192
return { timeout: defaultTimeoutMs, ...opts } as Lark.HttpRequestOptions<D>;
123193
}
124194

195+
async function transform<R>(promise: Promise<R>): Promise<R> {
196+
const response = await promise;
197+
return transformResponse ? transformResponse(response) : response;
198+
}
199+
125200
return {
126-
request: (opts) => base.request(injectTimeout(opts)),
127-
get: (url, opts) => base.get(url, injectTimeout(opts)),
128-
post: (url, data, opts) => base.post(url, data, injectTimeout(opts)),
129-
put: (url, data, opts) => base.put(url, data, injectTimeout(opts)),
130-
patch: (url, data, opts) => base.patch(url, data, injectTimeout(opts)),
131-
delete: (url, opts) => base.delete(url, injectTimeout(opts)),
132-
head: (url, opts) => base.head(url, injectTimeout(opts)),
133-
options: (url, opts) => base.options(url, injectTimeout(opts)),
201+
request: (opts) => transform(base.request(injectTimeout(opts))),
202+
get: (url, opts) => transform(base.get(url, injectTimeout(opts))),
203+
post: (url, data, opts) => transform(base.post(url, data, injectTimeout(opts))),
204+
put: (url, data, opts) => transform(base.put(url, data, injectTimeout(opts))),
205+
patch: (url, data, opts) => transform(base.patch(url, data, injectTimeout(opts))),
206+
delete: (url, opts) => transform(base.delete(url, injectTimeout(opts))),
207+
head: (url, opts) => transform(base.head(url, injectTimeout(opts))),
208+
options: (url, opts) => transform(base.options(url, injectTimeout(opts))),
134209
};
135210
}
136211

@@ -224,8 +299,12 @@ export function createFeishuClient(creds: FeishuClientCredentials): Lark.Client
224299
* Create a Feishu WebSocket client for an account.
225300
* Note: WSClient is not cached since each call creates a new connection.
226301
*/
227-
export async function createFeishuWSClient(account: ResolvedFeishuAccount): Promise<Lark.WSClient> {
302+
export async function createFeishuWSClient(
303+
account: ResolvedFeishuAccount,
304+
lifecycleHooks: FeishuWsLifecycleHooks = {},
305+
): Promise<Lark.WSClient> {
228306
const { accountId, appId, appSecret, domain } = account;
307+
const defaultHttpTimeoutMs = resolveConfiguredHttpTimeoutMs(account);
229308

230309
if (!appId || !appSecret) {
231310
throw new Error(`Feishu credentials not configured for account "${accountId}"`);
@@ -237,10 +316,13 @@ export async function createFeishuWSClient(account: ResolvedFeishuAccount): Prom
237316
appSecret,
238317
domain: resolveDomain(domain),
239318
loggerLevel: feishuClientSdk.LoggerLevel.info,
240-
wsConfig: FEISHU_WS_CONFIG,
319+
httpInstance: createTimeoutHttpInstance(defaultHttpTimeoutMs, sanitizeFeishuWsEndpointResponse),
320+
autoReconnect: true,
321+
onReady: lifecycleHooks.onReady,
322+
onError: lifecycleHooks.onError,
323+
onReconnecting: lifecycleHooks.onReconnecting,
324+
onReconnected: lifecycleHooks.onReconnected,
241325
...(agent ? { agent } : {}),
242-
} as ConstructorParameters<typeof feishuClientSdk.WSClient>[0] & {
243-
wsConfig: typeof FEISHU_WS_CONFIG;
244326
});
245327
}
246328

0 commit comments

Comments
 (0)