@@ -213,68 +213,67 @@ export async function monitorWebSocket({
213213 botNames . delete ( accountId ) ;
214214 } ;
215215
216- if ( abortSignal ?. aborted ) {
217- cleanup ( ) ;
218- return ;
219- }
220-
221- let supervisorAttempt = 0 ;
222-
223- // Supervisor loop: each iteration creates a fresh WSClient and runs it until
224- // either (a) abort is requested or (b) the SDK's internal retry budget is
225- // exhausted. We then back off and start a new cycle.
226- while ( ! abortSignal ?. aborted ) {
227- log (
228- `feishu[${ accountId } ]: starting WebSocket connection... (supervisor cycle ${ supervisorAttempt + 1 } )` ,
229- ) ;
230-
231- let wsClient : Lark . WSClient ;
232- try {
233- wsClient = createFeishuWSClient ( account ) ;
234- } catch ( err ) {
235- // Non-recoverable config error (missing credentials, etc.).
236- cleanup ( ) ;
237- throw err ;
216+ try {
217+ if ( abortSignal ?. aborted ) {
218+ return ;
238219 }
239220
240- wsClients . set ( accountId , wsClient ) ;
241-
242- try {
243- await runFeishuWSClientUntilDead ( {
244- wsClient,
245- eventDispatcher,
246- accountId,
247- log,
248- abortSignal,
249- } ) ;
250- } finally {
251- // Always close the stale SDK client before creating a fresh one.
221+ let supervisorAttempt = 0 ;
222+
223+ // Supervisor loop: each iteration creates a fresh WSClient and runs it until
224+ // either (a) abort is requested or (b) the SDK's internal retry budget is
225+ // exhausted. We then back off and start a new cycle.
226+ while ( ! abortSignal ?. aborted ) {
227+ log (
228+ `feishu[${ accountId } ]: starting WebSocket connection... (supervisor cycle ${ supervisorAttempt + 1 } )` ,
229+ ) ;
230+
231+ const wsClient = createFeishuWSClient ( account ) ;
232+ wsClients . set ( accountId , wsClient ) ;
233+
252234 try {
253- wsClient . close ( { force : true } ) ;
254- } catch {
255- // Ignore close errors; the new client will start clean.
235+ await runFeishuWSClientUntilDead ( {
236+ wsClient,
237+ eventDispatcher,
238+ accountId,
239+ log,
240+ abortSignal,
241+ } ) ;
242+ } finally {
243+ // Always close the stale SDK client before creating a fresh one.
244+ // NOTE: @larksuiteoapi/node-sdk's WSClient.reConnect() is known to leak timers
245+ // (upstream larksuite/node-sdk#177, tracked in openclaw#40451). close({force:true})
246+ // stops processing new events but does not cancel those orphaned timeouts.
247+ // This supervisor makes restarts more frequent, so the leak becomes more visible.
248+ try {
249+ wsClient . close ( { force : true } ) ;
250+ } catch {
251+ // Ignore close errors; the new client will start clean.
252+ }
256253 }
257- }
258254
259- if ( abortSignal ?. aborted ) {
260- break ;
261- }
255+ if ( abortSignal ?. aborted ) {
256+ break ;
257+ }
262258
263- supervisorAttempt += 1 ;
264- const delayMs = computeBackoff ( FEISHU_WS_SUPERVISOR_RECONNECT_POLICY , supervisorAttempt ) ;
265- error (
266- `feishu[${ accountId } ]: WebSocket supervisor restarting (attempt ${ supervisorAttempt } ) in ${ Math . round ( delayMs / 1000 ) } s` ,
267- ) ;
268-
269- try {
270- await sleepWithAbort ( delayMs , abortSignal ) ;
271- } catch {
272- // Abort during sleep — exit loop.
273- break ;
259+ supervisorAttempt += 1 ;
260+ const delayMs = computeBackoff ( FEISHU_WS_SUPERVISOR_RECONNECT_POLICY , supervisorAttempt ) ;
261+ error (
262+ `feishu[${ accountId } ]: WebSocket supervisor restarting (attempt ${ supervisorAttempt } ) in ${ Math . round ( delayMs / 1000 ) } s` ,
263+ ) ;
264+
265+ try {
266+ await sleepWithAbort ( delayMs , abortSignal ) ;
267+ } catch {
268+ // Abort during sleep — exit loop.
269+ break ;
270+ }
274271 }
272+ } finally {
273+ // Ensure we always clean up tracking maps, even if a nested SDK call throws
274+ // synchronously (for example inside a Promise executor).
275+ cleanup ( ) ;
275276 }
276-
277- cleanup ( ) ;
278277}
279278
280279export async function monitorWebhook ( {
0 commit comments