@@ -52,6 +52,7 @@ const EMBEDDED_FALLBACK_META = {
5252 fallbackFrom : "gateway" ,
5353} as const ;
// NOTE(review): presumably the prefix for session identifiers created when falling back after a gateway timeout; its usage is not visible here, so confirm.
5454const GATEWAY_TIMEOUT_FALLBACK_SESSION_PREFIX = "gateway-fallback-" ;
// Wait times, in milliseconds, between successive gateway attempts after a transient connection close.
// One entry per retry, so the gateway command is attempted at most (length + 1) times.
55+ const GATEWAY_TRANSIENT_CONNECT_RETRY_DELAYS_MS = [ 1_000 , 2_000 , 5_000 , 10_000 , 15_000 ] as const ;
5556
5657type AgentCliOpts = {
5758 message : string ;
@@ -147,6 +148,15 @@ function isGatewayAgentEmbeddedFallbackError(err: unknown): boolean {
147148 return isGatewayTransportError ( err ) ;
148149}
149150
/**
 * Reports whether `err` looks like a transient gateway connection close.
 *
 * An error qualifies only when all of the following hold:
 * - it is a gateway transport error whose `kind` is `"closed"`;
 * - its `code` is the number `1000` (presumably the WebSocket normal-closure
 *   code; confirm against the transport layer);
 * - its normalized `reason` is empty or the placeholder `"no close reason"`.
 *
 * @param err - The value caught from a gateway call.
 * @returns `true` when every condition above is met, otherwise `false`.
 */
function isTransientGatewayAgentConnectClose(err: unknown): boolean {
  if (!isGatewayTransportError(err)) {
    return false;
  }
  if (err.kind !== "closed") {
    return false;
  }
  // A non-numeric code never matches, mirroring a strict numeric comparison.
  const closedWithCode1000 = typeof err.code === "number" && err.code === 1000;
  const closeReason = normalizeOptionalString(err.reason);
  const reasonIsUninformative = !closeReason || closeReason === "no close reason";
  return closedWithCode1000 && reasonIsUninformative;
}
159+
150160function validateExplicitSessionKeyForDispatch (
151161 opts : Pick < AgentCliOpts , "agent" | "sessionKey" > ,
152162) : void {
@@ -268,8 +278,28 @@ function resolveAgentCliProcessLike(deps: AgentCliDeps | undefined): AgentCliPro
268278 return isAgentCliProcessLike ( processLike ) ? processLike : process ;
269279}
270280
271- function delayMs ( ms : number ) : Promise < void > {
272- return new Promise ( ( resolve ) => setTimeout ( resolve , ms ) ) ;
/**
 * Builds the error used to reject a retry delay that was cancelled.
 *
 * @returns A new `Error` with message `"gateway agent retry aborted"` whose
 *   `name` is overridden to `"AbortError"`.
 */
function createAbortDelayError(): Error {
  return Object.assign(new Error("gateway agent retry aborted"), { name: "AbortError" });
}
286+
/**
 * Waits for `ms` milliseconds, ending early if `signal` aborts.
 *
 * @param ms - Delay passed to `setTimeout`, in milliseconds.
 * @param signal - Optional abort signal. If it is already aborted, the promise
 *   rejects immediately and no timer is started.
 * @returns A promise that resolves once the timer fires, or rejects with the
 *   error from `createAbortDelayError()` when the signal aborts first.
 */
function delayMs(ms: number, signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) {
    return Promise.reject(createAbortDelayError());
  }
  return new Promise<void>((resolve, reject) => {
    // Shared cleanup so neither path leaves the abort listener attached.
    const detach = (): void => {
      signal?.removeEventListener("abort", handleAbort);
    };
    const handleAbort = (): void => {
      // Cancel the pending timer so it cannot fire after the rejection.
      clearTimeout(handle);
      detach();
      reject(createAbortDelayError());
    };
    const handle = setTimeout(() => {
      detach();
      resolve();
    }, ms);
    signal?.addEventListener("abort", handleAbort, { once: true });
  });
}
274304
275305function isConfirmedChatAbortResponseForRun ( value : unknown , runId : string ) : boolean {
@@ -631,6 +661,34 @@ async function agentViaGatewayCommand(
631661 return response ;
632662}
633663
/**
 * Runs `agentViaGatewayCommand`, retrying when the gateway connection closes
 * in a way that `isTransientGatewayAgentConnectClose` classifies as transient.
 *
 * One retry is made per entry in `GATEWAY_TRANSIENT_CONNECT_RETRY_DELAYS_MS`,
 * waiting that entry's delay before the next attempt. After the delays are
 * used up, one last attempt is made and its error, if any, propagates.
 *
 * @param opts - CLI options forwarded unchanged to every attempt.
 * @param runtime - Runtime environment; `runtime.error`, when defined,
 *   receives a message before each retry wait.
 * @param signalBridge - Signal bridge forwarded to every attempt; its
 *   `signal` can cancel a pending retry wait.
 * @returns The result of the first attempt that succeeds.
 * @throws Any abort error, any error that is not a transient connect close,
 *   the final attempt's error, or the rejection from an aborted retry wait.
 */
async function agentViaGatewayCommandWithTransientRetries(
  opts: AgentCliOpts,
  runtime: RuntimeEnv,
  signalBridge: ReturnType<typeof createAgentCliSignalBridge>,
) {
  for (const retryDelayMs of GATEWAY_TRANSIENT_CONNECT_RETRY_DELAYS_MS) {
    try {
      return await agentViaGatewayCommand(opts, runtime, signalBridge);
    } catch (err) {
      // Aborts and non-transient failures are never retried.
      const shouldRetry = !isAbortError(err) && isTransientGatewayAgentConnectClose(err);
      if (!shouldRetry) {
        throw err;
      }
      runtime.error?.(
        `Gateway agent connection closed during handshake; retrying in ${retryDelayMs} ms before embedded fallback.`,
      );
      await delayMs(retryDelayMs, signalBridge.signal);
    }
  }
  // Every retry delay has been used: make the final attempt and let any failure propagate.
  return await agentViaGatewayCommand(opts, runtime, signalBridge);
}
691+
634692export async function agentCliCommand (
635693 opts : AgentCliOpts ,
636694 runtime : RuntimeEnv ,
@@ -639,11 +697,14 @@ export async function agentCliCommand(
639697 protectJsonStdout ( opts ) ;
640698 const dispatchOpts = normalizeSessionKeyOptsForDispatch ( opts ) ;
641699 validateExplicitSessionKeyForDispatch ( dispatchOpts ) ;
700+ const gatewayDispatchOpts = dispatchOpts . runId
701+ ? dispatchOpts
702+ : { ...dispatchOpts , runId : randomIdempotencyKey ( ) } ;
642703 const signalBridge = createAgentCliSignalBridge ( resolveAgentCliProcessLike ( deps ) ) ;
643704 const localOpts = {
644- ...dispatchOpts ,
645- agentId : dispatchOpts . agent ,
646- replyAccountId : dispatchOpts . replyAccount ,
705+ ...gatewayDispatchOpts ,
706+ agentId : gatewayDispatchOpts . agent ,
707+ replyAccountId : gatewayDispatchOpts . replyAccount ,
647708 cleanupBundleMcpOnRunEnd : true ,
648709 cleanupCliLiveSessionOnRunEnd : true ,
649710 abortSignal : signalBridge . signal ,
@@ -655,7 +716,11 @@ export async function agentCliCommand(
655716 }
656717
657718 try {
658- const result = await agentViaGatewayCommand ( dispatchOpts , runtime , signalBridge ) ;
719+ const result = await agentViaGatewayCommandWithTransientRetries (
720+ gatewayDispatchOpts ,
721+ runtime ,
722+ signalBridge ,
723+ ) ;
659724 return returnAfterSignalExit ( result , signalBridge . getReceivedSignal ( ) , runtime ) ;
660725 } catch ( err ) {
661726 if ( isAbortError ( err ) ) {
0 commit comments