@@ -156,6 +156,7 @@ import {
156156 resolveOverloadFailoverBackoffMs ,
157157 resolveOverloadProfileRotationLimit ,
158158 resolveRateLimitProfileRotationLimit ,
159+ resolveNextSameModelRateLimitRetryCount ,
159160 resolveSameModelRateLimitBackoffMs ,
160161 type RuntimeAuthState ,
161162 scrubAnthropicRefusalMagic ,
@@ -1214,7 +1215,7 @@ export async function runEmbeddedAgent(
12141215 let lastContextBudgetStatus : EmbeddedAgentMeta [ "contextBudgetStatus" ] ;
12151216 let runLoopIterations = 0 ;
12161217 let overloadProfileRotations = 0 ;
1217- let sameModelRateLimitRetries = 0 ;
1218+ let consecutiveSameModelRateLimitRetries = 0 ;
12181219 let planningOnlyRetryAttempts = 0 ;
12191220 let reasoningOnlyRetryAttempts = 0 ;
12201221 let emptyResponseRetryAttempts = 0 ;
@@ -1376,12 +1377,12 @@ export async function runEmbeddedAgent(
13761377 }
13771378 } ;
13781379 const maybeRetrySameModelRateLimit = async ( ) : Promise < boolean > => {
1379- if ( sameModelRateLimitRetries >= MAX_SAME_MODEL_RATE_LIMIT_RETRIES ) {
1380+ if ( consecutiveSameModelRateLimitRetries >= MAX_SAME_MODEL_RATE_LIMIT_RETRIES ) {
13801381 return false ;
13811382 }
1382- const delayMs = resolveSameModelRateLimitBackoffMs ( sameModelRateLimitRetries ) ;
1383+ const delayMs = resolveSameModelRateLimitBackoffMs ( consecutiveSameModelRateLimitRetries ) ;
13831384 log . warn (
1384- `rate-limit same-model retry ${ sameModelRateLimitRetries + 1 } /${ MAX_SAME_MODEL_RATE_LIMIT_RETRIES } for ${ sanitizeForLog ( provider ) } /${ sanitizeForLog ( modelId ) } : delayMs=${ delayMs } ` ,
1385+ `rate-limit same-model retry ${ consecutiveSameModelRateLimitRetries + 1 } /${ MAX_SAME_MODEL_RATE_LIMIT_RETRIES } for ${ sanitizeForLog ( provider ) } /${ sanitizeForLog ( modelId ) } : delayMs=${ delayMs } ` ,
13851386 ) ;
13861387 try {
13871388 await sleepWithAbort ( delayMs , params . abortSignal ) ;
@@ -1393,7 +1394,10 @@ export async function runEmbeddedAgent(
13931394 }
13941395 throw err ;
13951396 }
1396- sameModelRateLimitRetries += 1 ;
1397+ consecutiveSameModelRateLimitRetries = resolveNextSameModelRateLimitRetryCount ( {
1398+ retriesSoFar : consecutiveSameModelRateLimitRetries ,
1399+ retriedSameModelRateLimit : true ,
1400+ } ) ;
13971401 return true ;
13981402 } ;
13991403 // Resolve the context engine once and reuse across retries to avoid
@@ -2926,9 +2930,19 @@ export async function runEmbeddedAgent(
29262930 if ( assistantFailoverOutcome . retryKind === "same_model_idle_timeout" ) {
29272931 sameModelIdleTimeoutRetries += 1 ;
29282932 }
2933+ if ( assistantFailoverOutcome . retryKind !== "same_model_rate_limit" ) {
2934+ consecutiveSameModelRateLimitRetries = resolveNextSameModelRateLimitRetryCount ( {
2935+ retriesSoFar : consecutiveSameModelRateLimitRetries ,
2936+ retriedSameModelRateLimit : false ,
2937+ } ) ;
2938+ }
29292939 lastRetryFailoverReason = assistantFailoverOutcome . lastRetryFailoverReason ;
29302940 continue ;
29312941 }
2942+ consecutiveSameModelRateLimitRetries = resolveNextSameModelRateLimitRetryCount ( {
2943+ retriesSoFar : consecutiveSameModelRateLimitRetries ,
2944+ retriedSameModelRateLimit : false ,
2945+ } ) ;
29322946 if ( assistantFailoverOutcome . action === "throw" ) {
29332947 traceAttempts . push ( {
29342948 provider : activeErrorContext . provider ,
0 commit comments