Skip to content

Commit 9a6c71a

Browse files
authored
fix(agents): retry same model across short rate-limit windows (#91911)
Bound same-model rate-limit retries to explicit short-window signals or parsed short Retry-After values, honor Retry-After in the retry sleep, preserve zero-rotation fallback behavior, and record same-model rate-limit retries separately from profile rotations.

Verification:
- node scripts/run-vitest.mjs src/agents/embedded-agent-runner/run/assistant-failover.test.ts src/agents/embedded-agent-runner/run/helpers.test.ts
- Azure Crabbox cbx_bdb5a7807a1f / coral-shrimp: OPENCLAW_CHECK_CHANGED_REMOTE_CHILD=1 OPENCLAW_CHANGED_LANES_RAW_SYNC=1 corepack pnpm check:changed
- .agents/skills/autoreview/scripts/autoreview --mode branch --base origin/main
1 parent 99d0bdc commit 9a6c71a

8 files changed

Lines changed: 691 additions & 9 deletions

File tree

src/agents/embedded-agent-runner/run.incomplete-turn.test.ts

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,12 @@ import {
1010
loadRunOverflowCompactionHarness,
1111
mockedClassifyFailoverReason,
1212
mockedGlobalHookRunner,
13+
mockedIsFailoverAssistantError,
14+
mockedIsRateLimitAssistantError,
1315
mockedLog,
1416
mockedRunEmbeddedAttempt,
1517
mockedResolveModelAsync,
18+
mockedSleepWithAbort,
1619
overflowBaseRunParams,
1720
resetRunOverflowCompactionHarnessMocks,
1821
} from "./run.overflow-compaction.harness.js";
@@ -408,6 +411,71 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
408411
});
409412
});
410413

414+
it("records same-model rate-limit retries without a profile-rotation trace", async () => {
415+
const rateLimitMessage =
416+
"429 rate_limit_exceeded: requests per minute exceeded; Retry-After: 30";
417+
mockedClassifyFailoverReason.mockImplementation((raw) =>
418+
raw.includes("429") ? "rate_limit" : null,
419+
);
420+
mockedIsFailoverAssistantError.mockImplementation((assistant) =>
421+
Boolean(assistant?.errorMessage?.includes("429")),
422+
);
423+
mockedIsRateLimitAssistantError.mockImplementation((assistant) =>
424+
Boolean(assistant?.errorMessage?.includes("429")),
425+
);
426+
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
427+
makeAttemptResult({
428+
assistantTexts: [],
429+
lastAssistant: {
430+
role: "assistant",
431+
stopReason: "error",
432+
provider: "openai",
433+
model: "gpt-5.5",
434+
errorMessage: rateLimitMessage,
435+
content: [],
436+
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
437+
}),
438+
);
439+
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
440+
makeAttemptResult({
441+
assistantTexts: ["Recovered after a short rate-limit wait."],
442+
lastAssistant: {
443+
role: "assistant",
444+
stopReason: "stop",
445+
provider: "openai",
446+
model: "gpt-5.5",
447+
content: [{ type: "text", text: "Recovered after a short rate-limit wait." }],
448+
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
449+
}),
450+
);
451+
452+
const result = await runEmbeddedAgent({
453+
...overflowBaseRunParams,
454+
provider: "openai",
455+
model: "gpt-5.5",
456+
runId: "run-same-model-rate-limit-trace",
457+
});
458+
459+
expect(mockedSleepWithAbort).toHaveBeenCalledWith(30_000, undefined);
460+
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
461+
expect(result.meta.executionTrace?.fallbackUsed).toBe(false);
462+
expect(result.meta.executionTrace?.attempts).toMatchObject([
463+
{
464+
provider: "openai",
465+
model: "gpt-5.5",
466+
result: "same_model_rate_limit",
467+
reason: "rate_limit",
468+
stage: "assistant",
469+
},
470+
{
471+
provider: "openai",
472+
model: "gpt-5.5",
473+
result: "success",
474+
stage: "assistant",
475+
},
476+
]);
477+
});
478+
411479
it("auto-activates strict-agentic for unconfigured GPT-5 openai runs and surfaces the blocked state", async () => {
412480
// Criterion 1 of the GPT-5.4 parity gate ("no stalls after planning") must
413481
// cover out-of-the-box installs, not only users who opted in. An

src/agents/embedded-agent-runner/run.overflow-compaction.harness.ts

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,9 @@ export const mockedResolveContextEngine = vi.fn(async () => mockedContextEngine)
118118
export const mockedResolveContextEngineOwnerPluginId = vi.fn(() => undefined);
119119
export const mockedBuildAgentRuntimePlan = vi.fn(() => ({}));
120120
export const mockedRunPostCompactionSideEffects = vi.fn(async () => {});
121+
export const mockedSleepWithAbort = vi.fn(
122+
async (_ms: number, _abortSignal?: AbortSignal) => undefined,
123+
);
121124
export const mockedEnsureRuntimePluginsLoaded = vi.fn<(params?: unknown) => void>();
122125
export const mockedResolveModelAsync = vi.fn(
123126
async (): Promise<MockResolveModelResult> => ({
@@ -171,6 +174,7 @@ type MockCoerceToFailoverError = (
171174
) => unknown;
172175
type MockDescribeFailoverError = (err: unknown) => MockFailoverErrorDescription;
173176
type MockResolveFailoverStatus = (reason: string) => number | undefined;
177+
type MockAssistantErrorProbe = (assistant?: { errorMessage?: string }) => boolean;
174178
export class MockedFailoverError extends Error {
175179
constructor(message: string) {
176180
super(message);
@@ -215,7 +219,7 @@ export const mockedFormatAssistantErrorText = vi.fn(() => "");
215219
export const mockedIsAuthAssistantError = vi.fn(() => false);
216220
export const mockedIsBillingAssistantError = vi.fn(() => false);
217221
export const mockedIsCompactionFailureError = vi.fn(() => false);
218-
export const mockedIsFailoverAssistantError = vi.fn(() => false);
222+
export const mockedIsFailoverAssistantError = vi.fn<MockAssistantErrorProbe>(() => false);
219223
export const mockedIsFailoverErrorMessage = vi.fn(() => false);
220224
export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => {
221225
const lower = normalizeLowercaseStringOrEmpty(msg ?? "");
@@ -228,7 +232,7 @@ export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => {
228232
});
229233
export const mockedParseImageSizeError = vi.fn(() => null);
230234
export const mockedParseImageDimensionError = vi.fn(() => null);
231-
export const mockedIsRateLimitAssistantError = vi.fn(() => false);
235+
export const mockedIsRateLimitAssistantError = vi.fn<MockAssistantErrorProbe>(() => false);
232236
export const mockedIsTimeoutErrorMessage = vi.fn(() => false);
233237
export const mockedPickFallbackThinkingLevel = vi.fn<(params?: unknown) => ThinkLevel | null>(
234238
() => null,
@@ -466,6 +470,8 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
466470
mockedShouldPreferExplicitConfigApiKeyAuth.mockReturnValue(false);
467471
mockedRunPostCompactionSideEffects.mockReset();
468472
mockedRunPostCompactionSideEffects.mockResolvedValue(undefined);
473+
mockedSleepWithAbort.mockReset();
474+
mockedSleepWithAbort.mockResolvedValue(undefined);
469475
}
470476

471477
/** Install module mocks, import the runner, and return the mocked entrypoint. */
@@ -483,6 +489,9 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
483489
vi.doMock("../../context-engine/init.js", () => ({
484490
ensureContextEnginesInitialized: vi.fn(),
485491
}));
492+
vi.doMock("../../infra/backoff.js", () => ({
493+
sleepWithAbort: mockedSleepWithAbort,
494+
}));
486495
vi.doMock("../../context-engine/registry.js", () => ({
487496
resolveContextEngine: mockedResolveContextEngine,
488497
resolveContextEngineOwnerPluginId: mockedResolveContextEngineOwnerPluginId,

src/agents/embedded-agent-runner/run.ts

Lines changed: 56 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -152,9 +152,12 @@ import {
152152
resolveFinalAssistantVisibleText,
153153
resolveMaxRunRetryIterations,
154154
resolveReportedModelRef,
155+
MAX_SAME_MODEL_RATE_LIMIT_RETRIES,
155156
resolveOverloadFailoverBackoffMs,
156157
resolveOverloadProfileRotationLimit,
157158
resolveRateLimitProfileRotationLimit,
159+
resolveNextSameModelRateLimitRetryCount,
160+
resolveSameModelRateLimitRetryDelayMs,
158161
type RuntimeAuthState,
159162
scrubAnthropicRefusalMagic,
160163
} from "./run/helpers.js";
@@ -1055,6 +1058,8 @@ export async function runEmbeddedAgent(
10551058
const profileFailureStore = pluginHarnessOwnsTransport ? attemptAuthProfileStore : authStore;
10561059
let profileIndex = 0;
10571060
const traceAttempts: TraceAttempt[] = [];
1061+
const traceAttemptUsesFallback = (attempt: TraceAttempt): boolean =>
1062+
attempt.result === "rotate_profile" || attempt.result === "fallback_model";
10581063

10591064
const initialThinkLevel = resolveInitialThinkLevel({
10601065
requested: params.thinkLevel,
@@ -1212,6 +1217,7 @@ export async function runEmbeddedAgent(
12121217
let lastContextBudgetStatus: EmbeddedAgentMeta["contextBudgetStatus"];
12131218
let runLoopIterations = 0;
12141219
let overloadProfileRotations = 0;
1220+
let consecutiveSameModelRateLimitRetries = 0;
12151221
let planningOnlyRetryAttempts = 0;
12161222
let reasoningOnlyRetryAttempts = 0;
12171223
let emptyResponseRetryAttempts = 0;
@@ -1372,6 +1378,35 @@ export async function runEmbeddedAgent(
13721378
throw err;
13731379
}
13741380
};
1381+
const maybeRetrySameModelRateLimit = async (retry?: {
1382+
retryAfterSeconds?: number;
1383+
}): Promise<boolean> => {
1384+
if (consecutiveSameModelRateLimitRetries >= MAX_SAME_MODEL_RATE_LIMIT_RETRIES) {
1385+
return false;
1386+
}
1387+
const delayMs = resolveSameModelRateLimitRetryDelayMs({
1388+
retriesSoFar: consecutiveSameModelRateLimitRetries,
1389+
retryAfterSeconds: retry?.retryAfterSeconds,
1390+
});
1391+
log.warn(
1392+
`rate-limit same-model retry ${consecutiveSameModelRateLimitRetries + 1}/${MAX_SAME_MODEL_RATE_LIMIT_RETRIES} for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)}: delayMs=${delayMs}`,
1393+
);
1394+
try {
1395+
await sleepWithAbort(delayMs, params.abortSignal);
1396+
} catch (err) {
1397+
if (params.abortSignal?.aborted) {
1398+
const abortErr = new Error("Operation aborted", { cause: err });
1399+
abortErr.name = "AbortError";
1400+
throw abortErr;
1401+
}
1402+
throw err;
1403+
}
1404+
consecutiveSameModelRateLimitRetries = resolveNextSameModelRateLimitRetryCount({
1405+
retriesSoFar: consecutiveSameModelRateLimitRetries,
1406+
retriedSameModelRateLimit: true,
1407+
});
1408+
return true;
1409+
};
13751410
// Resolve the context engine once and reuse across retries to avoid
13761411
// repeated initialization/connection overhead per attempt.
13771412
ensureContextEnginesInitialized();
@@ -2865,6 +2900,7 @@ export async function runEmbeddedAgent(
28652900
!fallbackConfigured &&
28662901
canRestartForLiveSwitch &&
28672902
sameModelIdleTimeoutRetries < MAX_SAME_MODEL_IDLE_TIMEOUT_RETRIES,
2903+
allowSameModelRateLimitRetry: rateLimitProfileRotations < rateLimitProfileRotationLimit,
28682904
assistantProfileFailureReason,
28692905
lastProfileId,
28702906
modelId,
@@ -2885,28 +2921,42 @@ export async function runEmbeddedAgent(
28852921
warn: (message) => log.warn(message),
28862922
maybeMarkAuthProfileFailure,
28872923
maybeEscalateRateLimitProfileFallback,
2924+
maybeRetrySameModelRateLimit,
28882925
maybeBackoffBeforeOverloadFailover,
28892926
advanceAuthProfile: advanceAttemptAuthProfile,
28902927
});
28912928
overloadProfileRotations = assistantFailoverOutcome.overloadProfileRotations;
28922929
if (assistantFailoverOutcome.action === "retry") {
2930+
const retryTraceResult =
2931+
assistantFailoverOutcome.retryKind === "same_model_rate_limit"
2932+
? "same_model_rate_limit"
2933+
: assistantFailoverOutcome.retryKind === "same_model_idle_timeout" ||
2934+
assistantFailoverReason === "timeout"
2935+
? "timeout"
2936+
: "rotate_profile";
28932937
traceAttempts.push({
28942938
provider: activeErrorContext.provider,
28952939
model: activeErrorContext.model,
2896-
result:
2897-
assistantFailoverOutcome.retryKind === "same_model_idle_timeout" ||
2898-
assistantFailoverReason === "timeout"
2899-
? "timeout"
2900-
: "rotate_profile",
2940+
result: retryTraceResult,
29012941
...(assistantFailoverReason ? { reason: assistantFailoverReason } : {}),
29022942
stage: "assistant",
29032943
});
29042944
if (assistantFailoverOutcome.retryKind === "same_model_idle_timeout") {
29052945
sameModelIdleTimeoutRetries += 1;
29062946
}
2947+
if (assistantFailoverOutcome.retryKind !== "same_model_rate_limit") {
2948+
consecutiveSameModelRateLimitRetries = resolveNextSameModelRateLimitRetryCount({
2949+
retriesSoFar: consecutiveSameModelRateLimitRetries,
2950+
retriedSameModelRateLimit: false,
2951+
});
2952+
}
29072953
lastRetryFailoverReason = assistantFailoverOutcome.lastRetryFailoverReason;
29082954
continue;
29092955
}
2956+
consecutiveSameModelRateLimitRetries = resolveNextSameModelRateLimitRetryCount({
2957+
retriesSoFar: consecutiveSameModelRateLimitRetries,
2958+
retriedSameModelRateLimit: false,
2959+
});
29102960
if (assistantFailoverOutcome.action === "throw") {
29112961
traceAttempts.push({
29122962
provider: activeErrorContext.provider,
@@ -3658,7 +3708,7 @@ export async function runEmbeddedAgent(
36583708
},
36593709
]
36603710
: undefined,
3661-
fallbackUsed: traceAttempts.length > 0,
3711+
fallbackUsed: traceAttempts.some(traceAttemptUsesFallback),
36623712
runner: "embedded",
36633713
},
36643714
requestShaping: {

0 commit comments

Comments
 (0)