Skip to content

Commit fccb2f5

Browse files
committed
fix: probe stale rate-limit cooldown primaries
1 parent 61cf005 commit fccb2f5

3 files changed

Lines changed: 84 additions & 36 deletions

File tree

src/agents/model-fallback.probe.test.ts

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -135,19 +135,6 @@ async function loadModelFallbackProbeModules() {
135135

136136
beforeAll(loadModelFallbackProbeModules);
137137

138-
function expectFallbackUsed(
139-
result: { result: unknown; attempts: Array<{ reason?: string }> },
140-
run: {
141-
(...args: unknown[]): unknown;
142-
mock: { calls: unknown[][] };
143-
},
144-
) {
145-
expect(result.result).toBe("ok");
146-
expect(run).toHaveBeenCalledTimes(1);
147-
expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
148-
expect(result.attempts[0]?.reason).toBe("rate_limit");
149-
}
150-
151138
function expectPrimarySkippedForReason(
152139
result: { result: unknown; attempts: Array<{ reason?: string }> },
153140
run: {
@@ -259,9 +246,14 @@ describe("runWithModelFallback – probe logic", () => {
259246
hasFallbackCandidates?: boolean;
260247
requestedModel?: boolean;
261248
throttleKey?: string;
249+
usageStats?: AuthProfileStore["usageStats"];
262250
}) {
263251
mockedGetSoonestCooldownExpiry.mockReturnValue(params.soonest);
264252
mockedResolveProfilesUnavailableReason.mockReturnValue(params.reason);
253+
const authStore: AuthProfileStore = { version: 1, profiles: {} };
254+
if (params.usageStats) {
255+
authStore.usageStats = params.usageStats;
256+
}
265257
return modelFallbackTesting.resolveCooldownDecision({
266258
candidate: OPENAI_PROBE_CANDIDATE,
267259
isPrimary: params.isPrimary ?? true,
@@ -275,7 +267,7 @@ describe("runWithModelFallback – probe logic", () => {
275267
} as unknown as Parameters<
276268
typeof modelFallbackTesting.resolveCooldownDecision
277269
>[0]["authRuntime"],
278-
authStore: { version: 1, profiles: {} },
270+
authStore,
279271
profileIds: ["openai-profile-1"],
280272
});
281273
}
@@ -291,7 +283,7 @@ describe("runWithModelFallback – probe logic", () => {
291283
});
292284
}
293285

294-
async function expectPrimarySkippedAfterLongCooldown(reason: "billing" | "rate_limit") {
286+
async function expectPrimarySkippedAfterLongCooldown(reason: "billing") {
295287
const cfg = makeCfg();
296288
const expiresIn30Min = NOW + 30 * 60 * 1000;
297289
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
@@ -348,25 +340,43 @@ describe("runWithModelFallback – probe logic", () => {
348340
vi.restoreAllMocks();
349341
});
350342

351-
it("skips primary model when far from cooldown expiry (30 min remaining)", async () => {
343+
it("probes rate-limited primary model when far from cooldown expiry", async () => {
352344
const cfg = makeCfg();
353-
// Cooldown expires in 30 min — well beyond the 2-min margin
354345
const expiresIn30Min = NOW + 30 * 60 * 1000;
355346
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
356347

357348
const run = vi.fn().mockResolvedValue("ok");
358349

359350
const result = await runPrimaryCandidate(cfg, run);
360351

361-
// Should skip primary and use fallback
362-
expectFallbackUsed(result, run);
352+
expectPrimaryProbeSuccess(result, run, "ok");
363353
});
364354

365355
it("uses inferred unavailable reason when skipping a cooldowned primary model", async () => {
366356
await expectPrimarySkippedAfterLongCooldown("billing");
367357
});
368358

369359
it("decides when cooldowned primary probes are allowed", () => {
360+
expect(
361+
resolveOpenAiCooldownDecision({
362+
reason: "rate_limit",
363+
soonest: NOW + 30 * 60 * 1000,
364+
}),
365+
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
366+
expectOpenAiProbeSuspension(
367+
resolveOpenAiCooldownDecision({
368+
reason: "rate_limit",
369+
soonest: NOW + 30 * 60 * 1000,
370+
usageStats: {
371+
"openai-profile-1": {
372+
blockedUntil: NOW + 30 * 60 * 1000,
373+
blockedReason: "subscription_limit",
374+
blockedSource: "wham",
375+
},
376+
},
377+
}),
378+
"rate_limit",
379+
);
370380
expect(
371381
resolveOpenAiCooldownDecision({
372382
reason: "rate_limit",

src/agents/model-fallback.test.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2519,7 +2519,7 @@ describe("runWithModelFallback", () => {
25192519
});
25202520
});
25212521

2522-
it("keeps alias-resolved primary models subject to transient cooldowns", async () => {
2522+
it("probes alias-resolved primary models during rate-limit cooldowns", async () => {
25232523
const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
25242524
const cfg = makeCfg({
25252525
agents: {
@@ -2535,7 +2535,7 @@ describe("runWithModelFallback", () => {
25352535
},
25362536
});
25372537

2538-
const run = vi.fn().mockResolvedValueOnce("haiku success");
2538+
const run = vi.fn().mockResolvedValueOnce("sonnet success");
25392539

25402540
const result = await runWithModelFallback({
25412541
cfg,
@@ -2545,9 +2545,9 @@ describe("runWithModelFallback", () => {
25452545
agentDir: dir,
25462546
});
25472547

2548-
expect(result.result).toBe("haiku success");
2548+
expect(result.result).toBe("sonnet success");
25492549
expect(run).toHaveBeenCalledTimes(1);
2550-
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-haiku-3-5", {
2550+
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-6", {
25512551
allowTransientCooldownProbe: true,
25522552
});
25532553
});
@@ -2623,7 +2623,7 @@ describe("runWithModelFallback", () => {
26232623

26242624
expect(result.result).toBe("groq success");
26252625
expect(run).toHaveBeenCalledTimes(2);
2626-
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
2626+
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-opus-4-6", {
26272627
allowTransientCooldownProbe: true,
26282628
});
26292629
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
@@ -2661,7 +2661,7 @@ describe("runWithModelFallback", () => {
26612661

26622662
expect(result.result).toBe("groq success");
26632663
expect(run).toHaveBeenCalledTimes(2);
2664-
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
2664+
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-opus-4-6", {
26652665
allowTransientCooldownProbe: true,
26662666
});
26672667
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
@@ -2686,8 +2686,8 @@ describe("runWithModelFallback", () => {
26862686

26872687
const run = vi
26882688
.fn()
2689-
.mockRejectedValueOnce(new Error("Model not found: anthropic/claude-sonnet-4-5"))
2690-
.mockResolvedValueOnce("haiku success");
2689+
.mockRejectedValueOnce(new Error("Model not found: anthropic/claude-opus-4-6"))
2690+
.mockResolvedValueOnce("sonnet success");
26912691

26922692
const result = await runWithModelFallback({
26932693
cfg,
@@ -2697,12 +2697,12 @@ describe("runWithModelFallback", () => {
26972697
agentDir: dir,
26982698
});
26992699

2700-
expect(result.result).toBe("haiku success");
2700+
expect(result.result).toBe("sonnet success");
27012701
expect(run).toHaveBeenCalledTimes(2);
2702-
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
2702+
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-opus-4-6", {
27032703
allowTransientCooldownProbe: true,
27042704
});
2705-
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", {
2705+
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-sonnet-4-5", {
27062706
allowTransientCooldownProbe: true,
27072707
});
27082708
});

src/agents/model-fallback.ts

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import { normalizeOptionalAgentRuntimeId } from "./agent-runtime-id.js";
2222
import { externalCliDiscoveryForProviders } from "./auth-profiles/external-cli-discovery.js";
2323
import { hasAnyAuthProfileStoreSource } from "./auth-profiles/source-check.js";
2424
import type { AuthProfileStore } from "./auth-profiles/types.js";
25+
import { isActiveUnusableWindow } from "./auth-profiles/usage-state.js";
2526
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
2627
import { isLikelyContextOverflowError } from "./embedded-agent-helpers/errors.js";
2728
import type { FailoverReason } from "./embedded-agent-helpers/types.js";
@@ -943,9 +944,31 @@ function markProbeAttempt(now: number, throttleKey: string): void {
943944
enforceProbeStateCap();
944945
}
945946

947+
/**
 * Reports whether at least one of the given auth profiles currently carries a
 * recorded subscription-limit block that applies to `params.model`.
 *
 * A profile counts only when all of the following hold for its entry in
 * `params.authStore.usageStats`:
 *  - `isActiveUnusableWindow(blockedUntil, now)` is truthy
 *    (NOTE(review): presumably "blockedUntil is still in the future relative
 *    to now" — confirm against auth-profiles/usage-state),
 *  - `blockedReason` is exactly "subscription_limit",
 *  - `blockedSource` is set (truthy),
 *  - `blockedModel` is either unset or equal to `params.model`.
 *
 * @param params.authStore  Store whose optional `usageStats` map is consulted.
 * @param params.profileIds Profile ids to inspect; an empty list yields `false`.
 * @param params.now        Timestamp passed through to `isActiveUnusableWindow`.
 * @param params.model      Model id compared against a profile's `blockedModel`.
 * @returns `true` if any profile satisfies every condition above, else `false`.
 */
function hasActiveProviderRateLimitResetWindow(params: {
948+
authStore: AuthProfileStore;
949+
profileIds: string[];
950+
now: number;
951+
model: string;
952+
}): boolean {
953+
return params.profileIds.some((profileId) => { // stops at the first matching profile
954+
const stats = params.authStore.usageStats?.[profileId]; // usageStats itself may be absent
955+
if (!stats) {
956+
return false; // no usage record for this profile, so no recorded block
957+
}
958+
if (!isActiveUnusableWindow(stats.blockedUntil, params.now)) {
959+
return false; // block window is not considered active at `now`
960+
}
961+
if (stats.blockedReason !== "subscription_limit" || !stats.blockedSource) {
962+
return false; // only subscription-limit blocks that name a source qualify
963+
}
964+
return !stats.blockedModel || stats.blockedModel === params.model; // a block without a model matches any model
965+
});
966+
}
967+
946968
function shouldProbePrimaryDuringCooldown(params: {
947969
isPrimary: boolean;
948970
hasFallbackCandidates: boolean;
971+
reason: FailoverReason | null | undefined;
949972
now: number;
950973
throttleKey: string;
951974
authRuntime: ModelFallbackAuthRuntime;
@@ -965,6 +988,20 @@ function shouldProbePrimaryDuringCooldown(params: {
965988
now: params.now,
966989
forModel: params.model,
967990
});
991+
// Generic 429 backoff can become stale before its local cooldown expires.
992+
// Provider-recorded reset windows still remain authoritative until near expiry.
993+
if (
994+
params.reason === "rate_limit" &&
995+
!hasActiveProviderRateLimitResetWindow({
996+
authStore: params.authStore,
997+
profileIds: params.profileIds,
998+
now: params.now,
999+
model: params.model,
1000+
})
1001+
) {
1002+
return true;
1003+
}
1004+
9681005
if (soonest === null || !Number.isFinite(soonest)) {
9691006
return true;
9701007
}
@@ -1014,9 +1051,16 @@ function resolveCooldownDecision(params: {
10141051
authStore: AuthProfileStore;
10151052
profileIds: string[];
10161053
}): CooldownDecision {
1054+
const inferredReason =
1055+
params.authRuntime.resolveProfilesUnavailableReason({
1056+
store: params.authStore,
1057+
profileIds: params.profileIds,
1058+
now: params.now,
1059+
}) ?? "unknown";
10171060
const shouldProbe = shouldProbePrimaryDuringCooldown({
10181061
isPrimary: params.isPrimary,
10191062
hasFallbackCandidates: params.hasFallbackCandidates,
1063+
reason: inferredReason,
10201064
now: params.now,
10211065
throttleKey: params.probeThrottleKey,
10221066
authRuntime: params.authRuntime,
@@ -1025,12 +1069,6 @@ function resolveCooldownDecision(params: {
10251069
model: params.candidate.model,
10261070
});
10271071

1028-
const inferredReason =
1029-
params.authRuntime.resolveProfilesUnavailableReason({
1030-
store: params.authStore,
1031-
profileIds: params.profileIds,
1032-
now: params.now,
1033-
}) ?? "unknown";
10341072
const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
10351073
if (isPersistentAuthIssue) {
10361074
return {

0 commit comments

Comments
 (0)