Merge 6fa95d6 into 4752e9a

849261680 · web-flow · commit 93a76ad466df · 2026-06-06T00:46:52.000+08:00
diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts
@@ -360,6 +360,44 @@ describe("runWithModelFallback – probe logic", () => {
     await expectPrimarySkippedAfterLongCooldown("billing");
   });
 
+  it("re-probes a single-provider primary blocked by a far-future subscription_limit (#90702)", () => {
+    // Regression for #90702: with hasFallbackCandidates false, a
+    // subscription_limit block whose blockedUntil is six days out must still
+    // produce a throttled probe attempt, since the rolling cap usually recovers
+    // earlier. Multi-fallback setups keep preferring the chain (covered above).
+    const sixDays = 6 * 24 * 60 * 60 * 1000;
+    const usageStats = {
+      "openai-profile-1": {
+        blockedUntil: NOW + sixDays,
+        blockedReason: "subscription_limit",
+        blockedSource: "wham",
+      },
+    } satisfies AuthProfileStore["usageStats"];
+
+    expect(
+      resolveOpenAiCooldownDecision({
+        reason: "rate_limit",
+        soonest: NOW + sixDays,
+        hasFallbackCandidates: false,
+        usageStats,
+      }),
+    ).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
+
+    // The 30s probe throttle still applies so recovery probing cannot hammer the
+    // upstream: a probe 10s earlier on the same key suspends until the slot opens.
+    probeThrottleInternals.lastProbeAttempt.set("recent-openai", NOW - 10_000);
+    expectOpenAiProbeSuspension(
+      resolveOpenAiCooldownDecision({
+        reason: "rate_limit",
+        soonest: NOW + sixDays,
+        hasFallbackCandidates: false,
+        throttleKey: "recent-openai",
+        usageStats,
+      }),
+      "rate_limit",
+    );
+  });
+
   it("decides when cooldowned primary probes are allowed", () => {
     expect(
       resolveOpenAiCooldownDecision({
@@ -674,7 +712,7 @@ describe("runWithModelFallback – probe logic", () => {
     }
   });
 
-  it("single candidate skips with rate_limit and exhausts candidates", async () => {
+  it("re-probes a single-provider rate-limited primary instead of suspending", async () => {
     const cfg = makeCfg({
       agents: {
         defaults: {
@@ -686,22 +724,26 @@ describe("runWithModelFallback – probe logic", () => {
       },
     } as Partial<OpenClawConfig>);
 
-    const almostExpired = NOW + 30 * 1000;
-    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+    // Far-future cooldown with no fallback chain: the primary must still be
+    // probed so a recovered rolling cap resumes work instead of staying silent
+    // until blockedUntil arrives. See #90702.
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 6 * 24 * 60 * 60 * 1000);
 
-    const run = vi.fn().mockResolvedValue("unreachable");
+    const run = vi.fn().mockResolvedValue("probed-ok");
 
-    await expect(
-      runWithModelFallback({
-        cfg,
-        provider: "openai",
-        model: "gpt-4.1-mini",
-        fallbacksOverride: [],
-        run,
-      }),
-    ).rejects.toThrow("All models failed");
+    const result = await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      fallbacksOverride: [],
+      run,
+    });
 
-    expect(run).not.toHaveBeenCalled();
+    expect(result.result).toBe("probed-ok");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
   });
 
   it("scopes probe throttling by agentDir to avoid cross-agent suppression", () => {
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
@@ -1060,14 +1060,24 @@ function shouldProbePrimaryDuringCooldown(params: {
   profileIds: string[];
   model: string;
 }): boolean {
-  if (!params.isPrimary || !params.hasFallbackCandidates) {
+  if (!params.isPrimary) {
     return false;
   }
 
   if (!isProbeThrottleOpen(params.now, params.throttleKey)) {
     return false;
   }
 
+  // A single-provider primary has no fallback chain to prefer, so every open
+  // throttle slot is a recovery probe: "is the primary callable yet?" is a
+  // recovery question independent of fallback configuration. Without this, a
+  // fallbacks:[] setup that hits a rate/subscription cap stays suspended until
+  // the provider-reported reset (which can be days out) even though the rolling
+  // cap usually recovers earlier. See #90702.
+  if (!params.hasFallbackCandidates) {
+    return true;
+  }
+
   const soonest = params.authRuntime.getSoonestCooldownExpiry(params.authStore, params.profileIds, {
     now: params.now,
     forModel: params.model,
@@ -1163,15 +1173,11 @@ function resolveCooldownDecision(params: {
   }
 
   // Billing is semi-persistent: the user may fix their balance, or a transient
-  // 402 might have been misclassified. Probe single-provider setups on the
-  // standard throttle so they can recover without a restart; when fallbacks
-  // exist, only probe near cooldown expiry so the fallback chain stays preferred.
+  // 402 might have been misclassified. shouldProbe already re-probes
+  // single-provider setups on the throttle (no fallback chain to prefer) and
+  // multi-fallback setups near cooldown expiry, so both recover without a restart.
   if (inferredReason === "billing") {
-    const shouldProbeSingleProviderBilling =
-      params.isPrimary &&
-      !params.hasFallbackCandidates &&
-      isProbeThrottleOpen(params.now, params.probeThrottleKey);
-    if (params.isPrimary && (shouldProbe || shouldProbeSingleProviderBilling)) {
+    if (params.isPrimary && shouldProbe) {
       return { type: "attempt", reason: inferredReason, markProbe: true };
     }
     return {