Skip to content

Commit 12cb2ba

Browse files
committed
fix: preserve provider fallback metadata
1 parent 91feed3 commit 12cb2ba

4 files changed

Lines changed: 89 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Docs: https://docs.openclaw.ai
5050
- Gateway: defer update-check startup until after readiness so package update checks no longer block sidecar-ready startup, while preserving update broadcasts and shutdown cleanup. (#83520) Thanks @samzong.
5151
- Telegram: keep `/btw` and read-only status commands from aborting active runs, and avoid retaining raw update payloads in timed-out spool tombstones. Refs #83272.
5252
- Agents: log strict-agentic execution contract diagnostics only when the planning-only retry path actually triggers.
53+
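- Agents: stop embedded session takeover and session write-lock errors from consuming model fallbacks, while still falling back when the error carries provider failover metadata (for example, a 429 rate-limit error whose cause is a session write lock). Fixes #83510. Thanks @luyao618.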
5354
- Agents/video: hide `video_generate` reference-audio parameters unless a registered video provider supports audio inputs.
5455
- Plugins: fall back to npm for official ClawHub updates when artifact downloads are unavailable, including beta-to-default fallback and dry-run version reporting.
5556
- Plugins/xAI: echo PKCE challenge fields during OAuth authorization-code token exchange for xAI token-endpoint compatibility. (#83499) Thanks @fuller-stack-dev.

src/agents/failover-error.test.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,14 @@ describe("failover-error", () => {
11511151
expect(isNonProviderRuntimeCoordinationError({ status: 429, message: "rate limit" })).toBe(
11521152
false,
11531153
);
1154+
expect(
1155+
isNonProviderRuntimeCoordinationError({
1156+
status: 429,
1157+
code: "RESOURCE_EXHAUSTED",
1158+
message: "upstream quota pressure",
1159+
cause: makeSessionLockError(),
1160+
}),
1161+
).toBe(false);
11541162
expect(isNonProviderRuntimeCoordinationError(null)).toBe(false);
11551163
expect(isNonProviderRuntimeCoordinationError(undefined)).toBe(false);
11561164
});

src/agents/failover-error.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,13 @@ function hasEmbeddedAttemptSessionTakeover(err: unknown, seen: Set<object> = new
268268
* See #83510.
269269
*/
270270
export function isNonProviderRuntimeCoordinationError(err: unknown): boolean {
271-
return hasSessionWriteLockTimeout(err) || hasEmbeddedAttemptSessionTakeover(err);
271+
if (!hasSessionWriteLockTimeout(err) && !hasEmbeddedAttemptSessionTakeover(err)) {
272+
return false;
273+
}
274+
if (isFailoverError(err)) {
275+
return false;
276+
}
277+
return resolveFailoverClassificationFromError(err) === null;
272278
}
273279

274280
function hasTimeoutHint(err: unknown): boolean {

src/agents/model-fallback.test.ts

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
} from "./model-fallback.js";
2424
import { classifyEmbeddedPiRunResultForModelFallback } from "./pi-embedded-runner/result-fallback-classifier.js";
2525
import type { EmbeddedPiRunResult } from "./pi-embedded-runner/types.js";
26+
import { SessionWriteLockTimeoutError } from "./session-write-lock-error.js";
2627
import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
2728

2829
vi.mock("../infra/file-lock.js", () => ({
@@ -796,6 +797,78 @@ describe("runWithModelFallback", () => {
796797
expect(run).toHaveBeenCalledTimes(1);
797798
});
798799

800+
it("aborts the fallback chain on session write-lock timeout instead of trying every model (#83510)", async () => {
801+
const cfg = makeCfg({
802+
agents: {
803+
defaults: {
804+
model: {
805+
primary: "openai/gpt-5.4",
806+
fallbacks: ["anthropic/claude-sonnet-4-6", "openai/gpt-4.1-mini"],
807+
},
808+
},
809+
},
810+
});
811+
const lockError = new SessionWriteLockTimeoutError({
812+
timeoutMs: 10_000,
813+
owner: "pid=37121",
814+
lockPath: "/tmp/openclaw/session.jsonl.lock",
815+
});
816+
const run = vi.fn().mockRejectedValue(lockError);
817+
818+
await expect(
819+
runWithModelFallback({
820+
cfg,
821+
provider: "openai",
822+
model: "gpt-5.4",
823+
run,
824+
}),
825+
).rejects.toBe(lockError);
826+
expect(run).toHaveBeenCalledTimes(1);
827+
});
828+
829+
it("keeps provider failover metadata authoritative over nested session locks", async () => {
830+
const cfg = makeCfg({
831+
agents: {
832+
defaults: {
833+
model: {
834+
primary: "openai/gpt-5.4",
835+
fallbacks: ["anthropic/claude-sonnet-4-6"],
836+
},
837+
},
838+
},
839+
});
840+
const lockError = new SessionWriteLockTimeoutError({
841+
timeoutMs: 10_000,
842+
owner: "pid=37121",
843+
lockPath: "/tmp/openclaw/session.jsonl.lock",
844+
});
845+
const providerError = {
846+
status: 429,
847+
code: "RESOURCE_EXHAUSTED",
848+
message: "upstream quota pressure",
849+
cause: lockError,
850+
};
851+
const run = vi.fn().mockRejectedValueOnce(providerError).mockResolvedValueOnce("fallback ok");
852+
853+
const result = await runWithModelFallback({
854+
cfg,
855+
provider: "openai",
856+
model: "gpt-5.4",
857+
run,
858+
});
859+
860+
expect(result.result).toBe("fallback ok");
861+
expect(result.provider).toBe("anthropic");
862+
expect(run).toHaveBeenCalledTimes(2);
863+
expect(result.attempts[0]).toMatchObject({
864+
provider: "openai",
865+
model: "gpt-5.4",
866+
reason: "rate_limit",
867+
status: 429,
868+
code: "RESOURCE_EXHAUSTED",
869+
});
870+
});
871+
799872
it("keeps raw provider schema errors in fallback summaries", async () => {
800873
const cfg = makeCfg({
801874
agents: {

0 commit comments

Comments
 (0)