Skip to content

Commit a74894a

Browse files
authored
fix(agents): fail fast on session lock fallback (#78633)
Signed-off-by: sallyom <somalley@redhat.com>
1 parent 20c34b8 commit a74894a

4 files changed

Lines changed: 36 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ Docs: https://docs.openclaw.ai
132132
- Doctor/OpenAI Codex: revert the 2026.5.5 `doctor --fix` repair that rewrote valid `openai-codex/*` ChatGPT/Codex OAuth routes to `openai/*`, which could break OAuth-only GPT-5.5 setups or accidentally move users onto the OpenAI API-key route. If 2026.5.5 already changed your default model, run `openclaw models set openai-codex/gpt-5.5 && openclaw config validate` to switch the default agent back to the Codex OAuth PI route. Fixes #78407.
133133
- Discord/groups: instruct group-chat agents to stay silent when a message is addressed to someone else, replying only when invited or correcting key facts. (#78615)
134134
- Discord/groups: tell Discord-channel agents to wrap bare URLs as `<https://example.com>` so link previews do not expand into uninvited embeds. (#78614)
135+
- Agents/fallback: fail fast on session write-lock timeouts instead of trying fallback models for local file contention. Fixes #66646. Thanks @sallyom.
135136
- Telegram/Codex: generate DM topic labels with Codex-compatible simple-completion requests so auto-created private topics can be renamed instead of staying `New Chat`.
136137
- Plugins/runtime fetch: drop third-party symbol metadata from plain request header dictionaries before passing them into native `fetch` or `Headers`, so SDK and guarded/proxy fetch paths do not reject otherwise valid plugin requests. Fixes #77846. Thanks @shakkernerd.
137138
- Web fetch: bound guarded dispatcher cleanup after request timeouts so timed-out fetches return tool errors instead of leaving Gateway tool lanes active. (#78439) Thanks @obviyus.

src/agents/failover-error.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ function normalizeDirectErrorSignal(err: unknown): FailoverSignal {
210210
};
211211
}
212212

213-
function hasSessionWriteLockTimeout(err: unknown, seen: Set<object> = new Set()): boolean {
213+
export function hasSessionWriteLockTimeout(err: unknown, seen: Set<object> = new Set()): boolean {
214214
if (isSessionWriteLockTimeoutError(err)) {
215215
return true;
216216
}

src/agents/model-fallback.test.ts

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ import {
2121
} from "./model-fallback.js";
2222
import { classifyEmbeddedPiRunResultForModelFallback } from "./pi-embedded-runner/result-fallback-classifier.js";
2323
import type { EmbeddedPiRunResult } from "./pi-embedded-runner/types.js";
24+
import { SessionWriteLockTimeoutError } from "./session-write-lock-error.js";
2425
import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
2526

2627
vi.mock("../infra/file-lock.js", () => ({
@@ -555,6 +556,35 @@ describe("runWithModelFallback", () => {
555556
}
556557
});
557558

559+
it("fails fast on session write-lock timeouts instead of trying model fallbacks", async () => {
560+
const cfg = makeCfg({
561+
agents: {
562+
defaults: {
563+
model: {
564+
primary: "openai/gpt-5.4",
565+
fallbacks: ["anthropic/claude-opus-4-6"],
566+
},
567+
},
568+
},
569+
});
570+
const lockError = new SessionWriteLockTimeoutError({
571+
timeoutMs: 10_000,
572+
owner: "pid=37121",
573+
lockPath: "/tmp/openclaw/session.jsonl.lock",
574+
});
575+
const run = vi.fn().mockRejectedValueOnce(lockError);
576+
577+
await expect(
578+
runWithModelFallback({
579+
cfg,
580+
provider: "openai",
581+
model: "gpt-5.4",
582+
run,
583+
}),
584+
).rejects.toBe(lockError);
585+
expect(run).toHaveBeenCalledTimes(1);
586+
});
587+
558588
it("uses optional result classification to continue to configured fallbacks", async () => {
559589
const cfg = makeCfg({
560590
agents: {

src/agents/model-fallback.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ import {
1616
FailoverError,
1717
coerceToFailoverError,
1818
describeFailoverError,
19+
hasSessionWriteLockTimeout,
1920
isFailoverError,
2021
isTimeoutError,
2122
} from "./failover-error.js";
@@ -1048,6 +1049,9 @@ export async function runWithModelFallback<T>(params: {
10481049
sessionId: params.sessionId,
10491050
lane: params.lane,
10501051
}) ?? err;
1052+
if (hasSessionWriteLockTimeout(normalized)) {
1053+
throw err;
1054+
}
10511055

10521056
// LiveSessionModelSwitchError during fallback may point at a later
10531057
// candidate that is already the active live-session selection. Jump

0 commit comments

Comments
 (0)