Skip to content

Commit 7f943b5

Browse files
clawsweeper[bot]samson1357924Takhoffman
authored
fix(json): retry on transient File changed during read race condition (#85029)
Summary: - The PR wraps the async JSON file readers in `src/infra/json-files.ts` with bounded retries for fs-safe `File changed during read` races, adds regression tests, and adds a changelog entry. - Reproducibility: yes. Source inspection shows fs-safe throws `File changed during read`, current main re-exp ... R proof includes before/after gateway logs; I did not run a new live race harness in this read-only review. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(json): preserve strict reader types (Promise<T> for readJson/read… - PR branch already contained follow-up commit before automerge: test(json): add retry-success and retry-exhaustion coverage - PR branch already contained follow-up commit before automerge: fix(json): resolve lint warnings (prefer-exponentiation-operator, cur… - PR branch already contained follow-up commit before automerge: fix(json): retry on transient File changed during read race condition Validation: - ClawSweeper review passed for head 00602a1. - Required merge gates passed before the squash merge. Prepared head SHA: 00602a1 Review: #85029 (comment) Co-authored-by: samson1357924 <98934496+samson1357924@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
1 parent 5955f35 commit 7f943b5

3 files changed

Lines changed: 191 additions & 23 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
1717

1818
### Fixes
1919

20+
- Infra/json: retry transient `File changed during read` races while loading JSON state so config and state reads recover instead of failing the turn. (#84285)
2021
- Gateway/sessions: preserve compatible session auth profile overrides when switching models within the same provider, including provider-auth aliases. Fixes #81837. (#81886) Thanks @TurboTheTurtle.
2122
- Gateway/status: surface inbound delivery telemetry counters and transport-liveness warnings in `openclaw status --all`. Fixes #49577. (#72724)
2223
- Providers/Ollama: treat Docker/OrbStack host aliases as local Ollama endpoints so `ollama-local` marker auth works when OpenClaw runs inside a VM/container and Ollama runs on the host. Fixes #84875.

src/infra/json-files.test.ts

Lines changed: 93 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1-
import fs from "node:fs/promises";
1+
import fs from "node:fs";
2+
import fsPromises from "node:fs/promises";
23
import path from "node:path";
34
import { afterEach, describe, expect, it, vi } from "vitest";
45
import { withTempDir } from "../test-helpers/temp-dir.js";
56
import {
67
JsonFileReadError,
78
createAsyncLock,
89
readDurableJsonFile,
10+
readJson,
911
readJsonFile,
12+
tryReadJson,
1013
writeJsonAtomic,
1114
writeTextAtomic,
1215
} from "./json-files.js";
@@ -26,7 +29,7 @@ describe("json file helpers", () => {
2629
name: "reads valid json",
2730
setup: async (base: string) => {
2831
const filePath = path.join(base, "valid.json");
29-
await fs.writeFile(filePath, '{"ok":true}', "utf8");
32+
await fsPromises.writeFile(filePath, '{"ok":true}', "utf8");
3033
return filePath;
3134
},
3235
expected: { ok: true },
@@ -35,7 +38,7 @@ describe("json file helpers", () => {
3538
name: "returns null for invalid files",
3639
setup: async (base: string) => {
3740
const filePath = path.join(base, "invalid.json");
38-
await fs.writeFile(filePath, "{not-json}", "utf8");
41+
await fsPromises.writeFile(filePath, "{not-json}", "utf8");
3942
return filePath;
4043
},
4144
expected: null,
@@ -56,8 +59,8 @@ describe("json file helpers", () => {
5659
const validPath = path.join(base, "valid.json");
5760
const invalidPath = path.join(base, "invalid.json");
5861
const missingPath = path.join(base, "missing.json");
59-
await fs.writeFile(validPath, '{"ok":true}', "utf8");
60-
await fs.writeFile(invalidPath, "{not-json}", "utf8");
62+
await fsPromises.writeFile(validPath, '{"ok":true}', "utf8");
63+
await fsPromises.writeFile(invalidPath, "{not-json}", "utf8");
6164

6265
await expect(readDurableJsonFile(validPath)).resolves.toEqual({ ok: true });
6366
await expect(readDurableJsonFile(missingPath)).resolves.toBeNull();
@@ -82,7 +85,7 @@ describe("json file helpers", () => {
8285
{ trailingNewline: true, dirMode: 0o755 },
8386
);
8487

85-
await expect(fs.readFile(filePath, "utf8")).resolves.toBe(
88+
await expect(fsPromises.readFile(filePath, "utf8")).resolves.toBe(
8689
'{\n "ok": true,\n "nested": {\n "value": 1\n }\n}\n',
8790
);
8891
});
@@ -95,56 +98,56 @@ describe("json file helpers", () => {
9598
await withTempDir({ prefix: "openclaw-json-files-" }, async (base) => {
9699
const filePath = path.join(base, "nested", "note.txt");
97100
await writeTextAtomic(filePath, input, { trailingNewline: true });
98-
await expect(fs.readFile(filePath, "utf8")).resolves.toBe(expected);
101+
await expect(fsPromises.readFile(filePath, "utf8")).resolves.toBe(expected);
99102
});
100103
});
101104

102105
it("can skip durable fsync work for hot state writes", async () => {
103106
await withTempDir({ prefix: "openclaw-json-files-" }, async (base) => {
104107
const filePath = path.join(base, "state.json");
105-
const openSpy = vi.spyOn(fs, "open");
108+
const openSpy = vi.spyOn(fsPromises, "open");
106109

107110
await writeTextAtomic(filePath, "new", { durable: false });
108111

109112
expect(openSpy).not.toHaveBeenCalled();
110-
await expect(fs.readFile(filePath, "utf8")).resolves.toBe("new");
113+
await expect(fsPromises.readFile(filePath, "utf8")).resolves.toBe("new");
111114
});
112115
});
113116

114117
it("preserves text when Windows rename reports EPERM", async () => {
115118
await withTempDir({ prefix: "openclaw-json-files-" }, async (base) => {
116119
const filePath = path.join(base, "state.json");
117-
await fs.writeFile(filePath, "old", "utf8");
120+
await fsPromises.writeFile(filePath, "old", "utf8");
118121

119122
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
120123
const renameError = Object.assign(new Error("EPERM"), { code: "EPERM" });
121-
const renameSpy = vi.spyOn(fs, "rename").mockRejectedValueOnce(renameError);
124+
const renameSpy = vi.spyOn(fsPromises, "rename").mockRejectedValueOnce(renameError);
122125

123126
await writeTextAtomic(filePath, "new");
124127

125128
expect(renameSpy).toHaveBeenCalledOnce();
126-
await expect(fs.readFile(filePath, "utf8")).resolves.toBe("new");
129+
await expect(fsPromises.readFile(filePath, "utf8")).resolves.toBe("new");
127130
});
128131
});
129132

130133
it("refuses Windows copy fallback through symlink destinations", async () => {
131134
await withTempDir({ prefix: "openclaw-json-files-" }, async (base) => {
132135
const filePath = path.join(base, "state.json");
133136
const outsidePath = path.join(base, "outside.json");
134-
await fs.writeFile(outsidePath, "outside", "utf8");
135-
await fs.symlink(outsidePath, filePath);
137+
await fsPromises.writeFile(outsidePath, "outside", "utf8");
138+
await fsPromises.symlink(outsidePath, filePath);
136139

137140
Object.defineProperty(process, "platform", { value: "win32", configurable: true });
138141
const renameError = Object.assign(new Error("EPERM"), { code: "EPERM" });
139-
vi.spyOn(fs, "rename").mockRejectedValueOnce(renameError);
142+
vi.spyOn(fsPromises, "rename").mockRejectedValueOnce(renameError);
140143

141144
await expect(writeTextAtomic(filePath, "new")).rejects.toThrow(
142145
"Refusing copy fallback through symlink destination",
143146
);
144147

145-
const fileStat = await fs.lstat(filePath);
148+
const fileStat = await fsPromises.lstat(filePath);
146149
expect(fileStat.isSymbolicLink()).toBe(true);
147-
await expect(fs.readFile(outsidePath, "utf8")).resolves.toBe("outside");
150+
await expect(fsPromises.readFile(outsidePath, "utf8")).resolves.toBe("outside");
148151
});
149152
});
150153

@@ -185,4 +188,77 @@ describe("json file helpers", () => {
185188
await expect(second).resolves.toBe("ok");
186189
expect(events).toEqual(expectedEvents);
187190
});
191+
192+
describe("retry behaviors on 'File changed during read'", () => {
193+
/**
194+
* Helper: spy on fsPromises.lstat for our target file path.
195+
* Returns a real Stats object with a modified ino to trigger
196+
* verifyStableReadTarget in @openclaw/fs-safe.
197+
* Object.assign + Object.create preserves the Stats prototype.
198+
*/
199+
function setupLstatSpy(targetPath: string, targetCallCount: number): () => number {
200+
const origLstat = fsPromises.lstat.bind(fsPromises);
201+
let callCount = 0;
202+
203+
vi.spyOn(fsPromises, "lstat").mockImplementation(async (p, ...args) => {
204+
const stat = await origLstat(p, ...args);
205+
const pathStr = typeof p === "string" ? p : String(p);
206+
if (pathStr === targetPath) {
207+
callCount++;
208+
if (callCount <= targetCallCount) {
209+
// Modify ino: for BigInt ino add 100n, for number ino add 100
210+
const modifiedIno = typeof stat.ino === "bigint" ? stat.ino + 100n : stat.ino + 100;
211+
212+
// Clone stat preserving prototype, override ino
213+
return Object.assign(Object.create(Object.getPrototypeOf(stat)), stat, {
214+
ino: modifiedIno,
215+
});
216+
}
217+
}
218+
return stat;
219+
});
220+
221+
return () => callCount;
222+
}
223+
224+
it("retries on transient File changed during read and succeeds", async () => {
225+
await withTempDir({ prefix: "openclaw-json-files-retry-" }, async (base) => {
226+
const filePath = path.join(base, "config.json");
227+
await fsPromises.writeFile(filePath, '{"ok":true}', "utf8");
228+
229+
// Only fail lstat once (first call) — retry should succeed on 2nd attempt
230+
const getCalls = setupLstatSpy(filePath, 1);
231+
232+
const result = await readJson<{ ok: boolean }>(filePath);
233+
expect(result).toEqual({ ok: true });
234+
// Should have at least 2 lstat calls: one failed, one successful
235+
expect(getCalls()).toBeGreaterThanOrEqual(2);
236+
});
237+
});
238+
239+
it("throws JsonFileReadError after exhausting retries on persistent race", async () => {
240+
await withTempDir({ prefix: "openclaw-json-files-exhaust-" }, async (base) => {
241+
const filePath = path.join(base, "config.json");
242+
await fsPromises.writeFile(filePath, '{"ok":true}', "utf8");
243+
244+
// Always fail lstat — all 3 retries should exhaust
245+
setupLstatSpy(filePath, Infinity);
246+
247+
await expect(readJson(filePath)).rejects.toThrow(JsonFileReadError);
248+
});
249+
});
250+
251+
it("tryReadJson returns null after exhausting retries", async () => {
252+
await withTempDir({ prefix: "openclaw-json-files-try-" }, async (base) => {
253+
const filePath = path.join(base, "config.json");
254+
await fsPromises.writeFile(filePath, '{"ok":true}', "utf8");
255+
256+
// Always fail lstat — tryReadJson catches and returns null
257+
setupLstatSpy(filePath, Infinity);
258+
259+
const result = await tryReadJson(filePath);
260+
expect(result).toBeNull();
261+
});
262+
});
263+
});
188264
});

src/infra/json-files.ts

Lines changed: 97 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,115 @@
11
import "./fs-safe-defaults.js";
2+
import {
3+
JsonFileReadError,
4+
readJson as readJsonImpl,
5+
readJsonIfExists as readJsonIfExistsImpl,
6+
} from "@openclaw/fs-safe/json";
27
import { replaceFileAtomic } from "./replace-file.js";
38

49
export {
510
JsonFileReadError,
6-
readJson,
7-
readJson as readJsonFileStrict,
8-
readJsonIfExists,
9-
readJsonIfExists as readDurableJsonFile,
1011
readJsonSync,
1112
readRootJsonObjectSync,
1213
readRootJsonSync,
1314
readRootStructuredFileSync,
14-
tryReadJson,
15-
tryReadJson as readJsonFile,
1615
tryReadJsonSync,
1716
tryReadJsonSync as readJsonFileSync,
1817
writeJson,
1918
writeJson as writeJsonAtomic,
2019
writeJsonSync,
2120
} from "@openclaw/fs-safe/json";
21+
22+
const RETRY_MAX_ATTEMPTS = 3;
23+
const RETRY_BASE_DELAY_MS = 50;
24+
25+
/**
26+
* Recursively walks the error cause chain to detect
27+
* "File changed during read" errors wrapped inside
28+
* JsonFileReadError by @openclaw/fs-safe.
29+
*/
30+
function isFileChangedDuringRead(err: unknown): boolean {
31+
let current: unknown = err;
32+
while (current) {
33+
if (current instanceof Error) {
34+
if (
35+
typeof current.message === "string" &&
36+
current.message.includes("File changed during read")
37+
) {
38+
return true;
39+
}
40+
current = (current as Error & { cause?: unknown }).cause;
41+
} else {
42+
break;
43+
}
44+
}
45+
return false;
46+
}
47+
48+
async function withRetryOnFileChanged<T>(fn: () => Promise<T>): Promise<T> {
49+
for (let attempt = 0; ; attempt++) {
50+
try {
51+
return await fn();
52+
} catch (err) {
53+
if (isFileChangedDuringRead(err) && attempt < RETRY_MAX_ATTEMPTS - 1) {
54+
await new Promise((r) => setTimeout(r, RETRY_BASE_DELAY_MS * 2 ** attempt));
55+
continue;
56+
}
57+
throw err;
58+
}
59+
}
60+
}
61+
62+
export async function readJson<T>(filePath: string): Promise<T> {
63+
try {
64+
return await withRetryOnFileChanged(() => readJsonImpl<T>(filePath));
65+
} catch (err) {
66+
throw err instanceof JsonFileReadError ? err : new JsonFileReadError(filePath, "read", err);
67+
}
68+
}
69+
70+
export async function readJsonFileStrict<T>(filePath: string): Promise<T> {
71+
return readJson<T>(filePath);
72+
}
73+
74+
export async function readJsonIfExists<T>(filePath: string): Promise<T | null> {
75+
try {
76+
return await withRetryOnFileChanged(() => readJsonIfExistsImpl<T>(filePath));
77+
} catch (err) {
78+
if (err instanceof JsonFileReadError) {
79+
throw err;
80+
}
81+
throw new JsonFileReadError(filePath, "read", err);
82+
}
83+
}
84+
85+
export async function readDurableJsonFile<T>(filePath: string): Promise<T | null> {
86+
return readJsonIfExists<T>(filePath);
87+
}
88+
89+
/**
90+
* tryReadJson delegates to readJsonIfExists instead of the internal
91+
* tryReadJsonImpl from @openclaw/fs-safe. The fs-safe implementation
92+
* swallows all errors internally and returns null, which prevents
93+
* the retry wrapper from detecting transient "File changed during read"
94+
* race conditions.
95+
*
96+
* By routing through readJsonIfExists, fs-safe propagates errors on
97+
* race conditions, our retry wrapper intercepts and retries them,
98+
* and the outer try-catch still handles parse errors / file-not-found
99+
* gracefully.
100+
*/
101+
export async function tryReadJson<T>(filePath: string): Promise<T | null> {
102+
try {
103+
return await readJsonIfExists<T>(filePath);
104+
} catch {
105+
return null;
106+
}
107+
}
108+
109+
export async function readJsonFile<T>(filePath: string): Promise<T | null> {
110+
return tryReadJson<T>(filePath);
111+
}
112+
22113
export { createAsyncLock } from "@openclaw/fs-safe/advanced";
23114

24115
export type WriteTextAtomicOptions = {

0 commit comments

Comments
 (0)