Skip to content

Commit 53a97fe

Browse files
committed
fix(memory): harden atomic reindex cleanup
1 parent 8d42854 commit 53a97fe

4 files changed

Lines changed: 175 additions & 19 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ Docs: https://docs.openclaw.ai
158158

159159
### Fixes
160160

161+
- Memory: close temp SQLite handles before failed atomic reindex cleanup and retry Windows EBUSY/EPERM/EACCES temp file removals, so `memory index --force` does not abort or leave temp sidecars on locked filesystems. Fixes #79708. Thanks @LobsterFarmerAmp and @hclsys.
161162
- Agents/CLI: handle resumed CLI JSONL output and bound supervisor output buffering so resumed runs stay readable without letting noisy child output grow unbounded.
162163
- Agents/sandbox: include the container workspace path hint in sandbox-root escape errors while preserving shortened host workspace roots. Fixes #79712. Thanks @haumanto and @hclsys.
163164
- Image generation: honor configured web-fetch SSRF policy across OpenAI, Google, MiniMax, OpenRouter, and Vydra provider requests so RFC2544 fake-IP proxy opt-ins reach generation calls. Fixes #79716. (#79765) Thanks @hclsys.

extensions/memory-core/src/memory/manager-atomic-reindex.ts

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,30 +8,48 @@ type MemoryIndexFileOps = {
88
wait: (ms: number) => Promise<void>;
99
};
1010

11-
type MoveMemoryIndexFilesOptions = {
11+
type MemoryIndexFileOptions = {
1212
fileOps?: MemoryIndexFileOps;
1313
maxRenameAttempts?: number;
1414
renameRetryDelayMs?: number;
15+
maxRemoveAttempts?: number;
16+
removeRetryDelayMs?: number;
1517
};
1618

19+
type ResolvedMemoryIndexFileOptions = Required<MemoryIndexFileOptions>;
20+
1721
const defaultFileOps: MemoryIndexFileOps = {
1822
rename: fs.rename,
1923
rm: fs.rm,
2024
wait: sleep,
2125
};
2226

23-
const transientRenameErrorCodes = new Set(["EBUSY", "EPERM", "EACCES"]);
27+
const transientFileErrorCodes = new Set(["EBUSY", "EPERM", "EACCES"]);
2428
const defaultMaxRenameAttempts = 6;
2529
const defaultRenameRetryDelayMs = 25;
30+
const defaultMaxRemoveAttempts = 10;
31+
const defaultRemoveRetryDelayMs = 50;
32+
33+
function isTransientFileError(err: unknown): boolean {
34+
return transientFileErrorCodes.has((err as NodeJS.ErrnoException).code ?? "");
35+
}
2636

27-
function isTransientRenameError(err: unknown): boolean {
28-
return transientRenameErrorCodes.has((err as NodeJS.ErrnoException).code ?? "");
37+
function resolveMemoryIndexFileOptions(
38+
options: MemoryIndexFileOptions = {},
39+
): ResolvedMemoryIndexFileOptions {
40+
return {
41+
fileOps: options.fileOps ?? defaultFileOps,
42+
maxRenameAttempts: Math.max(1, options.maxRenameAttempts ?? defaultMaxRenameAttempts),
43+
renameRetryDelayMs: options.renameRetryDelayMs ?? defaultRenameRetryDelayMs,
44+
maxRemoveAttempts: Math.max(1, options.maxRemoveAttempts ?? defaultMaxRemoveAttempts),
45+
removeRetryDelayMs: options.removeRetryDelayMs ?? defaultRemoveRetryDelayMs,
46+
};
2947
}
3048

3149
async function renameWithRetry(
3250
source: string,
3351
target: string,
34-
options: Required<MoveMemoryIndexFilesOptions>,
52+
options: ResolvedMemoryIndexFileOptions,
3553
): Promise<void> {
3654
for (let attempt = 1; attempt <= options.maxRenameAttempts; attempt++) {
3755
try {
@@ -41,7 +59,7 @@ async function renameWithRetry(
4159
if ((err as NodeJS.ErrnoException).code === "ENOENT") {
4260
return;
4361
}
44-
if (!isTransientRenameError(err) || attempt === options.maxRenameAttempts) {
62+
if (!isTransientFileError(err) || attempt === options.maxRenameAttempts) {
4563
throw err;
4664
}
4765
await options.fileOps.wait(options.renameRetryDelayMs * attempt);
@@ -53,13 +71,9 @@ async function renameWithRetry(
5371
export async function moveMemoryIndexFiles(
5472
sourceBase: string,
5573
targetBase: string,
56-
options: MoveMemoryIndexFilesOptions = {},
74+
options: MemoryIndexFileOptions = {},
5775
): Promise<void> {
58-
const resolvedOptions: Required<MoveMemoryIndexFilesOptions> = {
59-
fileOps: options.fileOps ?? defaultFileOps,
60-
maxRenameAttempts: Math.max(1, options.maxRenameAttempts ?? defaultMaxRenameAttempts),
61-
renameRetryDelayMs: options.renameRetryDelayMs ?? defaultRenameRetryDelayMs,
62-
};
76+
const resolvedOptions = resolveMemoryIndexFileOptions(options);
6377
const suffixes = ["", "-wal", "-shm"];
6478
for (const suffix of suffixes) {
6579
const source = `${sourceBase}${suffix}`;
@@ -68,12 +82,33 @@ export async function moveMemoryIndexFiles(
6882
}
6983
}
7084

71-
async function removeMemoryIndexFiles(
85+
async function rmWithRetry(path: string, options: ResolvedMemoryIndexFileOptions): Promise<void> {
86+
for (let attempt = 1; attempt <= options.maxRemoveAttempts; attempt++) {
87+
try {
88+
await options.fileOps.rm(path, { force: true });
89+
return;
90+
} catch (err) {
91+
if ((err as NodeJS.ErrnoException).code === "ENOENT") {
92+
return;
93+
}
94+
if (!isTransientFileError(err) || attempt === options.maxRemoveAttempts) {
95+
throw err;
96+
}
97+
await options.fileOps.wait(options.removeRetryDelayMs * attempt);
98+
}
99+
}
100+
throw new Error("rm retry loop exited unexpectedly");
101+
}
102+
103+
export async function removeMemoryIndexFiles(
72104
basePath: string,
73-
fileOps: MemoryIndexFileOps = defaultFileOps,
105+
options: MemoryIndexFileOptions = {},
74106
): Promise<void> {
107+
const resolvedOptions = resolveMemoryIndexFileOptions(options);
75108
const suffixes = ["", "-wal", "-shm"];
76-
await Promise.all(suffixes.map((suffix) => fileOps.rm(`${basePath}${suffix}`, { force: true })));
109+
for (const suffix of suffixes) {
110+
await rmWithRetry(`${basePath}${suffix}`, resolvedOptions);
111+
}
77112
}
78113

79114
async function swapMemoryIndexFiles(targetPath: string, tempPath: string): Promise<void> {
@@ -92,13 +127,23 @@ export async function runMemoryAtomicReindex<T>(params: {
92127
targetPath: string;
93128
tempPath: string;
94129
build: () => Promise<T>;
130+
beforeTempCleanup?: () => Promise<void> | void;
131+
fileOptions?: MemoryIndexFileOptions;
95132
}): Promise<T> {
96133
try {
97134
const result = await params.build();
98135
await swapMemoryIndexFiles(params.targetPath, params.tempPath);
99136
return result;
100137
} catch (err) {
101-
await removeMemoryIndexFiles(params.tempPath);
138+
try {
139+
await params.beforeTempCleanup?.();
140+
await removeMemoryIndexFiles(params.tempPath, params.fileOptions);
141+
} catch (cleanupErr) {
142+
throw new AggregateError(
143+
[err, cleanupErr],
144+
"memory atomic reindex failed and temp cleanup failed",
145+
);
146+
}
102147
throw err;
103148
}
104149
}

extensions/memory-core/src/memory/manager-sync-ops.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,6 +1178,7 @@ export abstract class MemoryManagerSyncOps {
11781178
const tempDb = openMemoryDatabaseAtPath(tempDbPath, this.settings.store.vector.enabled);
11791179

11801180
const originalDb = this.db;
1181+
let tempDbClosed = false;
11811182
let originalDbClosed = false;
11821183
const originalState = {
11831184
ftsAvailable: this.fts.available,
@@ -1216,6 +1217,12 @@ export abstract class MemoryManagerSyncOps {
12161217
nextMeta = await runMemoryAtomicReindex({
12171218
targetPath: dbPath,
12181219
tempPath: tempDbPath,
1220+
beforeTempCleanup: () => {
1221+
if (!tempDbClosed) {
1222+
closeMemoryDatabase(tempDb);
1223+
tempDbClosed = true;
1224+
}
1225+
},
12191226
build: async () => {
12201227
await this.seedEmbeddingCache(originalDb);
12211228
const shouldSyncMemory = this.sources.has("memory");
@@ -1265,7 +1272,8 @@ export abstract class MemoryManagerSyncOps {
12651272
this.writeMeta(meta);
12661273
this.pruneEmbeddingCacheIfNeeded?.();
12671274

1268-
closeMemoryDatabase(this.db);
1275+
closeMemoryDatabase(tempDb);
1276+
tempDbClosed = true;
12691277
closeMemoryDatabase(originalDb);
12701278
originalDbClosed = true;
12711279
return meta;
@@ -1278,7 +1286,10 @@ export abstract class MemoryManagerSyncOps {
12781286
this.vector.dims = nextMeta?.vectorDims;
12791287
} catch (err) {
12801288
try {
1281-
closeMemoryDatabase(this.db);
1289+
if (!tempDbClosed && this.db === tempDb) {
1290+
closeMemoryDatabase(tempDb);
1291+
tempDbClosed = true;
1292+
}
12821293
} catch {}
12831294
restoreOriginalState();
12841295
throw err;

extensions/memory-core/src/memory/manager.atomic-reindex.test.ts

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ import os from "node:os";
33
import path from "node:path";
44
import { DatabaseSync } from "node:sqlite";
55
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
6-
import { moveMemoryIndexFiles, runMemoryAtomicReindex } from "./manager-atomic-reindex.js";
6+
import {
7+
moveMemoryIndexFiles,
8+
removeMemoryIndexFiles,
9+
runMemoryAtomicReindex,
10+
} from "./manager-atomic-reindex.js";
711

812
async function expectPathMissing(targetPath: string): Promise<void> {
913
await expect(fs.access(targetPath)).rejects.toMatchObject({ code: "ENOENT" });
@@ -133,6 +137,101 @@ describe("memory manager atomic reindex", () => {
133137
expect(rename).toHaveBeenCalledTimes(1);
134138
expect(wait).not.toHaveBeenCalled();
135139
});
140+
141+
it.each(["EBUSY", "EPERM", "EACCES"] as const)(
142+
"retries transient %s rm failures during index file cleanup",
143+
async (code) => {
144+
const calls: string[] = [];
145+
const rm = vi.fn(async (filePath: string) => {
146+
calls.push(filePath);
147+
if (calls.length === 1) {
148+
throw Object.assign(new Error("busy"), { code });
149+
}
150+
});
151+
const wait = vi.fn().mockResolvedValue(undefined);
152+
153+
await removeMemoryIndexFiles("index.sqlite.tmp", {
154+
fileOps: { rename: fs.rename, rm, wait },
155+
maxRemoveAttempts: 3,
156+
removeRetryDelayMs: 10,
157+
});
158+
159+
expect(calls).toEqual([
160+
"index.sqlite.tmp",
161+
"index.sqlite.tmp",
162+
"index.sqlite.tmp-wal",
163+
"index.sqlite.tmp-shm",
164+
]);
165+
expect(wait).toHaveBeenCalledTimes(1);
166+
expect(wait).toHaveBeenCalledWith(10);
167+
},
168+
);
169+
170+
it("throws after exhausting transient rm retries", async () => {
171+
const rm = vi.fn().mockRejectedValue(Object.assign(new Error("busy"), { code: "EBUSY" }));
172+
const wait = vi.fn().mockResolvedValue(undefined);
173+
174+
await expect(
175+
removeMemoryIndexFiles("index.sqlite.tmp", {
176+
fileOps: { rename: fs.rename, rm, wait },
177+
maxRemoveAttempts: 3,
178+
removeRetryDelayMs: 10,
179+
}),
180+
).rejects.toMatchObject({ code: "EBUSY" });
181+
182+
expect(rm).toHaveBeenCalledTimes(3);
183+
expect(wait).toHaveBeenCalledTimes(2);
184+
expect(wait).toHaveBeenNthCalledWith(1, 10);
185+
expect(wait).toHaveBeenNthCalledWith(2, 20);
186+
});
187+
188+
it("does not retry non-transient rm failures", async () => {
189+
const rm = vi.fn().mockRejectedValue(Object.assign(new Error("invalid"), { code: "EINVAL" }));
190+
const wait = vi.fn().mockResolvedValue(undefined);
191+
192+
await expect(
193+
removeMemoryIndexFiles("index.sqlite.tmp", {
194+
fileOps: { rename: fs.rename, rm, wait },
195+
maxRemoveAttempts: 3,
196+
removeRetryDelayMs: 10,
197+
}),
198+
).rejects.toMatchObject({ code: "EINVAL" });
199+
200+
expect(rm).toHaveBeenCalledTimes(1);
201+
expect(wait).not.toHaveBeenCalled();
202+
});
203+
204+
it("closes temp resources before removing temp files after build failure", async () => {
205+
const events: string[] = [];
206+
let tempClosed = false;
207+
const rm = vi.fn(async (filePath: string) => {
208+
events.push(tempClosed ? `rm:${filePath}:closed` : `rm:${filePath}:open`);
209+
});
210+
211+
await expect(
212+
runMemoryAtomicReindex({
213+
targetPath: "index.sqlite",
214+
tempPath: "index.sqlite.tmp",
215+
beforeTempCleanup: async () => {
216+
events.push("close-temp");
217+
tempClosed = true;
218+
},
219+
fileOptions: {
220+
fileOps: { rename: fs.rename, rm, wait: vi.fn().mockResolvedValue(undefined) },
221+
},
222+
build: async () => {
223+
throw new Error("embedding failure");
224+
},
225+
}),
226+
).rejects.toThrow("embedding failure");
227+
228+
expect(events).toEqual([
229+
"close-temp",
230+
"rm:index.sqlite.tmp:closed",
231+
"rm:index.sqlite.tmp-wal:closed",
232+
"rm:index.sqlite.tmp-shm:closed",
233+
]);
234+
});
136235
});
137236

138237
function writeChunkMarker(dbPath: string, marker: string): void {

0 commit comments

Comments
 (0)