Skip to content

Commit 7ff29a9

Browse files
osolmaz, Onur Solmaz, and clawsweeper[bot]
authored
Fix local embedding worker safety (#85348)
Summary:
- The PR routes local GGUF memory embeddings through a bundled worker sidecar, adds structured degradation and fallback handling, updates memory tests/build output, and keeps the local config contract unchanged.
- PR surface: Source +831, Tests +503, Docs +1, Other +2. Total +1337 across 23 files.
- Reproducibility: Do we have a high-confidence way to reproduce the issue? Source and report evidence are str ... cludes native crash logs; the exact Metal teardown abort was not reproduced in this review or the PR proof.

Automerge notes:
- PR branch already contained follow-up commit before automerge: fix(memory): keep local embedding config unchanged
- PR branch already contained follow-up commit before automerge: fix(memory): type local embedding degradation
- PR branch already contained follow-up commit before automerge: fix(memory): refresh keywords after embedding fallback
- PR branch already contained follow-up commit before automerge: fix(memory): keep worker errors internal
- PR branch already contained follow-up commit before automerge: test: satisfy memory provider lifecycle harnesses
- PR branch already contained follow-up commit before automerge: fix: harden local embedding worker fallback

Validation:
- ClawSweeper review passed for head 1d1fe41.
- Required merge gates passed before the squash merge.

Prepared head SHA: 1d1fe41
Review: #85348 (comment)

Co-authored-by: Onur Solmaz <onur@Onurs-MacBook-Pro.local>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: osolmaz
Co-authored-by: osolmaz <2453968+osolmaz@users.noreply.github.com>
1 parent 70c7d6f commit 7ff29a9

23 files changed

Lines changed: 1429 additions & 92 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
1414

1515
### Fixes
1616

17+
- Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz.
1718
- Agents/heartbeat: stop heartbeat turns after the first valid `heartbeat_respond` so repeated response loops do not burn tokens. (#86357) Thanks @udaymanish6.
1819
- Memory-core: keep REM dreaming focused on live light-staged memories and mark staged entries as considered so old recall history no longer dominates fresh candidates. (#86302) Thanks @SebTardif.
1920
- Telegram: propagate forum topic names through the account-scoped topic cache for native command context and topic create/edit actions. (#86299) Thanks @SebTardif.

extensions/memory-core/src/memory/index.test.ts

Lines changed: 162 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi }
1212
import "./test-runtime-mocks.js";
1313
import type { MemoryIndexManager } from "./index.js";
1414
import { closeAllMemorySearchManagers, getMemorySearchManager } from "./index.js";
15+
import { LOCAL_EMBEDDING_WORKER_ERROR_CODES } from "./manager-local-worker-errors.js";
1516
import { closeMemoryIndexManagersForAgent, EMBEDDING_PROBE_CACHE_TTL_MS } from "./manager.js";
1617
import {
1718
DEFAULT_LOCAL_MODEL,
@@ -34,6 +35,14 @@ let providerCloseGate: Promise<void> | null = null;
3435
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
3536
let forceNoProvider = false;
3637

38+
/**
 * Test helper: builds an `Error` decorated with the fields of a local
 * embedding worker exit failure (`code`, `reason`, `exitCode`).
 *
 * The message and `exitCode` are fixed at 134 so tests can match on the
 * "Local embedding worker exited" text.
 */
function createLocalWorkerExitError(): Error {
  const exitError = new Error("Local embedding worker exited unexpectedly (exit code 134)");
  const workerExitDetails = {
    code: LOCAL_EMBEDDING_WORKER_ERROR_CODES.exited,
    reason: "exit",
    exitCode: 134,
  };
  // Object.assign returns the same Error instance with the extra fields copied on.
  return Object.assign(exitError, workerExitDetails);
}
45+
3746
vi.mock("./embeddings.js", () => {
3847
const embedText = (text: string) => {
3948
const lower = text.toLowerCase();
@@ -44,6 +53,10 @@ vi.mock("./embeddings.js", () => {
4453
return [alpha, beta, image, audio];
4554
};
4655
return {
56+
resolveEmbeddingProviderFallbackModel: (providerId: string, fallbackSourceModel: string) =>
57+
providerId === "gemini" || providerId === "fallback-provider"
58+
? `${providerId}-embed`
59+
: fallbackSourceModel,
4760
createEmbeddingProvider: async (options: {
4861
provider?: string;
4962
model?: string;
@@ -61,7 +74,10 @@ vi.mock("./embeddings.js", () => {
6174
providerUnavailableReason: "No API key found for provider",
6275
};
6376
}
64-
const providerId = options.provider === "gemini" ? "gemini" : "mock";
77+
const providerId =
78+
options.provider === "gemini" || options.provider === "fallback-provider"
79+
? options.provider
80+
: "mock";
6581
const model = options.model ?? "mock-embed";
6682
return {
6783
requestedProvider: options.provider ?? "openai",
@@ -81,7 +97,7 @@ vi.mock("./embeddings.js", () => {
8197
embedBatchCalls += 1;
8298
return texts.map(embedText);
8399
},
84-
...(providerId === "gemini"
100+
...(providerId === "gemini" || providerId === "fallback-provider"
85101
? {
86102
embedBatchInputs: async (
87103
inputs: Array<{
@@ -112,12 +128,12 @@ vi.mock("./embeddings.js", () => {
112128
}
113129
: {}),
114130
},
115-
...(providerId === "gemini"
131+
...(providerId === "gemini" || providerId === "fallback-provider"
116132
? {
117133
runtime: {
118-
id: "gemini",
134+
id: providerId,
119135
cacheKeyData: {
120-
provider: "gemini",
136+
provider: providerId,
121137
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
122138
model,
123139
outputDimensionality: options.outputDimensionality,
@@ -242,7 +258,8 @@ describe("memory index", () => {
242258
extraPaths?: string[];
243259
sources?: Array<"memory" | "sessions">;
244260
sessionMemory?: boolean;
245-
provider?: "openai" | "gemini";
261+
provider?: "openai" | "gemini" | "fallback-provider";
262+
fallback?: "none" | "gemini" | "fallback-provider";
246263
model?: string;
247264
outputDimensionality?: number;
248265
multimodal?: {
@@ -263,6 +280,7 @@ describe("memory index", () => {
263280
memorySearch: {
264281
provider: params.provider ?? "openai",
265282
model: params.model ?? "mock-embed",
283+
fallback: params.fallback,
266284
outputDimensionality: params.outputDimensionality,
267285
store: { path: params.storePath, vector: { enabled: params.vectorEnabled ?? false } },
268286
// Perf: keep test indexes to a single chunk to reduce sqlite work.
@@ -577,6 +595,144 @@ describe("memory index", () => {
577595
);
578596
});
579597

598+
it("clears cached embedding probe readiness when local embeddings degrade", async () => {
599+
const cfg = createCfg({ storePath: path.join(workspaceDir, "index-probe-degraded.sqlite") });
600+
const manager = await getPersistentManager(cfg);
601+
602+
await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({ ok: true });
603+
expect(manager.getCachedEmbeddingAvailability()?.ok).toBe(true);
604+
(
605+
manager as unknown as {
606+
provider: {
607+
id: string;
608+
model: string;
609+
embedQuery: (text: string) => Promise<number[]>;
610+
embedBatch: (texts: string[]) => Promise<number[][]>;
611+
close: () => Promise<void>;
612+
};
613+
}
614+
).provider = {
615+
id: "local",
616+
model: "local-model",
617+
embedQuery: async () => [1, 0],
618+
embedBatch: async (texts: string[]) => texts.map(() => [1, 0]),
619+
close: async () => {},
620+
};
621+
622+
(
623+
manager as unknown as {
624+
markLocalEmbeddingProviderDegraded: (err: unknown) => void;
625+
}
626+
).markLocalEmbeddingProviderDegraded(createLocalWorkerExitError());
627+
628+
expect(manager.getCachedEmbeddingAvailability()).toBeNull();
629+
await expect(manager.probeEmbeddingAvailability()).resolves.toMatchObject({
630+
ok: false,
631+
error: expect.stringContaining("Local embeddings degraded"),
632+
});
633+
});
634+
635+
it("activates configured fallback when local embeddings degrade during search", async () => {
636+
const cfg = createCfg({
637+
storePath: path.join(workspaceDir, "index-search-degraded-fallback.sqlite"),
638+
fallback: "fallback-provider",
639+
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
640+
});
641+
const manager = await getPersistentManager(cfg);
642+
643+
await manager.sync({ reason: "test" });
644+
const callsBeforeSearch = providerCalls.length;
645+
(
646+
manager as unknown as {
647+
provider: {
648+
id: string;
649+
model: string;
650+
embedQuery: () => Promise<number[]>;
651+
embedBatch: (texts: string[]) => Promise<number[][]>;
652+
close: () => Promise<void>;
653+
};
654+
}
655+
).provider = {
656+
id: "local",
657+
model: "mock-embed",
658+
embedQuery: async () => {
659+
throw createLocalWorkerExitError();
660+
},
661+
embedBatch: async (texts: string[]) => texts.map(() => [1, 0, 0, 0]),
662+
close: async () => {},
663+
};
664+
665+
const results = await manager.search("alpha");
666+
667+
expect(results.length).toBeGreaterThan(0);
668+
const resultKeys = results.map(
669+
(result) => `${result.source}:${result.path}:${result.startLine}:${result.endLine}`,
670+
);
671+
expect(new Set(resultKeys).size).toBe(resultKeys.length);
672+
expect(providerCalls.slice(callsBeforeSearch).map((call) => call.provider)).toContain(
673+
"fallback-provider",
674+
);
675+
expect(
676+
(
677+
manager as unknown as {
678+
provider: { id: string } | null;
679+
}
680+
).provider?.id,
681+
).toBe("fallback-provider");
682+
});
683+
684+
it("activates configured fallback after probe-time local degradation", async () => {
685+
const cfg = createCfg({
686+
storePath: path.join(workspaceDir, "index-probe-degraded-fallback.sqlite"),
687+
fallback: "fallback-provider",
688+
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
689+
});
690+
const manager = await getPersistentManager(cfg);
691+
692+
await manager.sync({ reason: "test" });
693+
(
694+
manager as unknown as {
695+
provider: {
696+
id: string;
697+
model: string;
698+
embedQuery: () => Promise<number[]>;
699+
embedBatch: () => Promise<number[][]>;
700+
close: () => Promise<void>;
701+
};
702+
}
703+
).provider = {
704+
id: "local",
705+
model: "mock-embed",
706+
embedQuery: async () => {
707+
throw createLocalWorkerExitError();
708+
},
709+
embedBatch: async () => {
710+
throw createLocalWorkerExitError();
711+
},
712+
close: async () => {},
713+
};
714+
const callsBeforeSearch = providerCalls.length;
715+
716+
await expect(manager.probeEmbeddingAvailability()).resolves.toMatchObject({
717+
ok: false,
718+
error: expect.stringContaining("Local embedding worker exited"),
719+
});
720+
721+
const results = await manager.search("alpha");
722+
723+
expect(results.length).toBeGreaterThan(0);
724+
expect(providerCalls.slice(callsBeforeSearch).map((call) => call.provider)).toContain(
725+
"fallback-provider",
726+
);
727+
expect(
728+
(
729+
manager as unknown as {
730+
provider: { id: string } | null;
731+
}
732+
).provider?.id,
733+
).toBe("fallback-provider");
734+
});
735+
580736
it("streams embedding cache rows during safe reindex", async () => {
581737
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "0");
582738
type EmbeddingCacheRow = {
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
2+
3+
/** Value stored in `code` on every error built by `createMemoryEmbeddingOperationError`. */
export const MEMORY_EMBEDDING_OPERATION_ERROR_CODE = "MEMORY_EMBEDDING_OPERATION_FAILED";

/** Identifies which kind of embedding operation failed. */
export type MemoryEmbeddingOperationKind = "query" | "batch" | "structured-batch";

/**
 * An `Error` carrying structured context about a failed embedding operation.
 *
 * - `code` is always `MEMORY_EMBEDDING_OPERATION_ERROR_CODE` and acts as the discriminator.
 * - `operation` names the kind of operation that failed.
 * - `providerId` is optional and may be absent.
 * - `cause` holds the original thrown value, which may be of any type.
 */
export type MemoryEmbeddingOperationError = Error & {
  code: typeof MEMORY_EMBEDDING_OPERATION_ERROR_CODE;
  operation: MemoryEmbeddingOperationKind;
  providerId?: string;
  cause?: unknown;
};
13+
14+
/**
 * Wraps an arbitrary thrown value in a `MemoryEmbeddingOperationError`.
 *
 * The wrapper's message is `formatErrorMessage(cause)`; the original value is
 * kept on `cause`.
 *
 * @param params.operation - Kind of embedding operation that failed.
 * @param params.providerId - Optional provider id; copied onto the error only
 *   when truthy, so an empty string is treated the same as "not provided".
 * @param params.cause - The original thrown value.
 * @returns A new `Error` tagged with `MEMORY_EMBEDDING_OPERATION_ERROR_CODE`.
 */
export function createMemoryEmbeddingOperationError(params: {
  operation: MemoryEmbeddingOperationKind;
  providerId?: string;
  cause: unknown;
}): MemoryEmbeddingOperationError {
  const { operation, providerId, cause } = params;
  const wrapped = new Error(formatErrorMessage(cause)) as MemoryEmbeddingOperationError;

  // Assignment order is kept stable: code, operation, providerId (optional), cause.
  wrapped.code = MEMORY_EMBEDDING_OPERATION_ERROR_CODE;
  wrapped.operation = operation;
  if (providerId) {
    wrapped.providerId = providerId;
  }
  wrapped.cause = cause;

  return wrapped;
}
29+
30+
/**
 * Type guard for errors tagged with `MEMORY_EMBEDDING_OPERATION_ERROR_CODE`.
 *
 * Only two things are checked: the value is an `Error` instance and its `code`
 * strictly equals the operation error code. `operation` and the other fields
 * are not validated here.
 *
 * @param err - Any caught value.
 * @returns `true` when `err` is an `Error` whose `code` matches.
 */
export function isMemoryEmbeddingOperationError(
  err: unknown,
): err is MemoryEmbeddingOperationError {
  if (!(err instanceof Error)) {
    return false;
  }
  const { code } = err as { code?: unknown };
  return code === MEMORY_EMBEDDING_OPERATION_ERROR_CODE;
}

0 commit comments

Comments
 (0)