Skip to content

Commit 14ce873

Browse files
committed
fix(context-engine): quarantine broken plugin engines
1 parent 9813ff2 commit 14ce873

11 files changed

Lines changed: 864 additions & 48 deletions

File tree

docs/concepts/context-engine.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,20 @@ Native Codex and OpenClaw embedded agent runs satisfy `assemble-before-prompt`.
251251
Generic CLI backends do not, so engines that require it are rejected before the
252252
CLI process starts.
253253

254+
### Failure isolation
255+
256+
OpenClaw isolates the selected plugin engine from the core reply path. If a
257+
non-legacy engine is missing, fails contract validation, throws during factory
258+
creation, or throws from a lifecycle method, OpenClaw quarantines that engine
259+
for the current Gateway process and downgrades context-engine work to the
260+
built-in `legacy` engine. The error is logged with the failed operation so the
261+
operator can repair, update, or disable the plugin without the agent going
262+
silent.
263+
264+
Host requirement failures are different: when an engine declares that a runtime
265+
lacks a required capability, OpenClaw fails closed before starting the run. That
266+
protects engines that would corrupt state if they ran in an unsupported host.
267+
254268
### ownsCompaction
255269

256270
`ownsCompaction` controls whether OpenClaw runtime's built-in in-attempt auto-compaction stays enabled for the run:
@@ -321,7 +335,7 @@ The slot is exclusive at run time - only one registered context engine is resolv
321335

322336
- Use `openclaw doctor` to verify your engine is loading correctly.
323337
- If switching engines, existing sessions continue with their current history. The new engine takes over for future runs.
324-
- Engine errors are logged and surfaced in diagnostics. If a plugin engine fails to register or the selected engine id cannot be resolved, OpenClaw does not fall back automatically; runs fail until you fix the plugin or switch `plugins.slots.contextEngine` back to `"legacy"`.
338+
- Engine errors are logged and the selected plugin engine is quarantined for the current Gateway process. OpenClaw falls back to `legacy` for user turns so replies can continue, but you should still repair, update, disable, or uninstall the broken plugin.
325339
- For development, use `openclaw plugins install -l ./my-engine` to link a local plugin directory without copying.
326340

327341
## Related

scripts/e2e/lib/clawhub-fixture-server.cjs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,28 @@ export default definePluginEntry({
127127
docsPath: "/providers/kitchen-sink",
128128
auth: [],
129129
});
130+
api.registerContextEngine("${pluginId}", () => ({
131+
info: {
132+
id: "${pluginId}",
133+
name: "Kitchen Sink Context Engine",
134+
},
135+
async ingest() {
136+
return { ingested: false };
137+
},
138+
async assemble(params) {
139+
return {
140+
messages: params.messages,
141+
estimatedTokens: 0,
142+
};
143+
},
144+
async compact() {
145+
return {
146+
ok: true,
147+
compacted: false,
148+
reason: "kitchen-sink fixture does not compact",
149+
};
150+
},
151+
}));
130152
api.registerChannel({
131153
plugin: {
132154
id: "kitchen-sink-channel",
@@ -151,6 +173,7 @@ export default definePluginEntry({
151173
manifest: {
152174
id: pluginId,
153175
name: "OpenClaw Kitchen Sink",
176+
kind: "context-engine",
154177
channels: ["kitchen-sink-channel"],
155178
channelConfigs: {
156179
"kitchen-sink-channel": {

scripts/e2e/lib/kitchen-sink-plugin/assertions.mjs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ function assertInstalled() {
373373
expectIncludes(inspect.plugin?.channelIds, "kitchen-sink-channel", "channels");
374374
expectIncludes(inspect.plugin?.providerIds, "kitchen-sink-provider", "providers");
375375
}
376+
if (source === "clawhub") {
377+
expectIncludes(inspect.plugin?.contextEngineIds, pluginId, "context engines");
378+
}
376379

377380
const diagnostics = [
378381
...(list.diagnostics || []),

src/commands/health.test.ts

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
22
import { stripAnsi } from "../terminal/ansi.js";
33
import { formatHealthCheckFailure } from "./health-format.js";
44
import type { HealthSummary } from "./health.js";
5-
import { formatHealthChannelLines, formatModelPricingHealthLine, healthCommand } from "./health.js";
5+
import {
6+
formatContextEngineHealthLine,
7+
formatHealthChannelLines,
8+
formatModelPricingHealthLine,
9+
healthCommand,
10+
} from "./health.js";
611

712
const runtime = {
813
log: vi.fn(),
@@ -341,6 +346,31 @@ describe("healthCommand", () => {
341346
});
342347
});
343348

349+
// Verifies the one-line health warning rendered for quarantined context engines.
describe("formatContextEngineHealthLine", () => {
350+
it("summarizes quarantined context engines", () => {
351+
const summary = createHealthSummary({
352+
channels: {},
353+
channelOrder: [],
354+
channelLabels: {},
355+
});
356+
// Attach a single quarantined engine entry to the otherwise empty summary.
summary.contextEngines = {
357+
quarantined: [
358+
{
359+
engineId: "lossless-claw",
360+
owner: "plugin:lossless-claw",
361+
operation: "assemble",
362+
reason: "db corrupt",
363+
failedAt: 123,
364+
},
365+
],
366+
};
367+
368+
// Only the count and the engine id are expected in the line; owner, operation,
// reason and failedAt do not appear in the expected string.
expect(formatContextEngineHealthLine(summary)).toBe(
369+
"Context engine: warning (1 quarantined; downgraded to legacy: lossless-claw)",
370+
);
371+
});
372+
});
373+
344374
describe("formatHealthCheckFailure", () => {
345375
it("keeps non-rich output stable", () => {
346376
const err = new Error("gateway closed (1006 abnormal closure): no close reason");

src/commands/health.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import { withProgress } from "../cli/progress.js";
1313
import { getRuntimeConfig } from "../config/config.js";
1414
import { resolveStorePath } from "../config/sessions/paths.js";
1515
import type { OpenClawConfig } from "../config/types.openclaw.js";
16+
import { listContextEngineQuarantines } from "../context-engine/registry.js";
1617
import {
1718
buildGatewayConnectionDetails,
1819
callGateway,
@@ -43,6 +44,7 @@ import type {
4344
AgentHealthSummary,
4445
ChannelAccountHealthSummary,
4546
ChannelHealthSummary,
47+
ContextEngineHealthSummary,
4648
HealthSummary,
4749
PluginHealthErrorSummary,
4850
PluginHealthSummary,
@@ -166,6 +168,32 @@ export function formatModelPricingHealthLine(summary: HealthSummary): string | n
166168
return `Model pricing: warning (optional pricing refresh degraded)${detail}`;
167169
}
168170

171+
/**
 * Builds the context-engine section of the health summary from the entries
 * returned by `listContextEngineQuarantines()`.
 *
 * @returns An object holding one summary per quarantine entry, or `undefined`
 * when there are no entries (so callers can omit the section entirely).
 */
function buildContextEngineHealthSummary(): ContextEngineHealthSummary | undefined {
172+
const quarantined: ContextEngineHealthSummary["quarantined"] = [];
173+
for (const entry of listContextEngineQuarantines()) {
174+
const summary: ContextEngineHealthSummary["quarantined"][number] = {
175+
engineId: entry.engineId,
176+
operation: entry.operation,
177+
reason: entry.reason,
178+
// Flattened to a number via getTime(); presumably `failedAt` is a Date, which
// would make this epoch milliseconds — confirm against the registry type.
failedAt: entry.failedAt.getTime(),
179+
};
180+
// `owner` is optional on the summary; set it only when the entry has a truthy owner.
if (entry.owner) {
181+
summary.owner = entry.owner;
182+
}
183+
quarantined.push(summary);
184+
}
185+
return quarantined.length > 0 ? { quarantined } : undefined;
186+
}
187+
188+
/**
 * Formats a single warning line describing quarantined context engines.
 *
 * @param summary Health summary whose optional `contextEngines.quarantined`
 * list is inspected.
 * @returns `null` when the list is absent or empty; otherwise a line containing
 * the number of quarantined entries and their engine ids joined with ", ".
 */
export function formatContextEngineHealthLine(summary: HealthSummary): string | null {
189+
// A missing `contextEngines` section is treated the same as an empty list.
const quarantined = summary.contextEngines?.quarantined ?? [];
190+
if (quarantined.length === 0) {
191+
return null;
192+
}
193+
const engines = quarantined.map((entry) => entry.engineId).join(", ");
194+
return `Context engine: warning (${quarantined.length} quarantined; downgraded to legacy: ${engines})`;
195+
}
196+
169197
const resolveHeartbeatSummary = (cfg: OpenClawConfig, agentId: string) =>
170198
resolveHeartbeatSummaryForAgent(cfg, agentId);
171199

@@ -571,12 +599,14 @@ export async function getHealthSnapshot(params?: {
571599
}
572600

573601
const pluginHealth = buildPluginHealthSummary();
602+
const contextEngineHealth = buildContextEngineHealthSummary();
574603
const summary: HealthSummary = {
575604
ok: true,
576605
ts: Date.now(),
577606
durationMs: Date.now() - start,
578607
...(params?.eventLoop ? { eventLoop: params.eventLoop } : {}),
579608
...(pluginHealth ? { plugins: pluginHealth } : {}),
609+
...(contextEngineHealth ? { contextEngines: contextEngineHealth } : {}),
580610
modelPricing: getGatewayModelPricingHealth({ enabled: isGatewayModelPricingEnabled(cfg) }),
581611
channels,
582612
channelOrder,
@@ -782,6 +812,10 @@ export async function healthCommand(
782812
if (modelPricingLine) {
783813
runtime.log(styleHealthChannelLine(modelPricingLine, rich));
784814
}
815+
const contextEngineLine = formatContextEngineHealthLine(summary);
816+
if (contextEngineLine) {
817+
runtime.log(styleHealthChannelLine(contextEngineLine, rich));
818+
}
785819
for (const plugin of displayPlugins) {
786820
const channelSummary = summary.channels?.[plugin.id];
787821
if (!channelSummary || channelSummary.linked !== true) {

src/commands/health.types.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@ export type PluginHealthSummary = {
3535
errors: PluginHealthErrorSummary[];
3636
};
3737

38+
/** One quarantined context engine as reported in the health summary. */
export type ContextEngineHealthQuarantineSummary = {
39+
/** Id of the quarantined engine; this is the value listed in the health warning line. */
engineId: string;
40+
/** Optional owner label (e.g. "plugin:lossless-claw" in the tests); omitted when unknown. */
owner?: string;
41+
/** Name of the operation that failed (e.g. "assemble" in the tests). */
operation: string;
42+
/** Failure description (e.g. "db corrupt" in the tests). */
reason: string;
43+
/** Failure time as a number; the health command fills it from `failedAt.getTime()`. */
failedAt: number;
44+
};
45+
46+
/** Context-engine section of the health summary. */
export type ContextEngineHealthSummary = {
47+
/** Quarantine entries, one per reported engine failure. */
quarantined: ContextEngineHealthQuarantineSummary[];
48+
};
49+
3850
export type ModelPricingHealthSummary =
3951
import("../gateway/model-pricing-cache-state.js").GatewayModelPricingHealth;
4052

@@ -44,6 +56,7 @@ export type HealthSummary = {
4456
durationMs: number;
4557
eventLoop?: import("../gateway/server/event-loop-health.js").GatewayEventLoopHealth;
4658
plugins?: PluginHealthSummary;
59+
contextEngines?: ContextEngineHealthSummary;
4760
modelPricing?: ModelPricingHealthSummary;
4861
channels: Record<string, ChannelHealthSummary>;
4962
channelOrder: string[];

0 commit comments

Comments
 (0)