Skip to content

Commit 73e2151

Browse files
committed
fix: fail updates on activated plugin load errors
1 parent ad5c00b commit 73e2151

10 files changed

Lines changed: 407 additions & 25 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ Docs: https://docs.openclaw.ai
8080
`openclaw node start` command, and show an actionable browser-control error
8181
when the local control service is missing. Fixes #66637.
8282
- Gateway/update: fail package updates when the restarted managed gateway reports the wrong version, avoiding false-success mixed-version restarts after macOS LaunchAgent updates. Fixes #71835. Thanks @abhinas90 and @jsompis.
83+
- Plugins/runtime deps: surface activated plugin load failures in health and fail package-update restart verification or doctor repair when bundled runtime deps still cannot load, avoiding false-success repairs. (#71883) Thanks @Solvely-Colin.
8384
- WhatsApp: remove ack reactions after a visible reply when `messages.removeAckAfterReply` is enabled, matching other reaction-capable channels. Fixes #26183. Thanks @MrUnforsaken.
8485
- Providers/Z.AI: map OpenClaw thinking controls to Z.AI's `thinking` payload and add opt-in preserved thinking replay via `params.preserveThinking`, so GLM 5.x can keep prior `reasoning_content` when requested. Fixes #58680. Thanks @xuanmingguo.
8586
- Channels/status: keep read-only channel lists on manifest and package metadata by default, loading setup runtime only for explicit fallback callers. Thanks @shakkernerd.

src/cli/daemon-cli/restart-health.test.ts

Lines changed: 101 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -305,6 +305,107 @@ describe("inspectGatewayRestart", () => {
305305
expect(snapshot.versionMismatch).toBeUndefined();
306306
});
307307

308+
it("marks matching-version restarts unhealthy when activated plugins failed to load", async () => {
  // The gateway answers on the expected version, but its health payload carries
  // one activated plugin failure and one failure for a non-activated plugin.
  probeGateway.mockResolvedValue({
    ok: true,
    close: null,
    server: { version: "2026.4.24", connId: "new" },
    health: {
      ok: true,
      plugins: {
        errors: [
          {
            id: "telegram",
            origin: "bundled",
            activated: true,
            error: "failed to install bundled runtime deps: ENOSPC",
          },
          {
            id: "optional",
            origin: "workspace",
            activated: false,
            error: "disabled plugin ignored",
          },
        ],
      },
    },
  });

  const snapshot = await inspectGatewayRestartWithSnapshot({
    runtime: { status: "running", pid: 8000 },
    expectedVersion: "2026.4.24",
    portUsage: {
      port: 18789,
      status: "busy",
      listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }],
      hints: [],
    },
  });

  // Only the activated failure is expected in the snapshot, and it must flip
  // `healthy` to false even though the versions match.
  expect(snapshot).toMatchObject({
    healthy: false,
    gatewayVersion: "2026.4.24",
    expectedVersion: "2026.4.24",
    activatedPluginErrors: [
      {
        id: "telegram",
        origin: "bundled",
        activated: true,
        error: "failed to install bundled runtime deps: ENOSPC",
      },
    ],
  });
  expect(snapshot.versionMismatch).toBeUndefined();
  // Health details must be requested from the probe when an expected version is given.
  expect(probeGateway).toHaveBeenCalledWith(expect.objectContaining({ includeDetails: true }));

  // The rendered diagnostics must list the failing plugin under its own heading.
  const { renderRestartDiagnostics } = await import("./restart-health.js");
  expect(renderRestartDiagnostics(snapshot).join("\n")).toContain(
    "Activated plugin load errors:\n- telegram: failed to install bundled runtime deps: ENOSPC",
  );
});
366+
367+
it("stops waiting once the expected-version gateway reports activated plugin errors", async () => {
  // The gateway is reachable on the expected version but reports an activated
  // plugin load failure in its health payload.
  probeGateway.mockResolvedValue({
    ok: true,
    close: null,
    server: { version: "2026.4.24", connId: "new" },
    health: {
      ok: true,
      plugins: {
        errors: [
          {
            id: "telegram",
            origin: "bundled",
            activated: true,
            error: "failed to install bundled runtime deps: ENOSPC",
          },
        ],
      },
    },
  });
  inspectPortUsage.mockResolvedValue({
    port: 18789,
    status: "busy",
    listeners: [{ pid: 8000, commandLine: "openclaw-gateway" }],
    hints: [],
  });

  const { waitForGatewayHealthyRestart } = await import("./restart-health.js");
  const snapshot = await waitForGatewayHealthyRestart({
    service: makeGatewayService({ status: "running", pid: 8000 }),
    port: 18789,
    expectedVersion: "2026.4.24",
  });

  // The wait must end on the first attempt with the dedicated outcome, without
  // sleeping between retries.
  expect(snapshot).toMatchObject({
    healthy: false,
    waitOutcome: "plugin-errors",
    elapsedMs: 0,
    activatedPluginErrors: [expect.objectContaining({ id: "telegram" })],
  });
  expect(sleep).not.toHaveBeenCalled();
});
408+
308409
it("treats busy ports with unavailable listener details as healthy when runtime is running", async () => {
309410
const service = {
310411
readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })),

src/cli/daemon-cli/restart-health.ts

Lines changed: 114 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import type { PluginHealthErrorSummary } from "../../commands/health.types.js";
12
import type { GatewayServiceRuntime } from "../../daemon/service-runtime.js";
23
import type { GatewayService } from "../../daemon/service.js";
34
import { probeGateway } from "../../gateway/probe.js";
@@ -22,14 +23,20 @@ export const DEFAULT_RESTART_HEALTH_ATTEMPTS = Math.ceil(
2223
const STOPPED_FREE_EARLY_EXIT_GRACE_MS = 10_000;
2324
const WINDOWS_STOPPED_FREE_EARLY_EXIT_GRACE_MS = 90_000;
2425

25-
export type GatewayRestartWaitOutcome = "healthy" | "stale-pids" | "stopped-free" | "timeout";
26+
/**
 * Reason a wait for a restarted gateway stopped.
 *
 * - `"healthy"`: the inspected snapshot was healthy.
 * - `"plugin-errors"`: the snapshot carried activated plugin load errors, so
 *   waiting longer would not help.
 * - `"stale-pids"`: stale gateway PIDs were found while the service runtime
 *   was not running.
 * - `"stopped-free"` / `"timeout"`: remaining terminal outcomes of the wait loop.
 */
export type GatewayRestartWaitOutcome =
  | "healthy"
  | "plugin-errors"
  | "stale-pids"
  | "stopped-free"
  | "timeout";
2632

2733
export type GatewayRestartSnapshot = {
2834
runtime: GatewayServiceRuntime;
2935
portUsage: PortUsage;
3036
healthy: boolean;
3137
staleGatewayPids: number[];
3238
gatewayVersion?: string | null;
39+
activatedPluginErrors?: PluginHealthErrorSummary[];
3340
expectedVersion?: string;
3441
versionMismatch?: {
3542
expected: string;
@@ -47,6 +54,7 @@ export type GatewayPortHealthSnapshot = {
4754
/** Summary of a single gateway probe as used by the restart health checks. */
type GatewayReachability = {
  /** Whether the probe counted the gateway as reachable. */
  reachable: boolean;
  /** Version reported by the probed server, or `null` when none was reported. */
  gatewayVersion: string | null;
  /** Load errors of activated plugins read from the probe's health payload (may be empty). */
  activatedPluginErrors: PluginHealthErrorSummary[];
};
5159

5260
function hasListenerAttributionGap(portUsage: PortUsage): boolean {
@@ -101,18 +109,73 @@ function applyExpectedVersion(
101109
};
102110
}
103111

104-
async function confirmGatewayReachable(port: number): Promise<GatewayReachability> {
112+
/**
 * Extracts the load errors of activated plugins from a gateway health payload.
 *
 * The payload is typed `unknown` and validated field by field: anything that is
 * not shaped like `{ plugins: { errors: [...] } }` yields an empty list. Only
 * entries with `activated === true` and string `id` / `error` fields are kept.
 * A missing or non-string `origin` is reported as `"unknown"`, and the optional
 * `activationSource`, `activationReason` and `failurePhase` fields are copied
 * only when they are strings. Any other properties on an entry are dropped.
 *
 * @param health - Raw `health` value from a gateway probe (any shape).
 * @returns Normalized summaries, in payload order, for activated plugins that
 *   failed to load.
 */
function readActivatedPluginErrors(health: unknown): PluginHealthErrorSummary[] {
  if (!health || typeof health !== "object") {
    return [];
  }
  const plugins = (health as { plugins?: unknown }).plugins;
  if (!plugins || typeof plugins !== "object") {
    return [];
  }
  const errors = (plugins as { errors?: unknown }).errors;
  if (!Array.isArray(errors)) {
    return [];
  }
  const entries: unknown[] = errors;
  const summaries: PluginHealthErrorSummary[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    // Treat every field as possibly absent; each one is checked at runtime
    // before use, so no unverified shape is ever asserted.
    const candidate = entry as Partial<PluginHealthErrorSummary>;
    if (
      candidate.activated !== true ||
      typeof candidate.id !== "string" ||
      typeof candidate.error !== "string"
    ) {
      continue;
    }
    const summary: PluginHealthErrorSummary = {
      id: candidate.id,
      origin: typeof candidate.origin === "string" ? candidate.origin : "unknown",
      activated: true,
      error: candidate.error,
    };
    if (typeof candidate.activationSource === "string") {
      summary.activationSource = candidate.activationSource;
    }
    if (typeof candidate.activationReason === "string") {
      summary.activationReason = candidate.activationReason;
    }
    if (typeof candidate.failurePhase === "string") {
      summary.failurePhase = candidate.failurePhase;
    }
    summaries.push(summary);
  }
  return summaries;
}
155+
156+
/**
 * Forces a restart snapshot to unhealthy when it carries activated plugin load
 * errors.
 *
 * @param snapshot - Snapshot to check; it is never mutated.
 * @returns The same snapshot object when `activatedPluginErrors` is absent or
 *   empty, otherwise a shallow copy with `healthy` set to `false`.
 */
function applyActivatedPluginErrors(snapshot: GatewayRestartSnapshot): GatewayRestartSnapshot {
  if (!snapshot.activatedPluginErrors?.length) {
    return snapshot;
  }
  return { ...snapshot, healthy: false };
}
}
162+
163+
/**
 * Probes the gateway on the loopback interface and summarizes the result.
 *
 * Credentials are read from `OPENCLAW_GATEWAY_TOKEN` and
 * `OPENCLAW_GATEWAY_PASSWORD`; when neither is set the probe runs without auth.
 * The probe uses a 3 second timeout.
 *
 * @param params.port - Local port the gateway is expected to listen on.
 * @param params.includeHealthDetails - When `true`, asks the probe for details
 *   so that plugin load errors can be read from its health payload. Defaults
 *   to not requesting them.
 * @returns Reachability (a successful probe, or a close that
 *   `looksLikeAuthClose` accepts), the reported server version (`null` when
 *   absent), and any activated plugin load errors found in the health payload.
 */
async function confirmGatewayReachable(params: {
  port: number;
  includeHealthDetails?: boolean;
}): Promise<GatewayReachability> {
  const token = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_TOKEN);
  const password = normalizeOptionalString(process.env.OPENCLAW_GATEWAY_PASSWORD);
  const probe = await probeGateway({
    url: `ws://127.0.0.1:${params.port}`,
    auth: token || password ? { token, password } : undefined,
    timeoutMs: 3_000,
    includeDetails: params.includeHealthDetails === true,
  });
  return {
    reachable: probe.ok || looksLikeAuthClose(probe.close?.code, probe.close?.reason),
    gatewayVersion: probe.server?.version ?? null,
    activatedPluginErrors: readActivatedPluginErrors(probe.health),
  };
}
}
118181

@@ -133,7 +196,7 @@ async function inspectGatewayPortHealth(port: number): Promise<GatewayPortHealth
133196
let healthy = false;
134197
if (portUsage.status === "busy") {
135198
try {
136-
healthy = (await confirmGatewayReachable(port)).reachable;
199+
healthy = (await confirmGatewayReachable({ port })).reachable;
137200
} catch {
138201
// best-effort probe
139202
}
@@ -152,8 +215,15 @@ export async function inspectGatewayRestart(params: {
152215
const env = params.env ?? process.env;
153216
const expectedVersion = normalizeOptionalString(params.expectedVersion);
154217
let reachability: GatewayReachability | null = null;
218+
let activatedPluginErrors: PluginHealthErrorSummary[] = [];
155219
const loadReachability = async () => {
156-
reachability ??= await confirmGatewayReachable(params.port);
220+
if (!reachability) {
221+
reachability = await confirmGatewayReachable({
222+
port: params.port,
223+
includeHealthDetails: Boolean(expectedVersion),
224+
});
225+
activatedPluginErrors = reachability.activatedPluginErrors;
226+
}
157227
return reachability;
158228
};
159229
let runtime: GatewayServiceRuntime = { status: "unknown" };
@@ -180,15 +250,20 @@ export async function inspectGatewayRestart(params: {
180250
try {
181251
const reachable = await loadReachability();
182252
if (reachable.reachable) {
183-
return applyExpectedVersion(
184-
{
185-
runtime,
186-
portUsage,
187-
healthy: true,
188-
staleGatewayPids: [],
189-
gatewayVersion: reachable.gatewayVersion,
190-
},
191-
expectedVersion,
253+
return applyActivatedPluginErrors(
254+
applyExpectedVersion(
255+
{
256+
runtime,
257+
portUsage,
258+
healthy: true,
259+
staleGatewayPids: [],
260+
gatewayVersion: reachable.gatewayVersion,
261+
...(reachable.activatedPluginErrors.length > 0
262+
? { activatedPluginErrors: reachable.activatedPluginErrors }
263+
: {}),
264+
},
265+
expectedVersion,
266+
),
192267
);
193268
}
194269
} catch {
@@ -228,6 +303,9 @@ export async function inspectGatewayRestart(params: {
228303
const reachable = await loadReachability();
229304
healthy = reachable.reachable;
230305
gatewayVersion = reachable.gatewayVersion;
306+
if (reachable.activatedPluginErrors.length > 0) {
307+
healthy = false;
308+
}
231309
} catch {
232310
healthy = false;
233311
}
@@ -261,15 +339,18 @@ export async function inspectGatewayRestart(params: {
261339
]),
262340
);
263341

264-
return applyExpectedVersion(
265-
{
266-
runtime,
267-
portUsage,
268-
healthy,
269-
staleGatewayPids,
270-
...(gatewayVersion !== undefined ? { gatewayVersion } : {}),
271-
},
272-
expectedVersion,
342+
return applyActivatedPluginErrors(
343+
applyExpectedVersion(
344+
{
345+
runtime,
346+
portUsage,
347+
healthy,
348+
staleGatewayPids,
349+
...(gatewayVersion !== undefined ? { gatewayVersion } : {}),
350+
...(activatedPluginErrors.length ? { activatedPluginErrors } : {}),
351+
},
352+
expectedVersion,
353+
),
273354
);
274355
}
275356

@@ -330,6 +411,9 @@ export async function waitForGatewayHealthyRestart(params: {
330411
if (snapshot.healthy) {
331412
return withWaitContext(snapshot, "healthy", attempt * delayMs);
332413
}
414+
if (snapshot.activatedPluginErrors?.length) {
415+
return withWaitContext(snapshot, "plugin-errors", attempt * delayMs);
416+
}
333417
if (snapshot.staleGatewayPids.length > 0 && snapshot.runtime.status !== "running") {
334418
return withWaitContext(snapshot, "stale-pids", attempt * delayMs);
335419
}
@@ -399,6 +483,12 @@ export function renderRestartDiagnostics(snapshot: GatewayRestartSnapshot): stri
399483
`Gateway version mismatch: expected ${snapshot.versionMismatch.expected}, running gateway reported ${actual}.`,
400484
);
401485
}
486+
if (snapshot.activatedPluginErrors?.length) {
487+
lines.push("Activated plugin load errors:");
488+
for (const plugin of snapshot.activatedPluginErrors) {
489+
lines.push(`- ${plugin.id}: ${plugin.error}`);
490+
}
491+
}
402492
const runtimeSummary = [
403493
snapshot.runtime.status ? `status=${snapshot.runtime.status}` : null,
404494
snapshot.runtime.state ? `state=${snapshot.runtime.state}` : null,

0 commit comments

Comments
 (0)