Skip to content

Commit 01c95f0

Browse files
committed
fix(agents): persist subagent registry before returning accepted
Native subagent spawn could return `accepted` while the run entry was absent from ~/.openclaw/subagents/runs.json, so `subagents list` and completion delivery lost track of the run. persistSubagentRunsToDisk swallowed the save error, which hid the cause of the missing registry entry. Make the initial registration fail closed: persist via a strict variant that propagates write errors, and roll back the in-memory entry on failure so that spawn returns an actionable error instead of `accepted`. Subsequent lifecycle updates keep using the best-effort writer. Refs #83132
1 parent 833f1ce commit 01c95f0

6 files changed

Lines changed: 49 additions & 0 deletions

src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,9 @@ export async function getSessionsSpawnTool(opts: CreateOpenClawToolsOpts) {
221221
persistSubagentRunsToDisk: () => {
222222
hoisted.notifyEventWaiters();
223223
},
224+
persistSubagentRunsToDiskOrThrow: () => {
225+
hoisted.notifyEventWaiters();
226+
},
224227
restoreSubagentRunsFromDisk: () => 0,
225228
resolveContextEngine: async () => ({
226229
info: { id: "test", name: "Test" },

src/agents/subagent-control.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ beforeEach(() => {
163163
ensureRuntimePluginsLoaded: () => {},
164164
getSubagentRunsSnapshotForRead: (runs) => new Map(runs),
165165
persistSubagentRunsToDisk: () => {},
166+
persistSubagentRunsToDiskOrThrow: () => {},
166167
restoreSubagentRunsFromDisk: () => 0,
167168
resolveContextEngine: async () => ({
168169
info: { id: "test", name: "Test" },

src/agents/subagent-registry-run-manager.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ export function createSubagentRunManager(params: {
108108
resumedRuns: Set<string>;
109109
endedHookInFlightRunIds: Set<string>;
110110
persist(): void;
111+
persistOrThrow(): void;
111112
callGateway: typeof callGateway;
112113
getRuntimeConfig: typeof getRuntimeConfig;
113114
ensureRuntimePluginsLoaded:
@@ -510,6 +511,12 @@ export function createSubagentRunManager(params: {
510511
retainAttachmentsOnKeep: registerParams.retainAttachmentsOnKeep,
511512
};
512513
params.runs.set(runId, entry);
514+
try {
515+
params.persistOrThrow();
516+
} catch (error) {
517+
params.runs.delete(runId);
518+
throw error;
519+
}
513520
try {
514521
createRunningTaskRun({
515522
runtime: "subagent",

src/agents/subagent-registry-state.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ export function persistSubagentRunsToDisk(runs: Map<string, SubagentRunRecord>)
1212
}
1313
}
1414

15+
/**
 * Strict persistence entry point for the subagent run registry.
 *
 * Hands `runs` straight to `saveSubagentRegistryToDisk` with no surrounding
 * try/catch, so whatever that call throws reaches the caller unchanged.
 *
 * @param runs - The run registry map to write out.
 * @throws Whatever `saveSubagentRegistryToDisk` throws.
 */
export function persistSubagentRunsToDiskOrThrow(runs: Map<string, SubagentRunRecord>) {
16+
saveSubagentRegistryToDisk(runs);
17+
}
18+
1519
export function restoreSubagentRunsFromDisk(params: {
1620
runs: Map<string, SubagentRunRecord>;
1721
mergeOnly?: boolean;

src/agents/subagent-registry.test.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ const mocks = vi.hoisted(() => ({
8484
updateSessionStore: vi.fn(),
8585
emitSessionLifecycleEvent: vi.fn(),
8686
persistSubagentRunsToDisk: vi.fn(),
87+
persistSubagentRunsToDiskOrThrow: vi.fn(),
8788
restoreSubagentRunsFromDisk: vi.fn(() => 0),
8889
getSubagentRunsSnapshotForRead: vi.fn(
8990
(runs: Map<string, import("./subagent-registry.types.js").SubagentRunRecord>) => new Map(runs),
@@ -130,6 +131,7 @@ vi.mock("../sessions/session-lifecycle-events.js", () => ({
130131
vi.mock("./subagent-registry-state.js", () => ({
131132
getSubagentRunsSnapshotForRead: mocks.getSubagentRunsSnapshotForRead,
132133
persistSubagentRunsToDisk: mocks.persistSubagentRunsToDisk,
134+
persistSubagentRunsToDiskOrThrow: mocks.persistSubagentRunsToDiskOrThrow,
133135
restoreSubagentRunsFromDisk: mocks.restoreSubagentRunsFromDisk,
134136
}));
135137

@@ -211,6 +213,7 @@ describe("subagent registry seam flow", () => {
211213
cleanupBrowserSessionsForLifecycleEnd: mocks.cleanupBrowserSessionsForLifecycleEnd,
212214
onAgentEvent: mocks.onAgentEvent,
213215
persistSubagentRunsToDisk: mocks.persistSubagentRunsToDisk,
216+
persistSubagentRunsToDiskOrThrow: mocks.persistSubagentRunsToDiskOrThrow,
214217
resolveAgentTimeoutMs: mocks.resolveAgentTimeoutMs,
215218
restoreSubagentRunsFromDisk: mocks.restoreSubagentRunsFromDisk,
216219
runSubagentAnnounceFlow: mocks.runSubagentAnnounceFlow,
@@ -732,6 +735,29 @@ describe("subagent registry seam flow", () => {
732735
expect(mocks.persistSubagentRunsToDisk).toHaveBeenCalledTimes(6);
733736
});
734737

738+
// Fail-closed check: if the strict initial persist throws, registerSubagentRun
// must surface that error and the run must not remain listed.
it("throws and removes the entry when the initial durable registry write fails", () => {
739+
// Make only the next strict persist call fail.
mocks.persistSubagentRunsToDiskOrThrow.mockImplementationOnce(() => {
740+
throw new Error("disk full");
741+
});
742+
743+
// The write error is expected to reach the caller of registerSubagentRun.
expect(() =>
744+
mod.registerSubagentRun({
745+
runId: "run-durability-required",
746+
childSessionKey: "agent:main:subagent:child",
747+
requesterSessionKey: "agent:main:main",
748+
requesterDisplayKey: "main",
749+
task: "must fail closed",
750+
cleanup: "keep",
751+
}),
752+
).toThrowError("disk full");
753+
754+
// The failed run must not appear in the requester's listing afterwards.
expect(
755+
mod
756+
.listSubagentRunsForRequester("agent:main:main")
757+
.find((entry) => entry.runId === "run-durability-required"),
758+
).toBeUndefined();
759+
});
760+
735761
it("continues completion announce cleanup when lifecycle cleanup fails", async () => {
736762
mocks.cleanupBrowserSessionsForLifecycleEnd.mockRejectedValueOnce(
737763
new Error("browser cleanup unavailable"),

src/agents/subagent-registry.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ import {
5353
import {
5454
getSubagentRunsSnapshotForRead,
5555
persistSubagentRunsToDisk,
56+
persistSubagentRunsToDiskOrThrow,
5657
restoreSubagentRunsFromDisk,
5758
} from "./subagent-registry-state.js";
5859
import { configureSubagentRegistrySteerRuntime } from "./subagent-registry-steer-runtime.js";
@@ -90,6 +91,7 @@ type SubagentRegistryDeps = {
9091
getRuntimeConfig: typeof getRuntimeConfig;
9192
onAgentEvent: typeof onAgentEvent;
9293
persistSubagentRunsToDisk: typeof persistSubagentRunsToDisk;
94+
persistSubagentRunsToDiskOrThrow: typeof persistSubagentRunsToDiskOrThrow;
9395
resolveAgentTimeoutMs: typeof resolveAgentTimeoutMs;
9496
restoreSubagentRunsFromDisk: typeof restoreSubagentRunsFromDisk;
9597
runSubagentAnnounceFlow: SubagentAnnounceModule["runSubagentAnnounceFlow"];
@@ -128,6 +130,7 @@ const defaultSubagentRegistryDeps: SubagentRegistryDeps = {
128130
getRuntimeConfig,
129131
onAgentEvent,
130132
persistSubagentRunsToDisk,
133+
persistSubagentRunsToDiskOrThrow,
131134
resolveAgentTimeoutMs,
132135
restoreSubagentRunsFromDisk,
133136
runSubagentAnnounceFlow: async (params) =>
@@ -249,6 +252,10 @@ function persistSubagentRuns() {
249252
subagentRegistryDeps.persistSubagentRunsToDisk(subagentRuns);
250253
}
251254

255+
/**
 * Writes the registry's `subagentRuns` map using the strict writer exposed on
 * `subagentRegistryDeps`.
 *
 * Nothing is caught here, so an error thrown by
 * `persistSubagentRunsToDiskOrThrow` propagates to the caller.
 */
function persistSubagentRunsOrThrow() {
256+
subagentRegistryDeps.persistSubagentRunsToDiskOrThrow(subagentRuns);
257+
}
258+
252259
export function scheduleSubagentOrphanRecovery(params?: { delayMs?: number; maxRetries?: number }) {
253260
const now = Date.now();
254261
if (now - lastOrphanRecoveryScheduleAt < ORPHAN_RECOVERY_DEBOUNCE_MS) {
@@ -1025,6 +1032,7 @@ const subagentRunManager = createSubagentRunManager({
10251032
resumedRuns,
10261033
endedHookInFlightRunIds,
10271034
persist: persistSubagentRuns,
1035+
persistOrThrow: persistSubagentRunsOrThrow,
10281036
callGateway: (request) => subagentRegistryDeps.callGateway(request),
10291037
getRuntimeConfig: () => subagentRegistryDeps.getRuntimeConfig(),
10301038
ensureRuntimePluginsLoaded: (args: {

0 commit comments

Comments
 (0)