Skip to content

Commit f9c6fc7

Browse files
Galin IlievGalin Iliev
authored andcommitted
fix: preserve subagent delivery after lock stalls
1 parent 277d8fe commit f9c6fc7

5 files changed

Lines changed: 71 additions & 10 deletions

File tree

src/agents/session-write-lock.test.ts

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,21 @@ describe("acquireSessionWriteLock", () => {
215215

216216
it("does not reenter locks by default in the same process", async () => {
217217
await withTempSessionLockFile(async ({ sessionFile }) => {
218+
pinCurrentProcessStartTimeForTest();
218219
const lock = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
219-
await expect(
220-
acquireSessionWriteLock({ sessionFile, timeoutMs: 5, staleMs: 60_000 }),
221-
).rejects.toThrow(/session file locked/);
220+
let error: unknown;
221+
try {
222+
await acquireSessionWriteLock({ sessionFile, timeoutMs: 5, staleMs: 60_000 });
223+
} catch (err) {
224+
error = err;
225+
}
226+
expect(error).toBeInstanceOf(Error);
227+
expect(String((error as { owner?: unknown }).owner)).toContain(`pid=${process.pid}`);
228+
expect(String((error as { owner?: unknown }).owner)).toContain("alive=true");
229+
expect(String((error as { owner?: unknown }).owner)).toContain(
230+
`starttime=${FAKE_STARTTIME}`,
231+
);
232+
expect(error).toHaveProperty("message", expect.stringContaining("currentStarttime=12345"));
222233
await lock.release();
223234
});
224235
});

src/agents/session-write-lock.ts

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,35 @@ function inspectLockPayloadForSession(params: {
637637
return inspected;
638638
}
639639

640+
function formatSessionLockTimeoutOwner(params: {
641+
payload: LockFilePayload | null;
642+
inspected: LockInspectionDetails;
643+
}): string {
644+
const parts = [
645+
params.inspected.pid === null ? "pid=unknown" : `pid=${params.inspected.pid}`,
646+
`alive=${params.inspected.pidAlive}`,
647+
];
648+
if (typeof params.payload?.starttime === "number") {
649+
parts.push(`starttime=${params.payload.starttime}`);
650+
if (params.inspected.pid !== null && params.inspected.pidAlive) {
651+
const currentStarttime = resolveProcessStartTimeForLock(params.inspected.pid);
652+
parts.push(`currentStarttime=${currentStarttime ?? "unknown"}`);
653+
}
654+
} else {
655+
parts.push("starttime=missing");
656+
}
657+
if (params.inspected.createdAt) {
658+
parts.push(`createdAt=${params.inspected.createdAt}`);
659+
}
660+
if (params.inspected.ageMs !== null) {
661+
parts.push(`ageMs=${params.inspected.ageMs}`);
662+
}
663+
if (params.inspected.staleReasons.length > 0) {
664+
parts.push(`staleReasons=${params.inspected.staleReasons.join(",")}`);
665+
}
666+
return parts.join(" ");
667+
}
668+
640669
export async function cleanStaleLockFiles(params: {
641670
sessionsDir: string;
642671
config?: SessionWriteLockAcquireTimeoutConfig;
@@ -783,7 +812,16 @@ export async function acquireSessionWriteLock(params: {
783812
}
784813
const timeoutLockPath = (err as { lockPath?: string }).lockPath ?? lockPath;
785814
const payload = await readLockPayload(timeoutLockPath);
786-
const owner = typeof payload?.pid === "number" ? `pid=${payload.pid}` : "unknown";
815+
const inspected = inspectLockPayloadForSession({
816+
payload,
817+
staleMs,
818+
nowMs: Date.now(),
819+
heldByThisProcess: sessionLockHeldByThisProcess(normalizedSessionFile),
820+
reclaimLockWithoutStarttime: true,
821+
readOwnerProcessArgs: readProcessArgsSync,
822+
respectMaxHold: true,
823+
});
824+
const owner = formatSessionLockTimeoutOwner({ payload, inspected });
787825
throw new SessionWriteLockTimeoutError({ timeoutMs, owner, lockPath: timeoutLockPath });
788826
}
789827
}

src/agents/subagent-registry.test.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1864,7 +1864,11 @@ describe("subagent registry seam flow", () => {
18641864
suspendedAt: undefined,
18651865
suspendedReason: undefined,
18661866
discardedAt: now,
1867-
discardReason: "expired",
1867+
discardReason: "expired-after-durable-fallback",
1868+
},
1869+
completion: {
1870+
fallbackResultText: "large final payload",
1871+
fallbackCapturedAt: now,
18681872
},
18691873
cleanupHandled: true,
18701874
cleanupCompletedAt: now,

src/agents/subagent-registry.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -704,9 +704,12 @@ async function discardSuspendedPendingFinalDelivery(
704704
): Promise<void> {
705705
const delivery = ensureDeliveryState(entry);
706706
const payload = delivery.payload;
707+
const durableFallbackText = payload?.frozenResultText ?? payload?.fallbackFrozenResultText;
708+
const persistedDurableFallback =
709+
reason === "expired" && typeof durableFallbackText === "string" && durableFallbackText.length > 0;
707710
delivery.status = "discarded";
708711
delivery.discardedAt = now;
709-
delivery.discardReason = reason;
712+
delivery.discardReason = persistedDurableFallback ? "expired-after-durable-fallback" : reason;
710713
delivery.discardedPayloadSummary = {
711714
requesterSessionKey: payload?.requesterSessionKey ?? entry.requesterSessionKey,
712715
childSessionKey: payload?.childSessionKey ?? entry.childSessionKey,
@@ -724,15 +727,20 @@ async function discardSuspendedPendingFinalDelivery(
724727
delivery.suspendedReason = undefined;
725728
entry.wakeOnDescendantSettle = undefined;
726729
const completion = ensureCompletionState(entry);
727-
completion.fallbackResultText = undefined;
728-
completion.fallbackCapturedAt = undefined;
730+
if (persistedDurableFallback) {
731+
completion.fallbackResultText = durableFallbackText;
732+
completion.fallbackCapturedAt = now;
733+
} else {
734+
completion.fallbackResultText = undefined;
735+
completion.fallbackCapturedAt = undefined;
736+
}
729737
entry.cleanupHandled = true;
730738
delivery.announcedAt = undefined;
731739
resumedRuns.delete(runId);
732740
clearPendingLifecycleError(runId);
733741
clearPendingLifecycleTimeout(runId);
734742
log.warn("subagent suspended delivery discarded", {
735-
reason,
743+
reason: delivery.discardReason,
736744
runId: entry.runId,
737745
childSessionKey: entry.childSessionKey,
738746
requesterSessionKey: entry.requesterSessionKey,

src/agents/subagent-registry.types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ export type SubagentCompletionDeliveryState = {
5959
suspendedAt?: number;
6060
suspendedReason?: "retry-limit" | "expiry";
6161
discardedAt?: number;
62-
discardReason?: "expired" | "pressure-pruned";
62+
discardReason?: "expired" | "expired-after-durable-fallback" | "pressure-pruned";
6363
discardedPayloadSummary?: {
6464
requesterSessionKey?: string;
6565
childSessionKey?: string;

0 commit comments

Comments
 (0)