Skip to content

Commit 394d978

Browse files
fix(agents): release session write lock if fence read throws on prompt release
1 parent d0b2b4b commit 394d978

2 files changed

Lines changed: 13 additions & 15 deletions

File tree

src/agents/embedded-agent-runner/run/attempt.session-lock.test.ts

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ const lockOptions = {
3434
const tempDirs: string[] = [];
3535

3636
afterEach(async () => {
37+
vi.restoreAllMocks();
3738
resetEmbeddedAttemptSessionFileOwnersForTest();
3839
resetSessionWriteLockStateForTest();
3940
for (const dir of tempDirs.splice(0)) {
@@ -136,7 +137,7 @@ describe("embedded attempt session lock lifecycle", () => {
136137
expect(releases).toEqual(["held"]);
137138
});
138139

139-
it("releases the eagerly-held lock even when the fence read throws during prompt release (FAD-845)", async () => {
140+
it("releases the eagerly-held lock when the fence read throws during prompt release", async () => {
140141
const release = vi.fn(async () => {});
141142
const acquireSessionWriteLockLocalFad845 = vi.fn(async () => ({ release }));
142143
const controller = await createEmbeddedAttemptSessionLockController({
@@ -151,14 +152,15 @@ describe("embedded attempt session lock lifecycle", () => {
151152
const statError = Object.assign(new Error("simulated I/O failure"), { code: "EIO" });
152153
const statSpy = vi.spyOn(fs, "stat").mockRejectedValueOnce(statError);
153154

154-
await expect(controller.releaseForPrompt()).rejects.toThrow();
155+
try {
156+
await expect(controller.releaseForPrompt()).rejects.toThrow();
155157

156-
// The underlying file lock MUST still be released; otherwise it is orphaned on the
157-
// live gateway process for the full maxHoldMs lease (~17 min), wedging every
158-
// subsequent interactive turn with SessionWriteLockTimeoutError. See FAD-845.
159-
expect(release).toHaveBeenCalledTimes(1);
160-
161-
statSpy.mockRestore();
158+
// The underlying file lock must still be released so later turns do not wait for
159+
// the full maxHoldMs watchdog before the stale lease is reclaimed.
160+
expect(release).toHaveBeenCalledTimes(1);
161+
} finally {
162+
statSpy.mockRestore();
163+
}
162164
});
163165

164166
it("releaseHeldLockForAbort and dispose are idempotent in succession (#86816)", async () => {

src/agents/embedded-agent-runner/run/attempt.session-lock.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -801,13 +801,9 @@ export async function createEmbeddedAttemptSessionLockController(params: {
801801
}
802802
const lock = heldLock;
803803
heldLock = undefined;
804-
// Once `heldLock` is cleared the controller no longer tracks this lock, so the
805-
// underlying file lock MUST be released even if the fence bookkeeping below
806-
// throws. The fence reads perform filesystem I/O that can fail with a transient
807-
// non-ENOENT error (e.g. EIO/EMFILE under load); without this guard such a throw
808-
// would orphan the lock on the live gateway process for the full maxHoldMs lease
809-
// (~17 min), wedging every subsequent interactive turn with
810-
// SessionWriteLockTimeoutError until the watchdog reclaims it. See FAD-845.
804+
// Clearing `heldLock` transfers release ownership to this block. Fence reads can
805+
// throw after that transfer; release the underlying file lock anyway so later
806+
// turns do not wait for the maxHoldMs watchdog.
811807
try {
812808
const fingerprint = await readSessionFileFingerprint(params.lockOptions.sessionFile);
813809
const ownedWrite = ownedSessionFileWrites.get(sessionFileFenceKey);

0 commit comments

Comments
 (0)