Skip to content

Commit 74f9d6b

Browse files
yetvalsteipete
andauthored
fix(codex): preserve shared app-server when spawned helper run fails logically (#72574) (#87375)
* fix(codex): preserve shared app-server when spawned helper run fails logically * fix(codex): widen spawnedBy param to match EmbeddedRunAttemptParams * fix(codex): align spawnedBy startup typing * fix(codex): retire shared client on spawned startup timeout * fix(codex): narrow spawned thread-start preservation --------- Co-authored-by: Peter Steinberger <steipete@gmail.com>
1 parent 15b1e99 commit 74f9d6b

4 files changed

Lines changed: 167 additions & 7 deletions

File tree

extensions/codex/src/app-server/attempt-startup.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import {
4949
} from "./shared-client.js";
5050
import {
5151
startOrResumeThread,
52+
isCodexThreadStartRequestError,
5253
type CodexAppServerThreadLifecycleBinding,
5354
type CodexContextEngineThreadBootstrapProjection,
5455
} from "./thread-lifecycle.js";
@@ -95,6 +96,7 @@ export async function startCodexAttemptThread(params: {
9596
startupTimeoutMs: number;
9697
signal: AbortSignal;
9798
onStartupTimeout: () => void | Promise<void>;
99+
spawnedBy: EmbeddedRunAttemptParams["spawnedBy"];
98100
}): Promise<StartCodexAttemptThreadResult> {
99101
let pluginAppServer = params.appServer;
100102
let releaseSharedClientLease: (() => void) | undefined;
@@ -373,7 +375,12 @@ export async function startCodexAttemptThread(params: {
373375
releaseSharedClientLease,
374376
};
375377
} catch (error) {
376-
clearSharedCodexAppServerClientIfCurrent(startupClientForCleanup);
378+
const transportPoisoned = isCodexAppServerConnectionClosedError(error);
379+
const preserveSharedClientForSpawnedThreadStartError =
380+
Boolean(params.spawnedBy?.trim()) && isCodexThreadStartRequestError(error);
381+
if (transportPoisoned || !preserveSharedClientForSpawnedThreadStartError) {
382+
clearSharedCodexAppServerClientIfCurrent(startupClientForCleanup);
383+
}
377384
throw error;
378385
}
379386
}

extensions/codex/src/app-server/run-attempt.test.ts

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import { CODEX_GPT5_BEHAVIOR_CONTRACT } from "../../prompt-overlay.js";
2727
import { defaultCodexAppInventoryCache } from "./app-inventory-cache.js";
2828
import * as authBridge from "./auth-bridge.js";
2929
import { resolveCodexAppServerEnvApiKeyCacheKey } from "./auth-bridge.js";
30+
import { CodexAppServerRpcError } from "./client.js";
3031
import { readCodexPluginConfig, resolveCodexAppServerRuntimeOptions } from "./config.js";
3132
import {
3233
CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE,
@@ -65,6 +66,7 @@ import {
6566
} from "./sandbox-exec-server.js";
6667
import { createSandboxContext } from "./sandbox-exec-server.test-helpers.js";
6768
import { readCodexAppServerBinding, writeCodexAppServerBinding } from "./session-binding.js";
69+
import * as sharedClientModule from "./shared-client.js";
6870
import { createCodexTestModel } from "./test-support.js";
6971
import { buildTurnStartParams, startOrResumeThread } from "./thread-lifecycle.js";
7072

@@ -3489,6 +3491,134 @@ describe("runCodexAppServerAttempt", () => {
34893491
]);
34903492
});
34913493

3494+
it("does not retire the shared Codex client when a spawned helper run fails with a logical thread/start error", async () => {
3495+
const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
3496+
clearSpy.mockClear();
3497+
let failedClient: unknown;
3498+
setCodexAppServerClientFactoryForTest(async () => {
3499+
const c = {
3500+
request: vi.fn(async (method: string) => {
3501+
if (method === "thread/start") {
3502+
throw new CodexAppServerRpcError(
3503+
{ message: "401 authentication_error: Invalid bearer token" },
3504+
"thread/start",
3505+
);
3506+
}
3507+
return {};
3508+
}),
3509+
addNotificationHandler: vi.fn(() => () => undefined),
3510+
addRequestHandler: vi.fn(() => () => undefined),
3511+
};
3512+
failedClient = c;
3513+
return c as never;
3514+
});
3515+
const params = createParams(
3516+
path.join(tempDir, "session.jsonl"),
3517+
path.join(tempDir, "workspace"),
3518+
);
3519+
params.spawnedBy = "agent:main:session-parent";
3520+
3521+
await expect(runCodexAppServerAttempt(params)).rejects.toThrow("Invalid bearer token");
3522+
const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
3523+
expect(calledWithFailedClient).toBe(false);
3524+
clearSpy.mockRestore();
3525+
});
3526+
3527+
it("retires the shared Codex client when a spawned helper run times out during thread/start", async () => {
3528+
const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
3529+
clearSpy.mockClear();
3530+
let failedClient: unknown;
3531+
setCodexAppServerClientFactoryForTest(async () => {
3532+
const c = {
3533+
request: vi.fn(async (method: string) => {
3534+
if (method === "thread/start") {
3535+
return await new Promise<never>(() => undefined);
3536+
}
3537+
return {};
3538+
}),
3539+
addNotificationHandler: vi.fn(() => () => undefined),
3540+
addRequestHandler: vi.fn(() => () => undefined),
3541+
};
3542+
failedClient = c;
3543+
return c as never;
3544+
});
3545+
const params = createParams(
3546+
path.join(tempDir, "session.jsonl"),
3547+
path.join(tempDir, "workspace"),
3548+
);
3549+
params.spawnedBy = "agent:main:session-parent";
3550+
params.timeoutMs = 1;
3551+
3552+
await expect(runCodexAppServerAttempt(params, { startupTimeoutFloorMs: 1 })).rejects.toThrow(
3553+
"codex app-server startup timed out",
3554+
);
3555+
const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
3556+
expect(calledWithFailedClient).toBe(true);
3557+
clearSpy.mockRestore();
3558+
});
3559+
3560+
it("retires the shared Codex client when a spawned helper hits a thread/start write failure", async () => {
3561+
const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
3562+
clearSpy.mockClear();
3563+
let failedClient: unknown;
3564+
setCodexAppServerClientFactoryForTest(async () => {
3565+
const c = {
3566+
request: vi.fn(async (method: string) => {
3567+
if (method === "thread/start") {
3568+
throw new Error("write EPIPE");
3569+
}
3570+
return {};
3571+
}),
3572+
addNotificationHandler: vi.fn(() => () => undefined),
3573+
addRequestHandler: vi.fn(() => () => undefined),
3574+
};
3575+
failedClient = c;
3576+
return c as never;
3577+
});
3578+
const params = createParams(
3579+
path.join(tempDir, "session.jsonl"),
3580+
path.join(tempDir, "workspace"),
3581+
);
3582+
params.spawnedBy = "agent:main:session-parent";
3583+
3584+
await expect(runCodexAppServerAttempt(params)).rejects.toThrow("write EPIPE");
3585+
const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
3586+
expect(calledWithFailedClient).toBe(true);
3587+
clearSpy.mockRestore();
3588+
});
3589+
3590+
it("retires the shared Codex client when a top-level run fails with a logical thread/start error", async () => {
3591+
const clearSpy = vi.spyOn(sharedClientModule, "clearSharedCodexAppServerClientIfCurrent");
3592+
clearSpy.mockClear();
3593+
let failedClient: unknown;
3594+
setCodexAppServerClientFactoryForTest(async () => {
3595+
const c = {
3596+
request: vi.fn(async (method: string) => {
3597+
if (method === "thread/start") {
3598+
throw new CodexAppServerRpcError(
3599+
{ message: "401 authentication_error: Invalid bearer token" },
3600+
"thread/start",
3601+
);
3602+
}
3603+
return {};
3604+
}),
3605+
addNotificationHandler: vi.fn(() => () => undefined),
3606+
addRequestHandler: vi.fn(() => () => undefined),
3607+
};
3608+
failedClient = c;
3609+
return c as never;
3610+
});
3611+
const params = createParams(
3612+
path.join(tempDir, "session.jsonl"),
3613+
path.join(tempDir, "workspace"),
3614+
);
3615+
3616+
await expect(runCodexAppServerAttempt(params)).rejects.toThrow("Invalid bearer token");
3617+
const calledWithFailedClient = clearSpy.mock.calls.some(([arg]) => arg === failedClient);
3618+
expect(calledWithFailedClient).toBe(true);
3619+
clearSpy.mockRestore();
3620+
});
3621+
34923622
it("passes configured app-server policy, sandbox, service tier, and model on resume", async () => {
34933623
const sessionFile = path.join(tempDir, "session.jsonl");
34943624
const workspaceDir = path.join(tempDir, "workspace");

extensions/codex/src/app-server/run-attempt.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,7 @@ export async function runCodexAppServerAttempt(
864864
onStartupTimeout: () => {
865865
runAbortController.abort("codex_startup_timeout");
866866
},
867+
spawnedBy: params.spawnedBy,
867868
});
868869
client = startupResult.client;
869870
thread = startupResult.thread;

extensions/codex/src/app-server/thread-lifecycle.ts

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
import {
22
embeddedAgentLog,
3+
formatErrorMessage,
34
isActiveHarnessContextEngine,
45
type EmbeddedRunAttemptParams,
56
} from "openclaw/plugin-sdk/agent-harness-runtime";
67
import { buildCodexUserMcpServersThreadConfigPatch } from "openclaw/plugin-sdk/codex-mcp-projection";
78
import { listRegisteredPluginAgentPromptGuidance } from "openclaw/plugin-sdk/plugin-runtime";
89
import { CODEX_GPT5_HEARTBEAT_PROMPT_OVERLAY } from "../../prompt-overlay.js";
910
import { isModernCodexModel } from "../../provider.js";
10-
import { isCodexAppServerConnectionClosedError, type CodexAppServerClient } from "./client.js";
11+
import {
12+
CodexAppServerRpcError,
13+
isCodexAppServerConnectionClosedError,
14+
type CodexAppServerClient,
15+
} from "./client.js";
1116
import { codexSandboxPolicyForTurn, type CodexAppServerRuntimeOptions } from "./config.js";
1217
import {
1318
resolveCodexContextEngineProjectionMaxChars,
@@ -56,6 +61,17 @@ export type CodexAppServerThreadLifecycleBinding = CodexAppServerThreadBinding &
5661
lifecycle: CodexAppServerThreadLifecycle;
5762
};
5863

64+
class CodexThreadStartRequestError extends Error {
65+
constructor(cause: unknown) {
66+
super(formatErrorMessage(cause), { cause });
67+
this.name = "CodexThreadStartRequestError";
68+
}
69+
}
70+
71+
export function isCodexThreadStartRequestError(error: unknown): boolean {
72+
return error instanceof CodexThreadStartRequestError;
73+
}
74+
5975
export type CodexThreadFinalConfigPatchDecision =
6076
| { action: "resume"; binding: CodexAppServerThreadBinding }
6177
| { action: "start" };
@@ -548,11 +564,17 @@ export async function startOrResumeThread(params: {
548564
environmentSelection: params.environmentSelection,
549565
}),
550566
);
551-
const response = assertCodexThreadStartResponse(
552-
await lifecycleTiming.measure("thread_start_request", () =>
553-
params.client.request("thread/start", startParams),
554-
),
555-
);
567+
const threadStartResponse = await lifecycleTiming.measure("thread_start_request", async () => {
568+
try {
569+
return await params.client.request("thread/start", startParams);
570+
} catch (error) {
571+
if (error instanceof CodexAppServerRpcError) {
572+
throw new CodexThreadStartRequestError(error);
573+
}
574+
throw error;
575+
}
576+
});
577+
const response = assertCodexThreadStartResponse(threadStartResponse);
556578
const modelProvider = resolveCodexAppServerModelProvider({
557579
provider: params.params.provider,
558580
authProfileId: params.params.authProfileId,

0 commit comments

Comments
 (0)