Skip to content

Commit 115f649

Browse files
committed
fix(codex): retry only completion watchdog stalls
1 parent c0b85ec commit 115f649

5 files changed

Lines changed: 51 additions & 2 deletions

File tree

extensions/codex/src/app-server/run-attempt.ts

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,10 @@ import {
100100
resolveCodexTurnTerminalIdleTimeoutMs,
101101
withCodexStartupTimeout,
102102
} from "./attempt-timeouts.js";
103-
import { createCodexAttemptTurnWatchController } from "./attempt-turn-watches.js";
103+
import {
104+
createCodexAttemptTurnWatchController,
105+
type CodexAttemptTurnWatchTimeoutKind,
106+
} from "./attempt-turn-watches.js";
104107
import {
105108
refreshCodexAppServerAuthTokens,
106109
resolveCodexAppServerAuthAccountCacheKey,
@@ -943,6 +946,7 @@ export async function runCodexAppServerAttempt(
943946
let terminalTurnNotificationQueued = false;
944947
let timedOut = false;
945948
let turnCompletionIdleTimedOut = false;
949+
let turnWatchTimeoutKind: CodexAttemptTurnWatchTimeoutKind | undefined;
946950
let turnCompletionIdleTimeoutMessage: string | undefined;
947951
let clientClosedPromptError: string | undefined;
948952
let clientClosedAbort = false;
@@ -1021,9 +1025,10 @@ export async function runCodexAppServerAttempt(
10211025
turnTerminalIdleTimeoutMs,
10221026
interruptTimeoutMs: CODEX_APP_SERVER_INTERRUPT_TIMEOUT_MS,
10231027
onInterruptTurn: (input) => interruptCodexTurnBestEffort(client, input),
1024-
onTimeout: () => {
1028+
onTimeout: (timeout) => {
10251029
timedOut = true;
10261030
turnCompletionIdleTimedOut = true;
1031+
turnWatchTimeoutKind = timeout.kind;
10271032
turnCompletionIdleTimeoutMessage =
10281033
"codex app-server turn idle timed out waiting for turn/completed";
10291034
},
@@ -2122,6 +2127,10 @@ export async function runCodexAppServerAttempt(
21222127
? {
21232128
codexAppServerFailure: {
21242129
kind: codexAppServerFailureKind,
2130+
...(codexAppServerFailureKind === "turn_completion_idle_timeout" &&
2131+
turnWatchTimeoutKind
2132+
? { turnWatchTimeoutKind }
2133+
: {}),
21252134
transport: appServer.start.transport,
21262135
threadId: thread.threadId,
21272136
turnId: activeTurnId,

extensions/codex/src/app-server/run-attempt.turn-watches.test.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2147,6 +2147,7 @@ describe("runCodexAppServerAttempt turn watches", () => {
21472147
promptError: "codex app-server turn idle timed out waiting for turn/completed",
21482148
codexAppServerFailure: {
21492149
kind: "turn_completion_idle_timeout",
2150+
turnWatchTimeoutKind: "completion",
21502151
transport: "stdio",
21512152
threadId: "thread-1",
21522153
turnId: "turn-1",

src/agents/embedded-agent-runner/run.codex-app-server-recovery.test.ts

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ function codexTurnCompletionIdleTimeoutAttempt(
4242
promptErrorSource: "prompt",
4343
codexAppServerFailure: {
4444
kind: "turn_completion_idle_timeout",
45+
turnWatchTimeoutKind: "completion",
4546
transport: "stdio",
4647
threadId: "thread-1",
4748
turnId: "turn-1",
@@ -172,6 +173,35 @@ describe("runEmbeddedAgent Codex app-server recovery", () => {
172173
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
173174
});
174175

176+
it("does not retry non-completion Codex turn watch timeouts", async () => {
177+
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
178+
codexTurnCompletionIdleTimeoutAttempt({
179+
codexAppServerFailure: {
180+
kind: "turn_completion_idle_timeout",
181+
turnWatchTimeoutKind: "progress",
182+
transport: "stdio",
183+
threadId: "thread-1",
184+
turnId: "turn-1",
185+
replaySafe: true,
186+
},
187+
}),
188+
);
189+
190+
const result = await runEmbeddedAgent({
191+
...overflowBaseRunParams,
192+
provider: "codex",
193+
model: "gpt-5.5",
194+
runId: "run-codex-progress-idle-timeout",
195+
});
196+
197+
expect(result.payloads?.[0]).toMatchObject({
198+
isError: true,
199+
text: "Request timed out before a response was generated. Please try again, or increase `agents.defaults.timeoutSeconds` in your config.",
200+
});
201+
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
202+
expect(mockedMarkAuthProfileFailure).not.toHaveBeenCalled();
203+
});
204+
175205
it("returns a timeout payload after a replay-safe turn/completed idle timeout retry is exhausted", async () => {
176206
mockedRunEmbeddedAttempt
177207
.mockResolvedValueOnce(codexTurnCompletionIdleTimeoutAttempt())
@@ -199,6 +229,7 @@ describe("runEmbeddedAgent Codex app-server recovery", () => {
199229
codexTurnCompletionIdleTimeoutAttempt({
200230
codexAppServerFailure: {
201231
kind: "turn_completion_idle_timeout",
232+
turnWatchTimeoutKind: "completion",
202233
transport: "websocket",
203234
threadId: "thread-1",
204235
turnId: "turn-1",
@@ -237,6 +268,7 @@ describe("runEmbeddedAgent Codex app-server recovery", () => {
237268
},
238269
codexAppServerFailure: {
239270
kind: "turn_completion_idle_timeout",
271+
turnWatchTimeoutKind: "completion",
240272
transport: "stdio",
241273
threadId: "thread-1",
242274
turnId: "turn-1",

src/agents/embedded-agent-runner/run/codex-app-server-recovery.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,12 @@ export function resolveCodexAppServerRecoveryRetry(params: {
1414
) {
1515
return { retry: false, reason: failure.kind };
1616
}
17+
if (
18+
failure.kind === "turn_completion_idle_timeout" &&
19+
failure.turnWatchTimeoutKind !== "completion"
20+
) {
21+
return { retry: false, reason: failure.turnWatchTimeoutKind ?? "unknown_turn_watch_timeout" };
22+
}
1723
if (failure.transport !== "stdio") {
1824
return { retry: false, reason: "non_stdio_transport" };
1925
}

src/agents/embedded-agent-runner/run/types.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,7 @@ export type EmbeddedRunAttemptResult = {
125125
};
126126
codexAppServerFailure?: {
127127
kind: "client_closed_before_turn_completed" | "turn_completion_idle_timeout";
128+
turnWatchTimeoutKind?: "progress" | "completion" | "terminal";
128129
transport: "stdio" | "websocket";
129130
threadId?: string;
130131
turnId?: string;

0 commit comments

Comments
 (0)