Skip to content

Commit 56024b7

Browse files
yshimadahrs-ship-it, y.shimada, joshavant
authored
fix(tasks): recover childless Codex native subagent tasks (#82836)
* fix(tasks): recover childless Codex native subagent tasks
* fix(tasks): harden codex native task recovery

---------

Co-authored-by: y.shimada <y.shimada@waishimadanoMac-mini.local>
Co-authored-by: joshavant <830519+joshavant@users.noreply.github.com>
1 parent 94c012b commit 56024b7

6 files changed

Lines changed: 173 additions & 18 deletions

File tree

extensions/codex/src/app-server/native-subagent-task-mirror.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
import {
2+
CODEX_NATIVE_SUBAGENT_RUN_ID_PREFIX,
3+
CODEX_NATIVE_SUBAGENT_RUNTIME,
4+
CODEX_NATIVE_SUBAGENT_TASK_KIND,
25
createRunningTaskRun,
36
finalizeTaskRunByRunId,
47
recordTaskRunProgressByRunId,
@@ -16,9 +19,6 @@ import type {
1619
} from "./protocol.js";
1720
import { isJsonObject } from "./protocol.js";
1821

19-
const CODEX_NATIVE_SUBAGENT_RUNTIME = "subagent";
20-
const CODEX_NATIVE_SUBAGENT_TASK_KIND = "codex-native";
21-
2222
export type TaskLifecycleRuntime = {
2323
createRunningTaskRun: typeof createRunningTaskRun;
2424
recordTaskRunProgressByRunId: typeof recordTaskRunProgressByRunId;
@@ -291,7 +291,7 @@ export class CodexNativeSubagentTaskMirror {
291291
}
292292

293293
export function codexNativeSubagentRunId(threadId: string): string {
294-
return `codex-thread:${threadId.trim()}`;
294+
return `${CODEX_NATIVE_SUBAGENT_RUN_ID_PREFIX}${threadId.trim()}`;
295295
}
296296

297297
export function readSubagentThreadSpawnSource(

src/plugin-sdk/codex-native-task-runtime.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33
// task registry without promoting detached task mutation helpers to the public
44
// plugin SDK.
55

6+
export {
7+
CODEX_NATIVE_SUBAGENT_RUN_ID_PREFIX,
8+
CODEX_NATIVE_SUBAGENT_RUNTIME,
9+
CODEX_NATIVE_SUBAGENT_STALE_ERROR,
10+
CODEX_NATIVE_SUBAGENT_TASK_KIND,
11+
} from "../tasks/codex-native-subagent-task.js";
12+
613
export {
714
createRunningTaskRun,
815
finalizeTaskRunByRunId,

src/tasks/codex-native-subagent-task.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import type { TaskRecord } from "./task-registry.types.js";
2+
3+
export const CODEX_NATIVE_SUBAGENT_RUNTIME = "subagent";
4+
export const CODEX_NATIVE_SUBAGENT_TASK_KIND = "codex-native";
5+
export const CODEX_NATIVE_SUBAGENT_RUN_ID_PREFIX = "codex-thread:";
6+
export const CODEX_NATIVE_SUBAGENT_STALE_ERROR = "Codex native subagent stopped reporting progress";
7+
8+
export function isChildlessCodexNativeSubagentTask(task: TaskRecord): boolean {
9+
if (
10+
task.runtime !== CODEX_NATIVE_SUBAGENT_RUNTIME ||
11+
task.taskKind !== CODEX_NATIVE_SUBAGENT_TASK_KIND
12+
) {
13+
return false;
14+
}
15+
if (task.childSessionKey?.trim()) {
16+
return false;
17+
}
18+
return [task.sourceId, task.runId].some((candidate) =>
19+
candidate?.trim().startsWith(CODEX_NATIVE_SUBAGENT_RUN_ID_PREFIX),
20+
);
21+
}

src/tasks/task-registry.maintenance.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ import {
3232
normalizeLowercaseStringOrEmpty,
3333
normalizeOptionalString,
3434
} from "../shared/string-coerce.js";
35+
import {
36+
CODEX_NATIVE_SUBAGENT_STALE_ERROR,
37+
isChildlessCodexNativeSubagentTask,
38+
} from "./codex-native-subagent-task.js";
3539
import {
3640
getDetachedTaskLifecycleRuntime,
3741
tryRecoverTaskBeforeMarkLost,
@@ -59,6 +63,7 @@ import type { TaskRecord, TaskRegistrySummary, TaskStatus } from "./task-registr
5963

6064
const log = createSubsystemLogger("tasks/task-registry-maintenance");
6165
const TASK_RECONCILE_GRACE_MS = 5 * 60_000;
66+
const CHILDLESS_CODEX_NATIVE_RECONCILE_GRACE_MS = 30 * 60_000;
6267
const TASK_RETENTION_MS = 7 * 24 * 60 * 60_000;
6368
const TASK_SWEEP_INTERVAL_MS = 60_000;
6469

@@ -290,7 +295,10 @@ function isTerminalTask(task: TaskRecord): boolean {
290295

291296
function hasLostGraceExpired(task: TaskRecord, now: number): boolean {
292297
const referenceAt = task.lastEventAt ?? task.startedAt ?? task.createdAt;
293-
return now - referenceAt >= TASK_RECONCILE_GRACE_MS;
298+
const graceMs = isChildlessCodexNativeSubagentTask(task)
299+
? CHILDLESS_CODEX_NATIVE_RECONCILE_GRACE_MS
300+
: TASK_RECONCILE_GRACE_MS;
301+
return now - referenceAt >= graceMs;
294302
}
295303

296304
function parseCronExecutionId(task: TaskRecord): CronExecutionId | undefined {
@@ -462,7 +470,7 @@ function hasBackingSession(task: TaskRecord, context?: BackingSessionLookupConte
462470

463471
const childSessionKey = task.childSessionKey?.trim();
464472
if (!childSessionKey) {
465-
return true;
473+
return !isChildlessCodexNativeSubagentTask(task);
466474
}
467475
if (task.runtime === "acp") {
468476
const acpEntry = taskRegistryMaintenanceRuntime.readAcpSessionEntry({
@@ -491,6 +499,9 @@ function hasBackingSession(task: TaskRecord, context?: BackingSessionLookupConte
491499
}
492500

493501
function resolveTaskLostError(task: TaskRecord, context?: BackingSessionLookupContext): string {
502+
if (isChildlessCodexNativeSubagentTask(task)) {
503+
return CODEX_NATIVE_SUBAGENT_STALE_ERROR;
504+
}
494505
if (task.runtime === "subagent") {
495506
const entry = findTaskSessionEntry(task, context);
496507
if (entry && isSubagentRecoveryWedgedEntry(entry)) {

src/tasks/task-registry.test.ts

Lines changed: 114 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,7 +1885,7 @@ describe("task-registry", () => {
18851885
});
18861886
});
18871887

1888-
it("does not mark codex-native subagent tasks lost when they have no OpenClaw child session", async () => {
1888+
it("keeps fresh childless codex-native subagent tasks live", async () => {
18891889
await withTaskRegistryTempDir(async (root) => {
18901890
process.env.OPENCLAW_STATE_DIR = root;
18911891
resetTaskRegistryForTests();
@@ -1914,14 +1914,85 @@ describe("task-registry", () => {
19141914
cleanupStamped: 0,
19151915
pruned: 0,
19161916
});
1917-
expect(getTaskById(task.taskId)).toEqual({
1918-
...task,
1919-
createdAt: now - 10 * 60_000,
1917+
expectRecordFields(requireTaskById(task.taskId), {
1918+
status: "running",
19201919
lastEventAt: now - 10 * 60_000,
19211920
});
19221921
});
19231922
});
19241923

1924+
it("marks stale childless codex-native subagent tasks lost", async () => {
1925+
await withTaskRegistryTempDir(async (root) => {
1926+
process.env.OPENCLAW_STATE_DIR = root;
1927+
resetTaskRegistryForTests();
1928+
const now = Date.now();
1929+
1930+
const task = createTaskRecord({
1931+
runtime: "subagent",
1932+
taskKind: "codex-native",
1933+
ownerKey: "agent:main:main",
1934+
scopeKind: "session",
1935+
sourceId: "codex-thread:child-thread",
1936+
runId: "codex-thread:child-thread",
1937+
task: "Codex native child",
1938+
status: "running",
1939+
deliveryStatus: "not_applicable",
1940+
notifyPolicy: "silent",
1941+
});
1942+
setTaskTimingById({
1943+
taskId: task.taskId,
1944+
lastEventAt: now - 31 * 60_000,
1945+
});
1946+
1947+
expect(await runTaskRegistryMaintenance()).toEqual({
1948+
reconciled: 1,
1949+
recovered: 0,
1950+
cleanupStamped: 0,
1951+
pruned: 0,
1952+
});
1953+
expectRecordFields(requireTaskById(task.taskId), {
1954+
status: "lost",
1955+
error: "Codex native subagent stopped reporting progress",
1956+
});
1957+
});
1958+
});
1959+
1960+
it("does not mark unrelated childless subagent tasks lost", async () => {
1961+
await withTaskRegistryTempDir(async (root) => {
1962+
process.env.OPENCLAW_STATE_DIR = root;
1963+
resetTaskRegistryForTests();
1964+
const now = Date.now();
1965+
1966+
const task = createTaskRecord({
1967+
runtime: "subagent",
1968+
taskKind: "codex-native",
1969+
ownerKey: "agent:main:main",
1970+
scopeKind: "session",
1971+
sourceId: "other-runtime:child-thread",
1972+
runId: "other-runtime:child-thread",
1973+
task: "Non-Codex childless row",
1974+
status: "running",
1975+
deliveryStatus: "not_applicable",
1976+
notifyPolicy: "silent",
1977+
});
1978+
setTaskTimingById({
1979+
taskId: task.taskId,
1980+
lastEventAt: now - 31 * 60_000,
1981+
});
1982+
1983+
expect(await runTaskRegistryMaintenance()).toEqual({
1984+
reconciled: 0,
1985+
recovered: 0,
1986+
cleanupStamped: 0,
1987+
pruned: 0,
1988+
});
1989+
expectRecordFields(requireTaskById(task.taskId), {
1990+
status: "running",
1991+
lastEventAt: now - 31 * 60_000,
1992+
});
1993+
});
1994+
});
1995+
19251996
it("closes terminal parent-owned one-shot ACP sessions during maintenance", async () => {
19261997
await withTaskRegistryTempDir(async (root) => {
19271998
process.env.OPENCLAW_STATE_DIR = root;
@@ -3168,7 +3239,7 @@ describe("task-registry", () => {
31683239
});
31693240
});
31703241

3171-
it("does not route codex-native task cancellation through OpenClaw subagent sessions", async () => {
3242+
it("cancels childless codex-native tasks without routing through OpenClaw subagent sessions", async () => {
31723243
await withTaskRegistryTempDir(async (root) => {
31733244
process.env.OPENCLAW_STATE_DIR = root;
31743245
resetTaskRegistryForTests();
@@ -3190,6 +3261,44 @@ describe("task-registry", () => {
31903261
taskId: task.taskId,
31913262
});
31923263

3264+
expectRecordFields(result, {
3265+
found: true,
3266+
cancelled: true,
3267+
});
3268+
expectRecordFields(result.task, {
3269+
taskId: task.taskId,
3270+
status: "cancelled",
3271+
endedAt: expect.any(Number),
3272+
lastEventAt: expect.any(Number),
3273+
cleanupAfter: expect.any(Number),
3274+
error: "Cancelled by operator.",
3275+
});
3276+
expect(hoisted.killSubagentRunAdminMock).not.toHaveBeenCalled();
3277+
});
3278+
});
3279+
3280+
it("does not cancel unrelated childless subagent tasks", async () => {
3281+
await withTaskRegistryTempDir(async (root) => {
3282+
process.env.OPENCLAW_STATE_DIR = root;
3283+
resetTaskRegistryForTests();
3284+
const task = createTaskRecord({
3285+
runtime: "subagent",
3286+
taskKind: "codex-native",
3287+
ownerKey: "agent:main:main",
3288+
scopeKind: "session",
3289+
sourceId: "other-runtime:child-thread",
3290+
runId: "other-runtime:child-thread",
3291+
task: "Non-Codex childless row",
3292+
status: "running",
3293+
deliveryStatus: "not_applicable",
3294+
notifyPolicy: "silent",
3295+
});
3296+
3297+
const result = await cancelTaskById({
3298+
cfg: {} as never,
3299+
taskId: task.taskId,
3300+
});
3301+
31933302
expect(result).toEqual({
31943303
found: true,
31953304
cancelled: false,

src/tasks/task-registry.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { parseAgentSessionKey } from "../routing/session-key.js";
1111
import { normalizeOptionalString } from "../shared/string-coerce.js";
1212
import { normalizeDeliveryContext } from "../utils/delivery-context.shared.js";
1313
import { isDeliverableMessageChannel } from "../utils/message-channel.js";
14+
import { isChildlessCodexNativeSubagentTask } from "./codex-native-subagent-task.js";
1415
import {
1516
formatTaskBlockedFollowupMessage,
1617
formatTaskStateChangeMessage,
@@ -1898,14 +1899,20 @@ export async function cancelTaskById(params: {
18981899
try {
18991900
if (task.runtime !== "cli") {
19001901
if (!childSessionKey) {
1901-
return {
1902-
found: true,
1903-
cancelled: false,
1904-
reason: "Task has no cancellable child session.",
1905-
task: cloneTaskRecord(task),
1906-
};
1902+
if (!isChildlessCodexNativeSubagentTask(task)) {
1903+
return {
1904+
found: true,
1905+
cancelled: false,
1906+
reason: "Task has no cancellable child session.",
1907+
task: cloneTaskRecord(task),
1908+
};
1909+
}
19071910
}
1908-
if (task.runtime === "acp") {
1911+
if (!childSessionKey) {
1912+
// Codex native subagents are mirrored from the Codex app server and do
1913+
// not have OpenClaw child sessions to terminate. Cancellation clears
1914+
// the stale task-registry record only.
1915+
} else if (task.runtime === "acp") {
19091916
const { getAcpSessionManager } = await loadTaskRegistryControlRuntime();
19101917
await getAcpSessionManager().cancelSession({
19111918
cfg: params.cfg,

0 commit comments

Comments
 (0)