Skip to content

Commit ff50412

Browse files
committed
fix: include codex system prompt on initial turn
1 parent abd8726 commit ff50412

6 files changed

Lines changed: 125 additions & 7 deletions

File tree

packages/cli/src/daemon/resumer.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ import type { RuntimePool } from "./runtimePool.js";
2020

2121
const logger = createLogger("resumer");
2222

23+
/**
 * Prefixes a resume message with a guard preamble for the agent.
 *
 * The preamble states that the task is already claimed, assigned and
 * in_progress, and that `ak task claim` must not be run again; the caller's
 * message follows after a blank line.
 *
 * @param message - Text to continue from (passed through unchanged).
 * @returns The guard preamble, a blank line, then `message`.
 */
function buildResumeTaskContext(message: string): string {
  const resumeGuard =
    "This is a resume of an already claimed task. The task is already assigned to you and in_progress; do not run `ak task claim` again. Continue from the message below.";
  return resumeGuard + "\n\n" + message;
}
26+
2327
/**
2428
* Resume a saved session (rate-limited or rejected). Returns true on success.
2529
*/
@@ -76,7 +80,7 @@ export async function resumeSession(session: SessionFile, message: string, clien
7680
sessionId: session.sessionId,
7781
resumeToken: session.providerResumeToken,
7882
cwd: workspace.cwd,
79-
taskContext: message,
83+
taskContext: buildResumeTaskContext(message),
8084
agentClient,
8185
agentEnv,
8286
resume: true,

packages/cli/src/providers/codex.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@ function readAccessToken(): string | null {
2121
}
2222
}
2323

24+
/**
 * Reads the system prompt text from disk.
 *
 * @param filePath - Path of the system prompt file. When omitted or empty, no
 *   file is read.
 * @returns The file's UTF-8 contents with trailing whitespace removed, or an
 *   empty string when no path is given.
 * @throws Whatever `readFileSync` throws when the file cannot be read; the
 *   error is intentionally not swallowed here.
 */
function readSystemPrompt(filePath?: string): string {
  if (!filePath) return "";
  // Text files usually end with a newline. Dropping trailing whitespace keeps a
  // later "\n\n" join from producing a triple line break, and lets a
  // whitespace-only file collapse to "" so it can be filtered out as empty.
  return readFileSync(filePath, "utf-8").trimEnd();
}
28+
29+
/**
 * Builds the prompt text passed to Codex for one turn.
 *
 * On a fresh (non-resume) turn the system prompt file contents, if any, are
 * placed before the task context, separated by a blank line. Empty parts are
 * dropped so no stray separator is emitted.
 *
 * On a resume turn only the task context is sent.
 * NOTE(review): this assumes a resumed Codex thread still carries the system
 * prompt from its initial turn — confirm against the Codex SDK before relying
 * on it.
 *
 * @param opts - Execute options; reads `resume`, `systemPromptFile` and
 *   `taskContext`.
 * @returns The prompt string for this turn.
 */
function buildPrompt(opts: ExecuteOpts): string {
  // Resume turns continue an existing thread; repeating the system prompt
  // there would duplicate it alongside the resume message.
  if (opts.resume) return opts.taskContext;
  return [readSystemPrompt(opts.systemPromptFile), opts.taskContext].filter(Boolean).join("\n\n");
}
32+
2433
/** Per 1M tokens, OpenAI pricing */
2534
const CODEX_PRICING: Record<string, { input: number; cached_input: number; output: number }> = {
2635
o3: { input: 2.0, cached_input: 0.5, output: 8.0 },
@@ -223,7 +232,7 @@ export const codexProvider: AgentProvider = {
223232
const thread = opts.resume ? codex.resumeThread(opts.resumeToken ?? opts.sessionId, threadOpts) : codex.startThread(threadOpts);
224233

225234
const abortController = new AbortController();
226-
const streamed = await thread.runStreamed(opts.taskContext, { signal: abortController.signal });
235+
const streamed = await thread.runStreamed(buildPrompt(opts), { signal: abortController.signal });
227236

228237
const events = (async function* () {
229238
const turnOpen = { value: false };

packages/cli/tests/providers.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,33 @@ describe("codexProvider.execute — thread selection", () => {
685685
expect(resumeThreadSpy).toHaveBeenCalledWith("codex-thread-1", expect.any(Object));
686686
});
687687

688+
it("passes system prompt content and task context to Codex", async () => {
689+
const { Codex } = await import("@openai/codex-sdk");
690+
const fsModule = await import("node:fs");
691+
vi.mocked(fsModule.readFileSync).mockImplementation((path) => {
692+
if (path === "/tmp/system.txt") return "You are Codex with board rules." as any;
693+
throw new Error("ENOENT");
694+
});
695+
const runStreamedSpy = vi.fn().mockResolvedValue({ events: (async function* () {})() });
696+
vi.mocked(Codex).mockImplementationOnce(
697+
() =>
698+
({
699+
startThread: vi.fn().mockReturnValue({ runStreamed: runStreamedSpy }),
700+
resumeThread: vi.fn(),
701+
}) as any,
702+
);
703+
704+
await codexProvider.execute({
705+
sessionId: "s1",
706+
cwd: "/tmp",
707+
env: { OPENAI_API_KEY: "test-key" },
708+
systemPromptFile: "/tmp/system.txt",
709+
taskContext: "Implement the task.",
710+
});
711+
712+
expect(runStreamedSpy).toHaveBeenCalledWith("You are Codex with board rules.\n\nImplement the task.", expect.any(Object));
713+
});
714+
688715
it("omits explicit model for ChatGPT-backed Codex sessions", async () => {
689716
const { Codex } = await import("@openai/codex-sdk");
690717
const fsModule = await import("node:fs");
@@ -1369,6 +1396,23 @@ describe("geminiProvider.execute — arg selection", () => {
13691396
expect(call.args).toContain("--resume");
13701397
});
13711398

1399+
it("uses task context only when resuming", async () => {
1400+
const { spawnAgent } = await import("../src/providers/spawnHelper.js");
1401+
vi.mocked(spawnAgent).mockClear();
1402+
await geminiProvider.execute({
1403+
sessionId: "s1",
1404+
cwd: "/tmp",
1405+
env: {},
1406+
systemPromptFile: "/tmp/system.txt",
1407+
taskContext: "Task rejected. Reason: fix it",
1408+
resume: true,
1409+
});
1410+
const call = vi.mocked(spawnAgent).mock.calls[0][0];
1411+
const promptIdx = call.args.indexOf("--prompt");
1412+
1413+
expect(call.args[promptIdx + 1]).toBe("Task rejected. Reason: fix it");
1414+
});
1415+
13721416
it("uses buildArgs when resume is false or absent", async () => {
13731417
const { spawnAgent } = await import("../src/providers/spawnHelper.js");
13741418
vi.mocked(spawnAgent).mockClear();

packages/cli/tests/rejectResumeFlow.integration.test.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ import { resumeSession } from "../src/daemon/resumer.js";
115115
import { RuntimePool } from "../src/daemon/runtimePool.js";
116116
import type { AgentEvent, AgentHandle, AgentProvider, ExecuteOpts } from "../src/providers/types.js";
117117
import type { SessionFile } from "../src/session/store.js";
118-
import { clearAllSessions, listSessions, readSession, writeSession } from "../src/session/store.js";
118+
import { clearAllSessions, readSession, writeSession } from "../src/session/store.js";
119119

120120
// ── FakeProvider ──────────────────────────────────────────────────────────────
121121

@@ -136,7 +136,6 @@ class FakeProvider implements AgentProvider {
136136
executeCalls: FakeExecuteCall[] = [];
137137

138138
async execute(opts: ExecuteOpts): Promise<AgentHandle> {
139-
let pushEvent!: (event: AgentEvent) => void;
140139
let endStream!: () => void;
141140

142141
// The queue is implemented with a simple promise chain so events
@@ -145,7 +144,7 @@ class FakeProvider implements AgentProvider {
145144
const waiters: Array<(done: boolean) => void> = [];
146145
let done = false;
147146

148-
pushEvent = (event: AgentEvent) => {
147+
const pushEvent = (event: AgentEvent) => {
149148
if (waiters.length > 0) {
150149
// a consumer is already waiting — deliver directly
151150
eventQueue.push(event);
@@ -643,9 +642,13 @@ describe("Scenario 5: full end-to-end reject-resume", () => {
643642
// (b) a new execute() call happened on FakeProvider
644643
expect(fake.executeCalls).toHaveLength(1);
645644

646-
// (c) taskContext passed in contains "fix it"
645+
// (c) provider receives resume guard context plus the original rejection message
647646
const executeOpts = fake.executeCalls[0].opts;
648-
expect(executeOpts.taskContext).toContain("fix it");
647+
expect(executeOpts.taskContext).toContain("already claimed task");
648+
expect(executeOpts.taskContext).toContain("already assigned to you and in_progress");
649+
expect(executeOpts.taskContext).toContain("do not run `ak task claim` again");
650+
expect(executeOpts.taskContext).toContain(message);
651+
expect(executeOpts.resume).toBe(true);
649652

650653
// (d) session status went back to "active"
651654
const afterResume = readSession(session.sessionId);

tests/runtimePool-coverage.test.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,46 @@ describe("RuntimePool — getActiveTaskIds", () => {
295295
});
296296
});
297297

298+
describe("RuntimePool — provider execute options", () => {
299+
it("passes systemPromptFile through to the provider", async () => {
300+
const taskId = randomUUID();
301+
const sessionId = randomUUID();
302+
await seedActiveSession(sessions, sessionId, taskId);
303+
304+
const agentClient = makeAgentClient({ status: "in_progress" });
305+
let resolveEvents!: () => void;
306+
const stuckHandle: AgentHandle = {
307+
events: (async function* () {
308+
await new Promise<void>((r) => {
309+
resolveEvents = r;
310+
});
311+
})(),
312+
abort: vi.fn().mockResolvedValue(undefined),
313+
send: vi.fn().mockResolvedValue(undefined),
314+
};
315+
const provider = makeProvider(stuckHandle);
316+
const pool = new RuntimePool(apiClient, { onSlotFreed: vi.fn() }, { onRateLimited: vi.fn(), onRateLimitResumed: vi.fn() }, 0, null);
317+
318+
await pool.spawnAgent({
319+
provider,
320+
taskId,
321+
sessionId,
322+
cwd: "/tmp",
323+
taskContext: "test task",
324+
agentClient,
325+
agentEnv: {},
326+
systemPromptFile: "/tmp/system-prompt.txt",
327+
});
328+
329+
expect(provider.execute).toHaveBeenCalledWith(
330+
expect.objectContaining({
331+
systemPromptFile: "/tmp/system-prompt.txt",
332+
}),
333+
);
334+
resolveEvents();
335+
});
336+
});
337+
298338
describe("RuntimePool — sendToAgent", () => {
299339
it("sends message to a running agent", async () => {
300340
const taskId = randomUUID();

tests/task-scheduled-at.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,24 @@ describe("dispatchTasks — scheduled_at filter", () => {
521521
rl.stop();
522522
});
523523

524+
it("writes generated system prompt and passes its file to the runtime", async () => {
525+
const { dispatchTasks } = await import("../packages/cli/src/daemon/dispatcher");
526+
const { getSessionManager } = await import("../packages/cli/src/session/manager");
527+
const spawnSpy = vi.fn().mockResolvedValue(undefined);
528+
vi.mocked(getSessionManager().create).mockClear();
529+
const task = makeTask({ id: "task-system-prompt", assigned_to: "agent-system-prompt", status: "todo" });
530+
const client = makeClient([task]);
531+
const pool = makePool(spawnSpy);
532+
const rl = makeRateLimiter();
533+
534+
const result = await dispatchTasks(client as any, pool as any, rl, prMonitor, opts);
535+
536+
expect(result).toBe(true);
537+
expect(getSessionManager().create).toHaveBeenCalledWith(expect.not.objectContaining({ systemPrompt: expect.anything() }));
538+
expect(spawnSpy).toHaveBeenCalledWith(expect.objectContaining({ systemPromptFile: "/tmp/prompt.txt" }));
539+
rl.stop();
540+
});
541+
524542
it("does not dispatch when the local runtime is unauthorized", async () => {
525543
const { dispatchTasks } = await import("../packages/cli/src/daemon/dispatcher");
526544
providerMocks.availability = { status: "unauthorized" };

0 commit comments

Comments
 (0)