Skip to content

Commit f8e2bd4

Browse files
committed
Add Codex prompt snapshots
1 parent 10ebcbd commit f8e2bd4

18 files changed

Lines changed: 6023 additions & 51 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ Docs: https://docs.openclaw.ai
4848
- Dependencies: refresh workspace dependency pins, including TypeBox 1.1.37, AWS SDK 3.1041.0, Microsoft Teams 2.0.9, and Marked 18.0.3. Thanks @mariozechner, @aws, and @microsoft.
4949
- Discord/channels: add reusable message-channel access groups plus Discord channel-audience DM authorization, so allowlists can reference `accessGroup:<name>` across channel auth paths. (#75813)
5050
- Crabbox/scripts: print the selected Crabbox binary, version, and supported providers before `pnpm crabbox:*` commands, and reject stale binaries that lack `blacksmith-testbox` provider support.
51+
- Agents/Codex: add committed happy-path prompt snapshots for Codex/message-tool Telegram direct, Discord group, and heartbeat turns so prompt drift can be reviewed. Thanks @pashpashpash.
5152

5253
### Fixes
5354

docs/concepts/system-prompt.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,19 @@ section when the direct/group chat context already includes the resolved
109109
conversation-specific `NO_REPLY` behavior. This avoids repeating token mechanics
110110
in both the global system prompt and channel context.
111111

## Prompt snapshots

OpenClaw keeps committed happy-path prompt snapshots for the Codex/message-tool
runtime under `test/fixtures/agents/prompt-snapshots/happy-path/`. They render
the OpenClaw-owned Codex app-server developer instructions, selected thread
start/resume params, turn user input, and dynamic tool specs for Telegram direct,
Discord group, and heartbeat turns. The hidden base Codex system prompt and
turn-scoped Codex collaboration-mode instructions are owned by the Codex runtime
and are not rendered by OpenClaw.

Regenerate them with `pnpm prompt:snapshots:gen` and verify drift with
`pnpm prompt:snapshots:check`.

112125
## Workspace bootstrap injection
113126

114127
Bootstrap files are trimmed and appended under **Project Context** so the model sees identity and profile context without needing explicit reads:

extensions/codex/api.ts

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import type {
2+
AnyAgentTool,
3+
EmbeddedRunAttemptParams,
4+
} from "openclaw/plugin-sdk/agent-harness-runtime";
5+
import {
6+
type CodexAppServerRuntimeOptions,
7+
resolveCodexAppServerRuntimeOptions,
8+
} from "./src/app-server/config.js";
9+
import type { CodexPluginConfig } from "./src/app-server/config.js";
10+
import { applyCodexDynamicToolProfile } from "./src/app-server/dynamic-tool-profile.js";
11+
import { createCodexDynamicToolBridge } from "./src/app-server/dynamic-tools.js";
12+
import type { CodexDynamicToolSpec, JsonObject } from "./src/app-server/protocol.js";
13+
import {
14+
buildDeveloperInstructions,
15+
buildThreadResumeParams,
16+
buildThreadStartParams,
17+
buildTurnStartParams,
18+
} from "./src/app-server/thread-lifecycle.js";
19+
/**
 * The pieces of a Codex harness prompt that a snapshot captures: the developer
 * instructions plus the params objects built for thread start, thread resume,
 * and turn start.
 */
export type CodexHarnessPromptSnapshot = {
  /** Result of `buildDeveloperInstructions` for the attempt. */
  developerInstructions: string;
  /** Params object produced by `buildThreadStartParams`. */
  threadStartParams: ReturnType<typeof buildThreadStartParams>;
  /** Params object produced by `buildThreadResumeParams`. */
  threadResumeParams: ReturnType<typeof buildThreadResumeParams>;
  /** Params object produced by `buildTurnStartParams`. */
  turnStartParams: ReturnType<typeof buildTurnStartParams>;
};
26+
/**
 * Resolves the Codex app-server runtime options used when rendering prompt
 * snapshots.
 *
 * The resolver is always handed an empty `env` object, so `pluginConfig` is the
 * only caller-controlled input.
 * NOTE(review): presumably this keeps snapshots independent of the host
 * environment; confirm against `resolveCodexAppServerRuntimeOptions`.
 *
 * @param pluginConfig - Raw plugin configuration, forwarded unchanged.
 * @returns The resolved app-server runtime options.
 */
export function resolveCodexPromptSnapshotAppServerOptions(
  pluginConfig?: unknown,
): CodexAppServerRuntimeOptions {
  return resolveCodexAppServerRuntimeOptions({ pluginConfig, env: {} });
}
35+
/**
 * Assembles a {@link CodexHarnessPromptSnapshot} for one attempt.
 *
 * The developer instructions are built once from `attempt` and the same string
 * is passed to both the thread-start and thread-resume builders.
 *
 * @param params.attempt - Run attempt the builders derive their params from.
 * @param params.cwd - Working directory passed to the thread-start and turn-start builders.
 * @param params.threadId - Thread id passed to the thread-resume and turn-start builders.
 * @param params.dynamicTools - Dynamic tool specs passed to the thread-start builder.
 * @param params.appServer - Resolved app-server runtime options, passed to every builder.
 * @param params.config - Optional config object passed to the thread-start and thread-resume builders.
 * @param params.promptText - Optional prompt text passed to the turn-start builder.
 * @returns The developer instructions together with the three params objects.
 */
export function buildCodexHarnessPromptSnapshot(params: {
  attempt: EmbeddedRunAttemptParams;
  cwd: string;
  threadId: string;
  dynamicTools: CodexDynamicToolSpec[];
  appServer: CodexAppServerRuntimeOptions;
  config?: JsonObject;
  promptText?: string;
}): CodexHarnessPromptSnapshot {
  const { attempt, cwd, threadId, dynamicTools, appServer, config, promptText } = params;

  const developerInstructions = buildDeveloperInstructions(attempt);
  const threadStartParams = buildThreadStartParams(attempt, {
    cwd,
    dynamicTools,
    appServer,
    developerInstructions,
    config,
  });
  const threadResumeParams = buildThreadResumeParams(attempt, {
    threadId,
    appServer,
    developerInstructions,
    config,
  });
  const turnStartParams = buildTurnStartParams(attempt, {
    threadId,
    cwd,
    appServer,
    promptText,
  });

  return { developerInstructions, threadStartParams, threadResumeParams, turnStartParams };
}
69+
/**
 * Produces the dynamic tool specs that a prompt snapshot should record.
 *
 * The tools are first filtered through `applyCodexDynamicToolProfile` (an empty
 * config is used when `pluginConfig` is omitted) and then handed to
 * `createCodexDynamicToolBridge`; only the bridge's `specs` are returned.
 *
 * @param params.tools - Candidate agent tools.
 * @param params.pluginConfig - Optional dynamic-tool profile and exclude settings.
 * @returns The `specs` of the bridge built from the filtered tools.
 */
export function createCodexDynamicToolSpecsForPromptSnapshot(params: {
  tools: AnyAgentTool[];
  pluginConfig?: Pick<CodexPluginConfig, "codexDynamicToolsProfile" | "codexDynamicToolsExclude">;
}): CodexDynamicToolSpec[] {
  const tools = applyCodexDynamicToolProfile(params.tools, params.pluginConfig ?? {});
  // The bridge needs an abort signal; a fresh controller that nothing here
  // ever aborts supplies one.
  const { signal } = new AbortController();
  const { specs } = createCodexDynamicToolBridge({ tools, signal });
  return specs;
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import type { CodexPluginConfig } from "./config.js";
2+
/** Tool names dropped from the dynamic tool list when the "native-first" profile is active. */
export const CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES = [
  "read",
  "write",
  "edit",
  "apply_patch",
  "exec",
  "process",
  "update_plan",
] as const;

/**
 * Filters `tools` according to the configured dynamic-tool profile and the
 * explicit exclude list.
 *
 * - The profile defaults to "native-first", which excludes every name in
 *   {@link CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES}; any other profile value
 *   contributes no excludes.
 * - Names in `codexDynamicToolsExclude` are trimmed, and entries that are empty
 *   after trimming are ignored.
 *
 * @param tools - Candidate tools; matching is by exact `name`.
 * @param config - Profile and exclude settings.
 * @returns The same `tools` array instance when nothing is excluded, otherwise
 *   a new filtered array.
 */
export function applyCodexDynamicToolProfile<T extends { name: string }>(
  tools: T[],
  config: Pick<CodexPluginConfig, "codexDynamicToolsProfile" | "codexDynamicToolsExclude">,
): T[] {
  const activeProfile = config.codexDynamicToolsProfile ?? "native-first";
  const profileExcludes: readonly string[] =
    activeProfile === "native-first" ? CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES : [];
  const configuredExcludes = (config.codexDynamicToolsExclude ?? [])
    .map((name) => name.trim())
    .filter((name) => name.length > 0);

  const excluded = new Set<string>([...profileExcludes, ...configuredExcludes]);
  if (excluded.size === 0) {
    return tools;
  }
  return tools.filter((tool) => !excluded.has(tool.name));
}

extensions/codex/src/app-server/run-attempt.ts

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ import {
5454
type CodexPluginConfig,
5555
} from "./config.js";
5656
import { projectContextEngineAssemblyForCodex } from "./context-engine-projection.js";
57+
import { applyCodexDynamicToolProfile } from "./dynamic-tool-profile.js";
5758
import { createCodexDynamicToolBridge, type CodexDynamicToolBridge } from "./dynamic-tools.js";
5859
import { handleCodexAppServerElicitationRequest } from "./elicitation-bridge.js";
5960
import { CodexAppServerEventProjector } from "./event-projector.js";
@@ -99,15 +100,6 @@ const CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS = 3;
99100
const CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS = 60_000;
100101
const CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS = 30 * 60_000;
101102
const CODEX_STEER_ALL_DEBOUNCE_MS = 500;
102-
const CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES = [
103-
"read",
104-
"write",
105-
"edit",
106-
"apply_patch",
107-
"exec",
108-
"process",
109-
"update_plan",
110-
] as const;
111103
const LOG_FIELD_MAX_LENGTH = 160;
112104

113105
type OpenClawCodingToolsOptions = NonNullable<
@@ -1499,26 +1491,6 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
14991491
});
15001492
}
15011493

1502-
function applyCodexDynamicToolProfile<T extends { name: string }>(
1503-
tools: T[],
1504-
config: CodexPluginConfig,
1505-
): T[] {
1506-
const excludes = new Set<string>();
1507-
const profile = config.codexDynamicToolsProfile ?? "native-first";
1508-
if (profile === "native-first") {
1509-
for (const name of CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES) {
1510-
excludes.add(name);
1511-
}
1512-
}
1513-
for (const name of config.codexDynamicToolsExclude ?? []) {
1514-
const trimmed = name.trim();
1515-
if (trimmed) {
1516-
excludes.add(trimmed);
1517-
}
1518-
}
1519-
return excludes.size === 0 ? tools : tools.filter((tool) => !excludes.has(tool.name));
1520-
}
1521-
15221494
async function withCodexStartupTimeout<T>(params: {
15231495
timeoutMs: number;
15241496
timeoutFloorMs?: number;

extensions/codex/src/app-server/thread-lifecycle.ts

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,25 +97,19 @@ export async function startOrResumeThread(params: {
9797
}
9898
}
9999

100-
const modelProvider = resolveCodexAppServerModelProvider(params.params.provider);
101100
const response = assertCodexThreadStartResponse(
102-
await params.client.request("thread/start", {
103-
model: params.params.modelId,
104-
...(modelProvider ? { modelProvider } : {}),
105-
cwd: params.cwd,
106-
approvalPolicy: params.appServer.approvalPolicy,
107-
approvalsReviewer: params.appServer.approvalsReviewer,
108-
sandbox: params.appServer.sandbox,
109-
...(params.appServer.serviceTier ? { serviceTier: params.appServer.serviceTier } : {}),
110-
serviceName: "OpenClaw",
111-
...(params.config ? { config: params.config } : {}),
112-
developerInstructions:
113-
params.developerInstructions ?? buildDeveloperInstructions(params.params),
114-
dynamicTools: params.dynamicTools,
115-
experimentalRawEvents: true,
116-
persistExtendedHistory: true,
117-
} satisfies CodexThreadStartParams),
101+
await params.client.request(
102+
"thread/start",
103+
buildThreadStartParams(params.params, {
104+
cwd: params.cwd,
105+
dynamicTools: params.dynamicTools,
106+
appServer: params.appServer,
107+
developerInstructions: params.developerInstructions,
108+
config: params.config,
109+
}),
110+
),
118111
);
112+
const modelProvider = resolveCodexAppServerModelProvider(params.params.provider);
119113
const createdAt = new Date().toISOString();
120114
await writeCodexAppServerBinding(params.params.sessionFile, {
121115
threadId: response.thread.id,
@@ -140,6 +134,34 @@ export async function startOrResumeThread(params: {
140134
};
141135
}
142136

/**
 * Builds the params object sent with the `thread/start` request.
 *
 * `modelProvider`, `serviceTier` and `config` are only included when they have
 * a truthy value. When `options.developerInstructions` is not supplied, the
 * instructions are derived from `params` via `buildDeveloperInstructions`.
 *
 * @param params - Run attempt supplying the model id and provider.
 * @param options - Working directory, dynamic tool specs, app-server runtime
 *   options, and optional developer instructions / config overrides.
 * @returns The thread-start params.
 */
export function buildThreadStartParams(
  params: EmbeddedRunAttemptParams,
  options: {
    cwd: string;
    dynamicTools: CodexDynamicToolSpec[];
    appServer: CodexAppServerRuntimeOptions;
    developerInstructions?: string;
    config?: JsonObject;
  },
): CodexThreadStartParams {
  const { appServer, config, cwd, dynamicTools } = options;
  const modelProvider = resolveCodexAppServerModelProvider(params.provider);
  const developerInstructions =
    options.developerInstructions ?? buildDeveloperInstructions(params);

  // Property order is kept exactly as before so any serialized form of this
  // object stays unchanged.
  return {
    model: params.modelId,
    ...(modelProvider ? { modelProvider } : {}),
    cwd,
    approvalPolicy: appServer.approvalPolicy,
    approvalsReviewer: appServer.approvalsReviewer,
    sandbox: appServer.sandbox,
    ...(appServer.serviceTier ? { serviceTier: appServer.serviceTier } : {}),
    serviceName: "OpenClaw",
    ...(config ? { config } : {}),
    developerInstructions,
    dynamicTools,
    experimentalRawEvents: true,
    persistExtendedHistory: true,
  };
}
164+
143165
export function buildThreadResumeParams(
144166
params: EmbeddedRunAttemptParams,
145167
options: {

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,8 @@
14471447
"prepare": "command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/null 2>&1 && git config core.hooksPath git-hooks || exit 0",
14481448
"prepush:ci": "bash scripts/prepush-ci.sh",
14491449
"probe:anthropic:prompt": "node --import tsx scripts/anthropic-prompt-probe.ts",
1450+
"prompt:snapshots:check": "node --import tsx scripts/generate-prompt-snapshots.ts --check",
1451+
"prompt:snapshots:gen": "node --import tsx scripts/generate-prompt-snapshots.ts --write",
14501452
"protocol:check": "pnpm protocol:gen && pnpm protocol:gen:swift && git diff --exit-code -- dist/protocol.schema.json apps/macos/Sources/OpenClawProtocol/GatewayModels.swift apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift",
14511453
"protocol:gen": "node --import tsx scripts/protocol-gen.ts",
14521454
"protocol:gen:swift": "node --import tsx scripts/protocol-gen-swift.ts",
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import { execFile } from "node:child_process";
2+
import fs from "node:fs/promises";
3+
import os from "node:os";
4+
import path from "node:path";
5+
import { fileURLToPath, pathToFileURL } from "node:url";
6+
import { promisify } from "node:util";
7+
import {
8+
createHappyPathPromptSnapshotFiles,
9+
HAPPY_PATH_PROMPT_SNAPSHOT_DIR,
10+
} from "../test/helpers/agents/happy-path-prompt-snapshots.js";
11+
// Repository root: the parent of the directory that contains this script.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(scriptDir, "..");
// Promise-returning form of `child_process.execFile`.
const execFileAsync = promisify(execFile);

/** Element type of the array returned by `createHappyPathPromptSnapshotFiles`. */
type PromptSnapshotFile = ReturnType<typeof createHappyPathPromptSnapshotFiles>[number];
16+
/**
 * Turns an arbitrary thrown value into a printable message.
 *
 * @param error - The caught value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
20+
/**
 * Writes every snapshot file beneath `root`, creating parent directories as
 * needed. All files are written concurrently.
 *
 * @param root - Directory that each `file.path` is resolved against.
 * @param files - Snapshot files to write (`path` and `content` are used).
 */
async function writeSnapshotFiles(root: string, files: PromptSnapshotFile[]) {
  const writeOne = async (file: PromptSnapshotFile) => {
    const target = path.resolve(root, file.path);
    await fs.mkdir(path.dirname(target), { recursive: true });
    await fs.writeFile(target, file.content);
  };
  await Promise.all(files.map((file) => writeOne(file)));
}
30+
/**
 * Formats the given snapshot files in place by running `oxfmt --write` on
 * their resolved paths, with the repository root as the working directory.
 *
 * Does nothing when `files` is empty. Without that guard the formatter would
 * be invoked with `--write` and no path operands.
 * NOTE(review): a path-less invocation presumably falls back to the tool's
 * default target set under `repoRoot` rather than being a no-op; confirm
 * against the oxfmt CLI documentation.
 *
 * @param root - Directory that each `file.path` is resolved against.
 * @param files - Snapshot files to format.
 * @throws Rejects if the `oxfmt` process cannot be started or exits non-zero.
 */
async function formatSnapshotFiles(root: string, files: PromptSnapshotFile[]) {
  if (files.length === 0) {
    return;
  }
  const filePaths = files.map((file) => path.resolve(root, file.path));
  // A single formatter thread is requested explicitly via `--threads=1`.
  await execFileAsync("oxfmt", ["--write", "--threads=1", ...filePaths], {
    cwd: repoRoot,
  });
}
37+
/**
 * Reads each snapshot file back from beneath `root` as UTF-8 text.
 *
 * @param root - Directory that each `file.path` is resolved against.
 * @param files - Snapshot file descriptors to read.
 * @returns Copies of the descriptors, in the same order, with `content`
 *   replaced by what is on disk.
 */
async function readSnapshotFiles(root: string, files: PromptSnapshotFile[]) {
  const pendingReads = files.map(async (file) => {
    const content = await fs.readFile(path.resolve(root, file.path), "utf8");
    return { ...file, content };
  });
  return await Promise.all(pendingReads);
}
46+
/**
 * Generates the happy-path prompt snapshot files and returns them as they look
 * after formatting.
 *
 * The files are written to a fresh temporary directory, formatted there, and
 * read back. The temporary directory is removed afterwards, whether or not
 * any step failed.
 *
 * @returns The generated files with formatted `content`.
 */
export async function createFormattedPromptSnapshotFiles(): Promise<PromptSnapshotFile[]> {
  const generated = createHappyPathPromptSnapshotFiles();
  const scratchPrefix = path.join(os.tmpdir(), "openclaw-prompt-snapshots-");
  const scratchDir = await fs.mkdtemp(scratchPrefix);
  try {
    await writeSnapshotFiles(scratchDir, generated);
    await formatSnapshotFiles(scratchDir, generated);
    const formatted = await readSnapshotFiles(scratchDir, generated);
    return formatted;
  } finally {
    await fs.rm(scratchDir, { recursive: true, force: true });
  }
}
58+
/**
 * Regenerates the committed prompt snapshots: builds the formatted files,
 * ensures the snapshot directory exists under the repository root, writes the
 * files there, and logs how many were written.
 */
async function writeSnapshots() {
  const snapshotFiles = await createFormattedPromptSnapshotFiles();
  const snapshotDir = path.resolve(repoRoot, HAPPY_PATH_PROMPT_SNAPSHOT_DIR);
  await fs.mkdir(snapshotDir, { recursive: true });
  await writeSnapshotFiles(repoRoot, snapshotFiles);
  console.log(`Wrote ${snapshotFiles.length} prompt snapshot files.`);
}
65+
/**
 * Compares the committed prompt snapshots with freshly generated ones.
 *
 * A file that cannot be read is reported as missing (with the read error's
 * message); a file whose text differs from the generated content is reported
 * as drifted. When any problem is found, the list is printed to stderr and
 * `process.exitCode` is set to 1; otherwise a confirmation is logged.
 */
async function checkSnapshots() {
  const expectedFiles = await createFormattedPromptSnapshotFiles();
  const problems: string[] = [];

  for (const expected of expectedFiles) {
    const committedPath = path.resolve(repoRoot, expected.path);
    let committed: string | undefined;
    try {
      committed = await fs.readFile(committedPath, "utf8");
    } catch (error) {
      problems.push(`${expected.path}: missing (${describeError(error)})`);
    }
    // `committed` stays undefined only when the read failed, which has
    // already been recorded above.
    if (committed !== undefined && committed !== expected.content) {
      problems.push(`${expected.path}: differs from generated output`);
    }
  }

  if (problems.length === 0) {
    console.log(`Prompt snapshots are current (${expectedFiles.length} files).`);
    return;
  }

  console.error("Prompt snapshot drift detected. Run `pnpm prompt:snapshots:gen`.");
  for (const problem of problems) {
    console.error(`- ${problem}`);
  }
  process.exitCode = 1;
}
92+
/**
 * Command-line entry point for the prompt snapshot generator.
 *
 * Exactly one of `--write` (regenerate the committed snapshots) or `--check`
 * (verify them) must be present. If neither or both are given, the usage line
 * is printed to stderr, `process.exitCode` is set to 2, and nothing else runs.
 * Rejecting the both-flags case prevents a check invocation from silently
 * rewriting the fixtures and then reporting success.
 *
 * @param argv - Command-line arguments; defaults to `process.argv.slice(2)`.
 */
export async function runPromptSnapshotGenerator(argv = process.argv.slice(2)) {
  const wantsWrite = argv.includes("--write");
  const wantsCheck = argv.includes("--check");

  // Equal flags means "neither" or "both": no single unambiguous mode.
  if (wantsWrite === wantsCheck) {
    console.error("Usage: pnpm prompt:snapshots:gen | pnpm prompt:snapshots:check");
    process.exitCode = 2;
    return;
  }

  if (wantsWrite) {
    await writeSnapshots();
  } else {
    await checkSnapshots();
  }
}
108+
// Run the generator only when this module is the script Node was started
// with: the entry script path (process.argv[1]) is converted to a file URL
// and compared with this module's own URL. Importing the module elsewhere
// therefore does not trigger a run.
const entryScript = process.argv[1];
const invokedPath = entryScript ? pathToFileURL(path.resolve(entryScript)).href : "";
if (invokedPath === import.meta.url) {
  await runPromptSnapshotGenerator();
}

0 commit comments

Comments
 (0)