Skip to content

Commit 563dca8

Browse files
authored
Add Codex happy path prompt snapshots (#75807)
* Add Codex prompt snapshots
* Fix prompt snapshot scenario catalogs
* Harden prompt snapshot drift check
* Fix CLI compat build export
* fix: keep codex snapshots out of core plugin surface
* fix: harden prompt snapshot ci checks
* fix: accept readonly web search onboarding scopes
* fix: repair plugin sdk package boundary types
* fix: clear prompt snapshot ci regressions
* fix: clear latest main ci checks
* fix: resolve latest main discord helper overlap
* fix: refresh codex dynamic tool snapshots
* fix: align prompt snapshot branch with latest ci
* fix: isolate plugin auto enable tests
* test: refresh prompt dynamic tool snapshots
* fix: stabilize bundled channel auto enable
* fix: clean stale prompt snapshots
1 parent 4fb520d commit 563dca8

46 files changed

Lines changed: 7920 additions & 133 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Docs: https://docs.openclaw.ai
5454
- Dependencies: refresh workspace dependency pins, including TypeBox 1.1.37, AWS SDK 3.1041.0, Microsoft Teams 2.0.9, and Marked 18.0.3. Thanks @mariozechner, @aws, and @microsoft.
5555
- Discord/channels: add reusable message-channel access groups plus Discord channel-audience DM authorization, so allowlists can reference `accessGroup:<name>` across channel auth paths. (#75813)
5656
- Crabbox/scripts: print the selected Crabbox binary, version, and supported providers before `pnpm crabbox:*` commands, and reject stale binaries that lack `blacksmith-testbox` provider support.
57+
- Agents/Codex: add committed happy-path prompt snapshots for Codex/message-tool Telegram direct, Discord group, and heartbeat turns so prompt drift can be reviewed. Thanks @pashpashpash.
5758

5859
### Fixes
5960

docs/concepts/system-prompt.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,19 @@ section when the direct/group chat context already includes the resolved
109109
conversation-specific `NO_REPLY` behavior. This avoids repeating token mechanics
110110
in both the global system prompt and channel context.
111111

112+
## Prompt snapshots
113+
114+
OpenClaw keeps committed happy-path prompt snapshots for the Codex/message-tool
115+
runtime under `test/fixtures/agents/prompt-snapshots/happy-path/`. They render
116+
the OpenClaw-owned Codex app-server developer instructions, selected thread
117+
start/resume params, turn user input, and dynamic tool specs for Telegram direct,
118+
Discord group, and heartbeat turns. The hidden base Codex system prompt and
119+
turn-scoped Codex collaboration-mode instructions are owned by the Codex runtime
120+
and are not rendered by OpenClaw.
121+
122+
Regenerate them with `pnpm prompt:snapshots:gen` and verify drift with
123+
`pnpm prompt:snapshots:check`.
124+
112125
## Workspace bootstrap injection
113126

114127
Bootstrap files are trimmed and appended under **Project Context** so the model sees identity and profile context without needing explicit reads:
extensions/codex/src/app-server/dynamic-tool-profile.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import type { CodexPluginConfig } from "./config.js";
2+
3+
export const CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES = [
4+
"read",
5+
"write",
6+
"edit",
7+
"apply_patch",
8+
"exec",
9+
"process",
10+
"update_plan",
11+
] as const;
12+
13+
export function applyCodexDynamicToolProfile<T extends { name: string }>(
14+
tools: T[],
15+
config: Pick<CodexPluginConfig, "codexDynamicToolsProfile" | "codexDynamicToolsExclude">,
16+
): T[] {
17+
const excludes = new Set<string>();
18+
const profile = config.codexDynamicToolsProfile ?? "native-first";
19+
if (profile === "native-first") {
20+
for (const name of CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES) {
21+
excludes.add(name);
22+
}
23+
}
24+
for (const name of config.codexDynamicToolsExclude ?? []) {
25+
const trimmed = name.trim();
26+
if (trimmed) {
27+
excludes.add(trimmed);
28+
}
29+
}
30+
return excludes.size === 0 ? tools : tools.filter((tool) => !excludes.has(tool.name));
31+
}

extensions/codex/src/app-server/run-attempt.ts

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ import {
5454
type CodexPluginConfig,
5555
} from "./config.js";
5656
import { projectContextEngineAssemblyForCodex } from "./context-engine-projection.js";
57+
import { applyCodexDynamicToolProfile } from "./dynamic-tool-profile.js";
5758
import { createCodexDynamicToolBridge, type CodexDynamicToolBridge } from "./dynamic-tools.js";
5859
import { handleCodexAppServerElicitationRequest } from "./elicitation-bridge.js";
5960
import { CodexAppServerEventProjector } from "./event-projector.js";
@@ -99,15 +100,6 @@ const CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS = 3;
99100
const CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS = 60_000;
100101
const CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS = 30 * 60_000;
101102
const CODEX_STEER_ALL_DEBOUNCE_MS = 500;
102-
const CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES = [
103-
"read",
104-
"write",
105-
"edit",
106-
"apply_patch",
107-
"exec",
108-
"process",
109-
"update_plan",
110-
] as const;
111103
const LOG_FIELD_MAX_LENGTH = 160;
112104

113105
type OpenClawCodingToolsOptions = NonNullable<
@@ -1499,26 +1491,6 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
14991491
});
15001492
}
15011493

1502-
function applyCodexDynamicToolProfile<T extends { name: string }>(
1503-
tools: T[],
1504-
config: CodexPluginConfig,
1505-
): T[] {
1506-
const excludes = new Set<string>();
1507-
const profile = config.codexDynamicToolsProfile ?? "native-first";
1508-
if (profile === "native-first") {
1509-
for (const name of CODEX_NATIVE_FIRST_DYNAMIC_TOOL_EXCLUDES) {
1510-
excludes.add(name);
1511-
}
1512-
}
1513-
for (const name of config.codexDynamicToolsExclude ?? []) {
1514-
const trimmed = name.trim();
1515-
if (trimmed) {
1516-
excludes.add(trimmed);
1517-
}
1518-
}
1519-
return excludes.size === 0 ? tools : tools.filter((tool) => !excludes.has(tool.name));
1520-
}
1521-
15221494
async function withCodexStartupTimeout<T>(params: {
15231495
timeoutMs: number;
15241496
timeoutFloorMs?: number;

extensions/codex/src/app-server/thread-lifecycle.ts

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,25 +97,19 @@ export async function startOrResumeThread(params: {
9797
}
9898
}
9999

100-
const modelProvider = resolveCodexAppServerModelProvider(params.params.provider);
101100
const response = assertCodexThreadStartResponse(
102-
await params.client.request("thread/start", {
103-
model: params.params.modelId,
104-
...(modelProvider ? { modelProvider } : {}),
105-
cwd: params.cwd,
106-
approvalPolicy: params.appServer.approvalPolicy,
107-
approvalsReviewer: params.appServer.approvalsReviewer,
108-
sandbox: params.appServer.sandbox,
109-
...(params.appServer.serviceTier ? { serviceTier: params.appServer.serviceTier } : {}),
110-
serviceName: "OpenClaw",
111-
...(params.config ? { config: params.config } : {}),
112-
developerInstructions:
113-
params.developerInstructions ?? buildDeveloperInstructions(params.params),
114-
dynamicTools: params.dynamicTools,
115-
experimentalRawEvents: true,
116-
persistExtendedHistory: true,
117-
} satisfies CodexThreadStartParams),
101+
await params.client.request(
102+
"thread/start",
103+
buildThreadStartParams(params.params, {
104+
cwd: params.cwd,
105+
dynamicTools: params.dynamicTools,
106+
appServer: params.appServer,
107+
developerInstructions: params.developerInstructions,
108+
config: params.config,
109+
}),
110+
),
118111
);
112+
const modelProvider = resolveCodexAppServerModelProvider(params.params.provider);
119113
const createdAt = new Date().toISOString();
120114
await writeCodexAppServerBinding(params.params.sessionFile, {
121115
threadId: response.thread.id,
@@ -140,6 +134,34 @@ export async function startOrResumeThread(params: {
140134
};
141135
}
142136

137+
export function buildThreadStartParams(
138+
params: EmbeddedRunAttemptParams,
139+
options: {
140+
cwd: string;
141+
dynamicTools: CodexDynamicToolSpec[];
142+
appServer: CodexAppServerRuntimeOptions;
143+
developerInstructions?: string;
144+
config?: JsonObject;
145+
},
146+
): CodexThreadStartParams {
147+
const modelProvider = resolveCodexAppServerModelProvider(params.provider);
148+
return {
149+
model: params.modelId,
150+
...(modelProvider ? { modelProvider } : {}),
151+
cwd: options.cwd,
152+
approvalPolicy: options.appServer.approvalPolicy,
153+
approvalsReviewer: options.appServer.approvalsReviewer,
154+
sandbox: options.appServer.sandbox,
155+
...(options.appServer.serviceTier ? { serviceTier: options.appServer.serviceTier } : {}),
156+
serviceName: "OpenClaw",
157+
...(options.config ? { config: options.config } : {}),
158+
developerInstructions: options.developerInstructions ?? buildDeveloperInstructions(params),
159+
dynamicTools: options.dynamicTools,
160+
experimentalRawEvents: true,
161+
persistExtendedHistory: true,
162+
};
163+
}
164+
143165
export function buildThreadResumeParams(
144166
params: EmbeddedRunAttemptParams,
145167
options: {

extensions/codex/test-api.ts

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import type {
2+
AnyAgentTool,
3+
EmbeddedRunAttemptParams,
4+
} from "openclaw/plugin-sdk/agent-harness-runtime";
5+
import {
6+
type CodexAppServerRuntimeOptions,
7+
resolveCodexAppServerRuntimeOptions,
8+
} from "./src/app-server/config.js";
9+
import type { CodexPluginConfig } from "./src/app-server/config.js";
10+
import { applyCodexDynamicToolProfile } from "./src/app-server/dynamic-tool-profile.js";
11+
import { createCodexDynamicToolBridge } from "./src/app-server/dynamic-tools.js";
12+
import type { CodexDynamicToolSpec, JsonObject } from "./src/app-server/protocol.js";
13+
import {
14+
buildDeveloperInstructions,
15+
buildThreadResumeParams,
16+
buildThreadStartParams,
17+
buildTurnStartParams,
18+
} from "./src/app-server/thread-lifecycle.js";
19+
20+
type CodexHarnessPromptSnapshot = {
21+
developerInstructions: string;
22+
threadStartParams: ReturnType<typeof buildThreadStartParams>;
23+
threadResumeParams: ReturnType<typeof buildThreadResumeParams>;
24+
turnStartParams: ReturnType<typeof buildTurnStartParams>;
25+
};
26+
27+
export function resolveCodexPromptSnapshotAppServerOptions(
28+
pluginConfig?: unknown,
29+
): CodexAppServerRuntimeOptions {
30+
return resolveCodexAppServerRuntimeOptions({
31+
pluginConfig,
32+
env: {},
33+
});
34+
}
35+
36+
export function buildCodexHarnessPromptSnapshot(params: {
37+
attempt: EmbeddedRunAttemptParams;
38+
cwd: string;
39+
threadId: string;
40+
dynamicTools: CodexDynamicToolSpec[];
41+
appServer: CodexAppServerRuntimeOptions;
42+
config?: JsonObject;
43+
promptText?: string;
44+
}): CodexHarnessPromptSnapshot {
45+
const developerInstructions = buildDeveloperInstructions(params.attempt);
46+
return {
47+
developerInstructions,
48+
threadStartParams: buildThreadStartParams(params.attempt, {
49+
cwd: params.cwd,
50+
dynamicTools: params.dynamicTools,
51+
appServer: params.appServer,
52+
developerInstructions,
53+
config: params.config,
54+
}),
55+
threadResumeParams: buildThreadResumeParams(params.attempt, {
56+
threadId: params.threadId,
57+
appServer: params.appServer,
58+
developerInstructions,
59+
config: params.config,
60+
}),
61+
turnStartParams: buildTurnStartParams(params.attempt, {
62+
threadId: params.threadId,
63+
cwd: params.cwd,
64+
appServer: params.appServer,
65+
promptText: params.promptText,
66+
}),
67+
};
68+
}
69+
70+
export function createCodexDynamicToolSpecsForPromptSnapshot(params: {
71+
tools: AnyAgentTool[];
72+
pluginConfig?: Pick<CodexPluginConfig, "codexDynamicToolsProfile" | "codexDynamicToolsExclude">;
73+
}): CodexDynamicToolSpec[] {
74+
const profiledTools = applyCodexDynamicToolProfile(params.tools, params.pluginConfig ?? {});
75+
return createCodexDynamicToolBridge({
76+
tools: profiledTools,
77+
signal: new AbortController().signal,
78+
}).specs;
79+
}

extensions/google/embedding-provider.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ type GeminiInlinePart = {
5454
inlineData: { mimeType: string; data: string };
5555
};
5656
type GeminiPart = GeminiTextPart | GeminiInlinePart;
57+
type GeminiEmbeddingInputPart = NonNullable<EmbeddingInput["parts"]>[number];
5758
type GeminiEmbeddingRequest = {
5859
content: { parts: GeminiPart[] };
5960
taskType: GeminiTaskType;
@@ -85,7 +86,7 @@ export function buildGeminiEmbeddingRequest(params: {
8586
}): GeminiEmbeddingRequest {
8687
const request: GeminiEmbeddingRequest = {
8788
content: {
88-
parts: params.input.parts?.map((part) =>
89+
parts: params.input.parts?.map((part: GeminiEmbeddingInputPart) =>
8990
part.type === "text"
9091
? ({ text: part.text } satisfies GeminiTextPart)
9192
: ({

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,6 +1448,8 @@
14481448
"prepare": "command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/null 2>&1 && git config core.hooksPath git-hooks || exit 0",
14491449
"prepush:ci": "bash scripts/prepush-ci.sh",
14501450
"probe:anthropic:prompt": "node --import tsx scripts/anthropic-prompt-probe.ts",
1451+
"prompt:snapshots:check": "node --import tsx scripts/generate-prompt-snapshots.ts --check",
1452+
"prompt:snapshots:gen": "node --import tsx scripts/generate-prompt-snapshots.ts --write",
14511453
"protocol:check": "pnpm protocol:gen && pnpm protocol:gen:swift && git diff --exit-code -- dist/protocol.schema.json apps/macos/Sources/OpenClawProtocol/GatewayModels.swift apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift",
14521454
"protocol:gen": "node --import tsx scripts/protocol-gen.ts",
14531455
"protocol:gen:swift": "node --import tsx scripts/protocol-gen-swift.ts",

0 commit comments

Comments
 (0)