Skip to content

Commit c4d747f

Browse files
committed
fix codex memory flush tool surface
1 parent e399a92 commit c4d747f

4 files changed

Lines changed: 114 additions & 1 deletion

File tree

extensions/codex/src/app-server/run-attempt.test.ts

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -760,6 +760,57 @@ describe("runCodexAppServerAttempt", () => {
760760
expect(testing.resolveCodexDynamicToolsLoading({}, privateQaCodexEnv)).toBe("direct");
761761
});
762762

763+
// A memory flush run (trigger "memory" plus a flush write path) must disable the
// native tool surface and expose only the managed `read` and `write` dynamic tools,
// even though the tools factory also offers shell, patch and messaging tools.
it("limits Codex memory flush runs to managed read and write tools", async () => {
  const offeredToolNames = ["read", "write", "exec", "process", "apply_patch", "message"] as const;
  const memoryFlushWritePath = "memory/2026-05-22.md";

  // Record every options object handed to the factory so the forwarded fields can be checked.
  const factoryOptions: unknown[] = [];
  testing.setOpenClawCodingToolsFactoryForTests((options) => {
    factoryOptions.push(options);
    return offeredToolNames.map((toolName) => createRuntimeDynamicTool(toolName));
  });

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  params.disableTools = false;
  params.runtimePlan = createCodexRuntimePlanFixture();
  params.trigger = "memory";
  params.memoryFlushWritePath = memoryFlushWritePath;

  const { sessionKey: sandboxSessionKey } = params;
  if (!sandboxSessionKey) {
    throw new Error("createParams must provide a sessionKey for Codex dynamic tool tests.");
  }

  const nativeToolSurfaceEnabled = testing.shouldEnableCodexAppServerNativeToolSurface(params, {
    enabled: true,
    backendId: "docker",
  } as never);

  const tools = await testing.buildDynamicTools({
    params,
    resolvedWorkspace: workspaceDir,
    effectiveWorkspace: workspaceDir,
    sandboxSessionKey,
    sandbox: { enabled: true, backendId: "docker" } as never,
    nativeToolSurfaceEnabled,
    runAbortController: new AbortController(),
    sessionAgentId: "main",
    pluginConfig: {},
    onYieldDetected: () => undefined,
  });
  const exposedToolNames = tools.map((tool) => tool.name);

  expect(nativeToolSurfaceEnabled).toBe(false);
  expect(factoryOptions).toHaveLength(1);
  expect(factoryOptions[0]).toMatchObject({
    trigger: "memory",
    memoryFlushWritePath: "memory/2026-05-22.md",
  });
  expect(exposedToolNames).toEqual(["read", "write"]);
});
813+
763814
it("exposes OpenClaw sandbox shell tools under distinct names for non-Docker sandbox backends", async () => {
764815
testing.setOpenClawCodingToolsFactoryForTests(() => [
765816
createRuntimeDynamicTool("read"),

extensions/codex/src/app-server/run-attempt.ts

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,7 @@ const CODEX_NATIVE_SANDBOX_TOOL_REQUIREMENTS = [
215215
"edit",
216216
"apply_patch",
217217
] as const;
218+
/** Normalized dynamic tool names that stay exposed during a Codex memory flush run. */
const CODEX_MEMORY_FLUSH_DYNAMIC_TOOL_ALLOW = new Set<string>(["read", "write"]);
218219
const CODEX_NATIVE_PROJECT_DOC_BASENAMES = new Set(["agents.md"]);
219220
const CODEX_WORKSPACE_DEVELOPER_CONTEXT_BASENAMES = new Set([
220221
"identity.md",
@@ -3578,7 +3579,9 @@ async function buildDynamicTools(input: DynamicToolBuildParams) {
35783579
},
35793580
});
35803581
const codexFilteredTools = addSandboxShellDynamicToolsIfAvailable(
3581-
filterCodexDynamicTools(allTools, input.pluginConfig),
3582+
isCodexMemoryFlushRun(params)
3583+
? filterCodexMemoryFlushDynamicTools(allTools)
3584+
: filterCodexDynamicTools(allTools, input.pluginConfig),
35823585
allTools,
35833586
input,
35843587
);
@@ -3624,6 +3627,9 @@ function shouldEnableCodexAppServerNativeToolSurface(
36243627
sandbox?: OpenClawSandboxContext,
36253628
options: { sandboxExecServerEnabled?: boolean } = {},
36263629
): boolean {
3630+
if (isCodexMemoryFlushRun(params)) {
3631+
return false;
3632+
}
36273633
const toolsAllow = includeForcedMessageToolAllow(params.toolsAllow, params);
36283634
if (toolsAllow === undefined) {
36293635
return canCodexAppServerNativeToolSurfaceHonorSandbox(sandbox, options);
@@ -3666,6 +3672,18 @@ function canSandboxToolPolicyExposeCodexNativeToolSurface(sandbox: {
36663672
);
36673673
}
36683674

3675+
/**
 * Reports whether the given attempt parameters describe a memory flush run.
 *
 * @param params - Attempt parameters; may be omitted.
 * @returns `true` only when `trigger` is `"memory"` and `memoryFlushWritePath`
 *   contains something other than whitespace.
 */
function isCodexMemoryFlushRun(
  params?: Pick<EmbeddedRunAttemptParams, "trigger" | "memoryFlushWritePath">,
): boolean {
  if (params?.trigger !== "memory") {
    return false;
  }
  // A missing or blank write path does not count as a flush run.
  const trimmedWritePath = params.memoryFlushWritePath?.trim();
  return Boolean(trimmedWritePath);
}
3680+
3681+
/**
 * Keeps only the tools whose normalized name is in
 * `CODEX_MEMORY_FLUSH_DYNAMIC_TOOL_ALLOW`.
 *
 * @param tools - Candidate dynamic tools.
 * @returns A new array with the allowed tools, in their original order.
 */
function filterCodexMemoryFlushDynamicTools<T extends { name: string }>(tools: T[]): T[] {
  const isAllowedDuringMemoryFlush = (candidate: T): boolean => {
    const normalizedName = normalizeCodexDynamicToolName(candidate.name);
    return CODEX_MEMORY_FLUSH_DYNAMIC_TOOL_ALLOW.has(normalizedName);
  };
  return tools.filter(isAllowedDuringMemoryFlush);
}
3686+
36693687
function shouldRequireCodexSandboxExecServerEnvironment(params: {
36703688
sandbox?: OpenClawSandboxContext;
36713689
nativeToolSurfaceEnabled: boolean;
@@ -3773,6 +3791,9 @@ function addSandboxShellDynamicToolsIfAvailable(
37733791
}
37743792

37753793
/**
 * Decides whether the sandbox exec dynamic tool should be exposed for this build.
 *
 * @param input - Dynamic tool build parameters.
 * @returns `false` for memory flush runs; otherwise `true` only when the sandbox is
 *   enabled with a non-blank backend id and `nativeToolSurfaceEnabled` is exactly `false`.
 */
function shouldExposeSandboxExecDynamicTool(input: DynamicToolBuildParams): boolean {
  if (isCodexMemoryFlushRun(input.params)) {
    return false;
  }
  // A disabled or absent sandbox is treated as having no backend.
  let normalizedBackendId = "";
  if (input.sandbox?.enabled) {
    normalizedBackendId = input.sandbox.backendId.trim().toLowerCase();
  }
  if (!normalizedBackendId) {
    return false;
  }
  return input.nativeToolSurfaceEnabled === false;
}

src/auto-reply/reply/agent-runner-memory.test.ts

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ const runEmbeddedPiAgentMock = vi.fn();
2222
const refreshQueuedFollowupSessionMock = vi.fn();
2323
const incrementCompactionCountMock = vi.fn();
2424
const ensureSelectedAgentHarnessPluginMock = vi.fn();
25+
const ensureMemoryFlushTargetFileMock = vi.fn();
2526

2627
function registerMemoryFlushPlanResolverForTest(resolver: MemoryFlushPlanResolver): void {
2728
registerMemoryCapability("memory-core", { flushPlanResolver: resolver });
@@ -141,6 +142,7 @@ describe("runMemoryFlushIfNeeded", () => {
141142
});
142143
runEmbeddedPiAgentMock.mockReset().mockResolvedValue({ payloads: [], meta: {} });
143144
refreshQueuedFollowupSessionMock.mockReset();
145+
ensureMemoryFlushTargetFileMock.mockReset().mockResolvedValue(undefined);
144146
ensureSelectedAgentHarnessPluginMock.mockReset().mockResolvedValue(undefined);
145147
incrementCompactionCountMock.mockReset().mockImplementation(async (params) => {
146148
const sessionKey = String(params.sessionKey ?? "");
@@ -174,6 +176,7 @@ describe("runMemoryFlushIfNeeded", () => {
174176
compactEmbeddedPiSession: compactEmbeddedPiSessionMock as never,
175177
runWithModelFallback: runWithModelFallbackMock as never,
176178
runEmbeddedPiAgent: runEmbeddedPiAgentMock as never,
179+
ensureMemoryFlushTargetFile: ensureMemoryFlushTargetFileMock as never,
177180
refreshQueuedFollowupSession: refreshQueuedFollowupSessionMock as never,
178181
incrementCompactionCount: incrementCompactionCountMock as never,
179182
ensureSelectedAgentHarnessPlugin: ensureSelectedAgentHarnessPluginMock as never,
@@ -246,6 +249,13 @@ describe("runMemoryFlushIfNeeded", () => {
246249
expect(flushCall.prompt).not.toBe(flushCall.transcriptPrompt);
247250
expect(flushCall.memoryFlushWritePath).toMatch(/^memory\/\d{4}-\d{2}-\d{2}\.md$/);
248251
expect(flushCall.silentExpected).toBe(true);
252+
expect(ensureMemoryFlushTargetFileMock).toHaveBeenCalledWith({
253+
workspaceDir: followupRun.run.workspaceDir,
254+
relativePath: flushCall.memoryFlushWritePath,
255+
});
256+
expect(ensureMemoryFlushTargetFileMock.mock.invocationCallOrder[0]).toBeLessThan(
257+
runEmbeddedPiAgentMock.mock.invocationCallOrder[0] ?? 0,
258+
);
249259
expect(refreshQueuedFollowupSessionMock).toHaveBeenCalledTimes(1);
250260
const refreshCall = requireRefreshQueuedFollowupSessionCall();
251261
expect(refreshCall.key).toBe(sessionKey);

src/auto-reply/reply/agent-runner-memory.ts

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import crypto from "node:crypto";
22
import fs from "node:fs";
3+
import path from "node:path";
34
import type { AgentMessage } from "@earendil-works/pi-agent-core";
45
import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
56
import { estimateMessagesTokens } from "../../agents/compaction.js";
@@ -85,11 +86,36 @@ async function runEmbeddedPiAgentDefault(
8586
return await runEmbeddedPiAgent(...args);
8687
}
8788

89+
/**
 * Ensures the memory flush target file exists inside the workspace.
 *
 * Missing parent directories are created, and the file is opened in append mode so
 * that a missing file is created empty while an existing file keeps its content.
 *
 * @param params.workspaceDir - Workspace root the target must stay inside.
 * @param params.relativePath - Target path, relative to the workspace root.
 * @throws Error "Invalid memory flush target path" when `normalizeOptionalString`
 *   yields no value for either input, or when `relativePath` is absolute.
 * @throws Error "Memory flush target path must stay inside the workspace" when the
 *   resolved target is the workspace root itself or lies outside of it.
 */
async function ensureMemoryFlushTargetFile(params: {
  workspaceDir: string;
  relativePath: string;
}): Promise<void> {
  const workspaceDir = normalizeOptionalString(params.workspaceDir);
  const relativePath = normalizeOptionalString(params.relativePath);
  if (!workspaceDir || !relativePath || path.isAbsolute(relativePath)) {
    throw new Error("Invalid memory flush target path");
  }
  const workspaceRoot = path.resolve(workspaceDir);
  const targetPath = path.resolve(workspaceRoot, relativePath);
  const targetRelativePath = path.relative(workspaceRoot, targetPath);
  // Compare whole path segments: only ".." itself or a leading "../" leaves the
  // workspace. A plain prefix test would also reject in-workspace names that merely
  // begin with two dots, such as "..drafts/note.md".
  const escapesWorkspace =
    targetRelativePath === ".." ||
    targetRelativePath.startsWith(`..${path.sep}`) ||
    path.isAbsolute(targetRelativePath);
  // An empty relative path means the target resolved to the workspace root itself.
  if (!targetRelativePath || escapesWorkspace) {
    throw new Error("Memory flush target path must stay inside the workspace");
  }
  await fs.promises.mkdir(path.dirname(targetPath), { recursive: true });
  // Append mode creates the file when it is missing and never truncates existing content.
  const handle = await fs.promises.open(targetPath, "a");
  await handle.close();
}
112+
88113
const memoryDeps = {
89114
compactEmbeddedPiSession: compactEmbeddedPiSessionDefault,
90115
runWithModelFallback,
91116
ensureSelectedAgentHarnessPlugin,
92117
runEmbeddedPiAgent: runEmbeddedPiAgentDefault,
118+
ensureMemoryFlushTargetFile,
93119
registerAgentRunContext,
94120
refreshQueuedFollowupSession,
95121
incrementCompactionCount,
@@ -104,6 +130,7 @@ export function setAgentRunnerMemoryTestDeps(overrides?: Partial<typeof memoryDe
104130
ensureSelectedAgentHarnessPlugin,
105131
compactEmbeddedPiSession: compactEmbeddedPiSessionDefault,
106132
runEmbeddedPiAgent: runEmbeddedPiAgentDefault,
133+
ensureMemoryFlushTargetFile,
107134
registerAgentRunContext,
108135
refreshQueuedFollowupSession,
109136
incrementCompactionCount,
@@ -1013,6 +1040,10 @@ export async function runMemoryFlushIfNeeded(params: {
10131040
nowMs: memoryFlushNowMs,
10141041
}) ?? memoryFlushPlan;
10151042
const memoryFlushWritePath = activeMemoryFlushPlan.relativePath;
1043+
await memoryDeps.ensureMemoryFlushTargetFile({
1044+
workspaceDir: params.followupRun.run.workspaceDir,
1045+
relativePath: memoryFlushWritePath,
1046+
});
10161047
const flushSystemPrompt = [
10171048
params.followupRun.run.extraSystemPrompt,
10181049
activeMemoryFlushPlan.systemPrompt,

0 commit comments

Comments
 (0)