Skip to content

Commit 4bfd741

Browse files
jalehman and steipete authored
fix: stabilize code-mode follow-up tool display and replay (#80663)
* fix: project tool-search bridge event display
* fix: keep codex tool progress out of final replies
* fix: preserve tool result pairs on cleanup
* fix: restore tool search display target helper
* fix: keep tool search controls independent
* fix: render bridged tool calls like native tools
* fix: abort timed out tool search bridge calls
* fix: preserve code-mode tool results across display turns
* fix: repair missing code-mode tool results on disk
* fix: expose tool search controls in embedded runs
* docs: add code-mode followups changelog
* fix: update session repair agent-core import
* fix: harden code-mode follow-up repair
* fix: use stable session repair ids

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
1 parent 1786d60 commit 4bfd741

27 files changed

Lines changed: 909 additions & 35 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ Docs: https://docs.openclaw.ai
7979
- Bonjour/Gateway: treat active ciao probing and fresh name-conflict renames as in-progress so the mDNS watchdog waits for probe settlement before retrying, preventing rapid re-advertise loops on Windows, WSL, and other multicast-hostile hosts. (#74778) Refs #74242. Thanks @fuller-stack-dev.
8080
- Providers/MiniMax: send a minimal Anthropic-compatible user fallback when message conversion filters a turn to an empty payload, so MiniMax M2.7 no longer returns `chat content is empty` after tool-heavy sessions. Fixes #74589. Thanks @neeravmakwana and @DerekEXS.
8181
- Tools/media: preserve implicit allow-all semantics from `tools.alsoAllow`-only policies when preconstructing built-in media generation and PDF tools, so configured media tools become live without forcing `tools.allow: ["*", ...]`. Fixes #77841. Thanks @trialanderrorstudios.
82+
- Codex/Telegram: separate code-mode tool progress from final replies, render bridged tool calls with native tool labels, and repair persisted missing tool results for safer follow-up turns. (#80663) Thanks @jalehman.
8283
- Memory/search: load the platform-specific `sqlite-vec-<platform>-<arch>` variant directly when the meta `sqlite-vec` package is missing from a global install, so vector recall keeps working on `npm install -g openclaw@latest` upgrades where optionalDependencies left only the platform variant on disk. Fixes #77838. Thanks @corevibe555 and @Simon2256928.
8384
- Cron: keep long manual cron runs active in the task registry until completion, preventing transient `lost` markers before durable recovery reconciles. Fixes #78233. (#78243) Thanks @Feelw00.
8485
- Doctor/GitHub CLI: surface a `GH_CONFIG_DIR` hint when the GitHub skill is usable but `gh` auth lives under a different operator HOME than the agent process, without warning for disabled or filtered skills. Fixes #78063. (#78095) Thanks @tmimmanuel.

extensions/codex/src/app-server/event-projector.test.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,53 @@ describe("CodexAppServerEventProjector", () => {
406406
expect(result.toolMediaUrls).toStrictEqual([]);
407407
});
408408

409+
it("does not promote repeated tool progress text to the final assistant reply", async () => {
410+
const onToolResult = vi.fn();
411+
const projector = await createProjector({
412+
...(await createParams()),
413+
verboseLevel: "on",
414+
onToolResult,
415+
});
416+
417+
await projector.handleNotification(
418+
forCurrentTurn("item/started", {
419+
item: {
420+
type: "commandExecution",
421+
id: "cmd-1",
422+
command: "pnpm test extensions/codex",
423+
cwd: "/workspace",
424+
processId: null,
425+
source: "agent",
426+
status: "inProgress",
427+
commandActions: [],
428+
aggregatedOutput: null,
429+
exitCode: null,
430+
durationMs: null,
431+
},
432+
}),
433+
);
434+
const toolProgressText = onToolResult.mock.calls[0]?.[0]?.text;
435+
expect(toolProgressText).toBe("🛠️ `run tests (workspace)`");
436+
437+
await projector.handleNotification(
438+
forCurrentTurn("rawResponseItem/completed", {
439+
item: {
440+
type: "message",
441+
id: "raw-tool-progress",
442+
role: "assistant",
443+
content: [{ type: "output_text", text: toolProgressText }],
444+
},
445+
}),
446+
);
447+
await projector.handleNotification(turnCompleted());
448+
449+
const result = projector.buildResult(buildEmptyToolTelemetry());
450+
451+
expect(result.assistantTexts).toEqual([]);
452+
expect(result.lastAssistant).toBeUndefined();
453+
});
454+
455+
409456
it("does not fail a completed reply after a retryable app-server error notification", async () => {
410457
const projector = await createProjector();
411458

extensions/codex/src/app-server/event-projector.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ export class CodexAppServerEventProjector {
111111
private readonly activeItemIds = new Set<string>();
112112
private readonly completedItemIds = new Set<string>();
113113
private readonly activeCompactionItemIds = new Set<string>();
114+
private readonly toolProgressTexts = new Set<string>();
114115
private readonly toolResultSummaryItemIds = new Set<string>();
115116
private readonly toolResultOutputItemIds = new Set<string>();
116117
private readonly toolResultOutputStreamedItemIds = new Set<string>();
@@ -962,11 +963,16 @@ export class CodexAppServerEventProjector {
962963
text: string;
963964
finalOutput?: boolean;
964965
}): void {
966+
const text = params.text.trim();
967+
if (!text) {
968+
return;
969+
}
970+
this.toolProgressTexts.add(text);
965971
if (params.finalOutput) {
966972
this.toolResultOutputItemIds.add(params.itemId);
967973
}
968974
try {
969-
void Promise.resolve(this.params.onToolResult?.({ text: params.text })).catch(() => {
975+
void Promise.resolve(this.params.onToolResult?.({ text })).catch(() => {
970976
// Tool progress delivery is best-effort and should not affect the turn.
971977
});
972978
} catch {
@@ -1109,7 +1115,7 @@ export class CodexAppServerEventProjector {
11091115
continue;
11101116
}
11111117
const text = this.assistantTextByItem.get(itemId)?.trim();
1112-
if (text) {
1118+
if (text && !this.toolProgressTexts.has(text)) {
11131119
return text;
11141120
}
11151121
}

extensions/openai/openai-provider.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ describe("buildOpenAIProvider", () => {
546546
sanitizeToolCallIds: false,
547547
validateGeminiTurns: false,
548548
validateAnthropicTurns: false,
549+
allowSyntheticToolResults: true,
549550
});
550551
});
551552

extensions/openai/replay-policy.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,23 @@ import type {
33
ProviderReplayPolicyContext,
44
} from "openclaw/plugin-sdk/plugin-entry";
55

6+
const RESPONSES_FAMILY_APIS = new Set([
7+
"openai-responses",
8+
"openai-codex-responses",
9+
"azure-openai-responses",
10+
]);
11+
612
/**
713
* Returns the provider-owned replay policy for OpenAI-family transports.
814
*/
915
export function buildOpenAIReplayPolicy(ctx: ProviderReplayPolicyContext): ProviderReplayPolicy {
16+
const isResponsesFamily = RESPONSES_FAMILY_APIS.has(ctx.modelApi ?? "");
1017
return {
1118
sanitizeMode: "images-only",
1219
applyAssistantFirstOrderingFix: false,
1320
validateGeminiTurns: false,
1421
validateAnthropicTurns: false,
22+
...(isResponsesFamily ? { allowSyntheticToolResults: true } : {}),
1523
...(ctx.modelApi === "openai-completions"
1624
? {
1725
sanitizeToolCallIds: true,

src/agents/pi-embedded-runner.guard.test.ts

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,65 @@ describe("guardSessionManager integration", () => {
3737
]);
3838
});
3939

40+
it("keeps real toolResult pending across delivery-mirror assistant messages", () => {
41+
const sm = guardSessionManager(SessionManager.inMemory());
42+
const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void;
43+
44+
appendMessage(assistantToolCall("call_1"));
45+
appendMessage({
46+
role: "assistant",
47+
provider: "openclaw",
48+
model: "delivery-mirror",
49+
content: [{ type: "text", text: "display copy" }],
50+
} as AgentMessage);
51+
appendMessage({
52+
role: "toolResult",
53+
toolCallId: "call_1",
54+
toolName: "n",
55+
content: [{ type: "text", text: "real output" }],
56+
isError: false,
57+
} as AgentMessage);
58+
59+
const messages = sm
60+
.getEntries()
61+
.filter((e) => e.type === "message")
62+
.map((e) => (e as { message: AgentMessage }).message);
63+
64+
expect(messages.map((m) => m.role)).toEqual(["assistant", "assistant", "toolResult"]);
65+
expect((messages[1] as { model?: string }).model).toBe("delivery-mirror");
66+
expect((messages[2] as { isError?: boolean }).isError).toBe(false);
67+
expect((messages[2] as { content?: Array<{ text?: string }> }).content?.[0]?.text).toBe(
68+
"real output",
69+
);
70+
expect(JSON.stringify(messages)).not.toContain("missing tool result");
71+
});
72+
73+
it("uses Codex-style aborted synthetic results for interrupted Responses tool calls", () => {
74+
const sm = guardSessionManager(SessionManager.inMemory(), {
75+
allowSyntheticToolResults: true,
76+
missingToolResultText: "aborted",
77+
});
78+
const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void;
79+
80+
appendMessage(assistantToolCall("call_responses_1"));
81+
appendMessage({
82+
role: "user",
83+
content: [{ type: "text", text: "interrupting prompt" }],
84+
timestamp: Date.now(),
85+
} as AgentMessage);
86+
87+
const messages = sm
88+
.getEntries()
89+
.filter((e) => e.type === "message")
90+
.map((e) => (e as { message: AgentMessage }).message);
91+
92+
expect(messages.map((m) => m.role)).toEqual(["assistant", "toolResult", "user"]);
93+
expect((messages[1] as { toolCallId?: string }).toolCallId).toBe("call_responses_1");
94+
expect((messages[1] as { content?: Array<{ text?: string }> }).content?.[0]?.text).toBe(
95+
"aborted",
96+
);
97+
});
98+
4099
it("redacts configured text patterns before persisting transcript messages", () => {
41100
const cfg = {
42101
logging: {

src/agents/pi-embedded-runner.guard.waitforidle-before-flush.test.ts

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ describe("flushPendingToolResultsAfterIdle", () => {
100100
);
101101
});
102102

103-
it("clears pending without synthetic flush when timeout cleanup is requested", async () => {
103+
it("flushes pending on cleanup timeout instead of leaving orphaned tool calls", async () => {
104104
const sm = guardSessionManager(SessionManager.inMemory());
105105
const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void;
106106
vi.useFakeTimers();
@@ -112,19 +112,21 @@ describe("flushPendingToolResultsAfterIdle", () => {
112112
agent,
113113
sessionManager: sm,
114114
timeoutMs: 30,
115-
clearPendingOnTimeout: true,
116115
});
117116
await vi.advanceTimersByTimeAsync(30);
118117
await flushPromise;
119118

120-
expect(getMessages(sm).map((m) => m.role)).toEqual(["assistant"]);
119+
const messages = getMessages(sm);
120+
expect(messages.map((m) => m.role)).toEqual(["assistant", "toolResult"]);
121+
expect((messages[1] as { toolCallId?: string }).toolCallId).toBe("call_orphan_2");
122+
expect((messages[1] as { isError?: boolean }).isError).toBe(true);
121123

122124
appendMessage({
123125
role: "user",
124126
content: "still there?",
125127
timestamp: Date.now(),
126128
} as AgentMessage);
127-
expect(getMessages(sm).map((m) => m.role)).toEqual(["assistant", "user"]);
129+
expect(getMessages(sm).map((m) => m.role)).toEqual(["assistant", "toolResult", "user"]);
128130
});
129131

130132
it("clears timeout handle when waitForIdle resolves first", async () => {
@@ -142,7 +144,7 @@ describe("flushPendingToolResultsAfterIdle", () => {
142144
expect(vi.getTimerCount()).toBe(0);
143145
});
144146

145-
it("immediately clears pending tool results without waiting when timeoutMs is 0 or less", async () => {
147+
it("immediately flushes pending tool results without waiting when timeoutMs is 0 or less", async () => {
146148
const sm = guardSessionManager(SessionManager.inMemory());
147149
const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void;
148150

@@ -158,26 +160,29 @@ describe("flushPendingToolResultsAfterIdle", () => {
158160
agent,
159161
sessionManager: sm,
160162
timeoutMs: 0,
161-
clearPendingOnTimeout: true,
162163
});
163164

164165
// Verify waitForIdle was completely bypassed
165166
expect(waitForIdleSpy).not.toHaveBeenCalled();
166167

167-
// The pending tool result should be cleared immediately.
168-
expect(getMessages(sm).map((m) => m.role)).toEqual(["assistant"]);
168+
// The pending tool result should be flushed immediately.
169+
expect(getMessages(sm).map((m) => m.role)).toEqual(["assistant", "toolResult"]);
169170

170171
// Test negative timeout as well
171172
appendMessage(assistantToolCall("call_orphan_negative"));
172173
await flushPendingToolResultsAfterIdle({
173174
agent,
174175
sessionManager: sm,
175176
timeoutMs: -100,
176-
clearPendingOnTimeout: true,
177177
});
178178

179179
// Verify waitForIdle was still bypassed
180180
expect(waitForIdleSpy).not.toHaveBeenCalled();
181-
expect(getMessages(sm).map((m) => m.role)).toEqual(["assistant", "assistant"]);
181+
expect(getMessages(sm).map((m) => m.role)).toEqual([
182+
"assistant",
183+
"toolResult",
184+
"assistant",
185+
"toolResult",
186+
]);
182187
});
183188
});

src/agents/pi-embedded-runner/compact.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1371,7 +1371,6 @@ async function compactEmbeddedPiSessionDirectOnce(
13711371
await flushPendingToolResultsAfterIdle({
13721372
agent: session?.agent,
13731373
sessionManager,
1374-
clearPendingOnTimeout: true,
13751374
});
13761375
} catch {
13771376
/* best-effort */

src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,33 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
188188
vi.restoreAllMocks();
189189
});
190190

191+
it("enables Tool Search controls for embedded PI runs when configured", async () => {
192+
await createContextEngineAttemptRunner({
193+
contextEngine: {
194+
assemble: async ({ messages }) => ({ messages, estimatedTokens: 1 }),
195+
},
196+
sessionKey,
197+
tempPaths,
198+
attemptOverrides: {
199+
disableTools: false,
200+
config: {
201+
tools: {
202+
toolSearch: true,
203+
},
204+
} as OpenClawConfig,
205+
},
206+
});
207+
208+
expect(hoisted.createOpenClawCodingToolsMock).toHaveBeenCalled();
209+
const options = mockParams(
210+
hoisted.createOpenClawCodingToolsMock,
211+
0,
212+
"createOpenClawCodingTools options",
213+
);
214+
expect(options.includeToolSearchControls).toBe(true);
215+
expect(options.toolSearchCatalogRef).toBeTruthy();
216+
});
217+
191218
it("sends transcriptPrompt visibly and queues runtime context as hidden custom context", async () => {
192219
const seen: { prompt?: string; messages?: unknown[]; systemPrompt?: string } = {};
193220

src/agents/pi-embedded-runner/run/attempt.subscription-cleanup.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ export async function cleanupEmbeddedAttemptResources(params: {
5858
agent: IdleAwareAgent | null | undefined;
5959
sessionManager: ToolResultFlushManager | null | undefined;
6060
timeoutMs?: number;
61-
clearPendingOnTimeout?: boolean;
6261
}) => Promise<void>;
6362
session?: { agent?: unknown; dispose(): void };
6463
sessionManager: unknown;
@@ -83,11 +82,13 @@ export async function cleanupEmbeddedAttemptResources(params: {
8382
sessionId: params.sessionId ?? "unknown",
8483
});
8584
}
85+
// PERF: When the run was aborted (user stop / timeout), skip the expensive
86+
// waitForIdle (up to 30 s) and flush pending tool results synchronously so
87+
// the session write-lock is released without leaving orphaned tool calls.
8688
try {
8789
await params.flushPendingToolResultsAfterIdle({
8890
agent: params.session?.agent as IdleAwareAgent | null | undefined,
8991
sessionManager: params.sessionManager as ToolResultFlushManager | null | undefined,
90-
clearPendingOnTimeout: true,
9192
...(params.aborted ? { timeoutMs: 0 } : {}),
9293
});
9394
} catch {

0 commit comments

Comments
 (0)