Skip to content

Commit 50e3698

Browse files
committed
fix: harden codex verbose tool progress (#70966) (thanks @jalehman)
1 parent f353a61 commit 50e3698

6 files changed

Lines changed: 228 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Docs: https://docs.openclaw.ai
3030
### Fixes
3131

3232
- Voice-call/Telnyx: preserve inbound/outbound callback metadata and read transcription text from Telnyx's current `transcription_data` payload.
33-
- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior.
33+
- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior. (#70966) Thanks @jalehman.
3434
- Codex harness: route native `request_user_input` prompts back to the originating chat, preserve queued follow-up answers, and honor newer app-server command approval amendment decisions.
3535
- Codex status: report Codex CLI OAuth as `oauth (codex-cli)` for native `codex/*` sessions instead of showing unknown auth. Fixes #70688. Thanks @jb510.
3636
- Codex harness/context-engine: redact context-engine assembly failures before logging, so fallback warnings do not serialize raw error objects. (#70809) Thanks @jalehman.

extensions/codex/src/app-server/event-projector.test.ts

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,10 +529,41 @@ describe("CodexAppServerEventProjector", () => {
529529

530530
expect(onToolResult).toHaveBeenCalledTimes(1);
531531
expect(onToolResult).toHaveBeenCalledWith({
532-
text: "🛠️ Bash: `pnpm test extensions/codex`",
532+
text: "🛠️ Bash: `` run tests (in /workspace), `pnpm test extensions/codex` ``",
533533
});
534534
});
535535

536+
it("redacts secrets in verbose command summaries", async () => {
537+
const onToolResult = vi.fn();
538+
const projector = await createProjector({
539+
...(await createParams()),
540+
verboseLevel: "on",
541+
onToolResult,
542+
});
543+
544+
await projector.handleNotification(
545+
forCurrentTurn("item/started", {
546+
item: {
547+
type: "commandExecution",
548+
id: "cmd-1",
549+
command: "OPENAI_API_KEY=sk-1234567890abcdefZZZZ pnpm test",
550+
cwd: "/workspace",
551+
processId: null,
552+
source: "agent",
553+
status: "inProgress",
554+
commandActions: [],
555+
aggregatedOutput: null,
556+
exitCode: null,
557+
durationMs: null,
558+
},
559+
}),
560+
);
561+
562+
const text = onToolResult.mock.calls[0]?.[0]?.text;
563+
expect(text).toContain("sk-123…ZZZZ");
564+
expect(text).not.toContain("sk-1234567890abcdefZZZZ");
565+
});
566+
536567
it("uses argument details instead of lifecycle status in verbose tool summaries", async () => {
537568
const onToolResult = vi.fn();
538569
const projector = await createProjector({
@@ -596,6 +627,76 @@ describe("CodexAppServerEventProjector", () => {
596627
});
597628
});
598629

630+
it("uses a safe markdown fence for verbose tool output", async () => {
631+
const onToolResult = vi.fn();
632+
const projector = await createProjector({
633+
...(await createParams()),
634+
verboseLevel: "full",
635+
onToolResult,
636+
});
637+
638+
await projector.handleNotification(
639+
turnCompleted([
640+
{
641+
type: "dynamicToolCall",
642+
id: "tool-1",
643+
namespace: null,
644+
tool: "read",
645+
arguments: { path: "README.md" },
646+
status: "completed",
647+
contentItems: [{ type: "inputText", text: "line\n```\nMEDIA:/tmp/secret.png" }],
648+
success: true,
649+
durationMs: 12,
650+
},
651+
]),
652+
);
653+
654+
expect(onToolResult).toHaveBeenNthCalledWith(2, {
655+
text: "📖 Read: `from README.md`\n````txt\nline\n```\nMEDIA:/tmp/secret.png\n````",
656+
});
657+
});
658+
659+
it("bounds streamed verbose tool output", async () => {
660+
const onToolResult = vi.fn();
661+
const projector = await createProjector({
662+
...(await createParams()),
663+
verboseLevel: "full",
664+
onToolResult,
665+
});
666+
667+
for (let i = 0; i < 25; i += 1) {
668+
await projector.handleNotification(
669+
forCurrentTurn("item/commandExecution/outputDelta", {
670+
itemId: "cmd-1",
671+
delta: `line ${i}\n`,
672+
}),
673+
);
674+
}
675+
await projector.handleNotification(
676+
turnCompleted([
677+
{
678+
type: "commandExecution",
679+
id: "cmd-1",
680+
command: "pnpm test",
681+
cwd: "/workspace",
682+
processId: null,
683+
source: "agent",
684+
status: "completed",
685+
commandActions: [],
686+
aggregatedOutput: "final output should not duplicate streamed output",
687+
exitCode: 0,
688+
durationMs: 12,
689+
},
690+
]),
691+
);
692+
693+
expect(onToolResult).toHaveBeenCalledTimes(21);
694+
expect(onToolResult.mock.calls[19]?.[0]?.text).toContain("...(truncated)...");
695+
expect(JSON.stringify(onToolResult.mock.calls)).not.toContain(
696+
"final output should not duplicate",
697+
);
698+
});
699+
599700
it("continues projecting turn completion when an event consumer throws", async () => {
600701
const onAgentEvent = vi.fn(() => {
601702
throw new Error("consumer failed");

extensions/codex/src/app-server/event-projector.ts

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
33
import { SessionManager } from "@mariozechner/pi-coding-agent";
44
import {
55
formatErrorMessage,
6+
formatToolProgressOutput,
67
inferToolMetaFromArgs,
78
normalizeUsage,
89
runAgentHarnessAfterCompactionHook,
910
runAgentHarnessBeforeCompactionHook,
1011
type EmbeddedRunAttemptParams,
1112
type EmbeddedRunAttemptResult,
13+
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
1214
formatToolAggregate,
1315
type MessagingToolSend,
1416
} from "openclaw/plugin-sdk/agent-harness-runtime";
@@ -56,6 +58,8 @@ const CURRENT_TOKEN_USAGE_KEYS = [
5658
"last_token_usage",
5759
] as const;
5860

61+
const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20;
62+
5963
export class CodexAppServerEventProjector {
6064
private readonly assistantTextByItem = new Map<string, string>();
6165
private readonly assistantItemOrder: string[] = [];
@@ -66,6 +70,11 @@ export class CodexAppServerEventProjector {
6670
private readonly activeCompactionItemIds = new Set<string>();
6771
private readonly toolResultSummaryItemIds = new Set<string>();
6872
private readonly toolResultOutputItemIds = new Set<string>();
73+
private readonly toolResultOutputStreamedItemIds = new Set<string>();
74+
private readonly toolResultOutputDeltaState = new Map<
75+
string,
76+
{ chars: number; messages: number; truncated: boolean }
77+
>();
6978
private readonly toolMetas = new Map<string, { toolName: string; meta?: string }>();
7079
private assistantStarted = false;
7180
private reasoningStarted = false;
@@ -489,10 +498,44 @@ export class CodexAppServerEventProjector {
489498
if (!itemId || !delta || !this.shouldEmitToolOutput()) {
490499
return;
491500
}
501+
const state = this.toolResultOutputDeltaState.get(itemId) ?? {
502+
chars: 0,
503+
messages: 0,
504+
truncated: false,
505+
};
506+
if (state.truncated) {
507+
return;
508+
}
509+
const remainingChars = Math.max(0, TOOL_PROGRESS_OUTPUT_MAX_CHARS - state.chars);
510+
const remainingMessages = Math.max(0, MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM - state.messages);
511+
if (remainingChars === 0 || remainingMessages === 0) {
512+
state.truncated = true;
513+
this.toolResultOutputDeltaState.set(itemId, state);
514+
this.emitToolResultMessage({
515+
itemId,
516+
text: formatToolOutput(toolName, undefined, "(output truncated)"),
517+
});
518+
return;
519+
}
520+
const chunk = delta.length > remainingChars ? delta.slice(0, remainingChars) : delta;
521+
state.chars += chunk.length;
522+
state.messages += 1;
523+
const reachedLimit =
524+
delta.length > remainingChars ||
525+
state.chars >= TOOL_PROGRESS_OUTPUT_MAX_CHARS ||
526+
state.messages >= MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM;
527+
if (reachedLimit) {
528+
state.truncated = true;
529+
}
530+
this.toolResultOutputDeltaState.set(itemId, state);
531+
this.toolResultOutputStreamedItemIds.add(itemId);
492532
this.emitToolResultMessage({
493533
itemId,
494-
text: formatToolOutput(toolName, undefined, delta),
495-
output: true,
534+
text: formatToolOutput(
535+
toolName,
536+
undefined,
537+
reachedLimit ? `${chunk}\n...(truncated)...` : chunk,
538+
),
496539
});
497540
}
498541

@@ -588,6 +631,9 @@ export class CodexAppServerEventProjector {
588631
if (this.toolResultOutputItemIds.has(itemId)) {
589632
return;
590633
}
634+
if (this.toolResultOutputStreamedItemIds.has(itemId)) {
635+
return;
636+
}
591637
const toolName = itemName(item);
592638
const output = itemOutputText(item);
593639
if (!toolName || !output) {
@@ -596,12 +642,16 @@ export class CodexAppServerEventProjector {
596642
this.emitToolResultMessage({
597643
itemId,
598644
text: formatToolOutput(toolName, itemMeta(item), output),
599-
output: true,
645+
finalOutput: true,
600646
});
601647
}
602648

603-
private emitToolResultMessage(params: { itemId: string; text: string; output?: boolean }): void {
604-
if (params.output) {
649+
private emitToolResultMessage(params: {
650+
itemId: string;
651+
text: string;
652+
finalOutput?: boolean;
653+
}): void {
654+
if (params.finalOutput) {
605655
this.toolResultOutputItemIds.add(params.itemId);
606656
}
607657
try {
@@ -934,7 +984,10 @@ function itemName(item: CodexThreadItem): string | undefined {
934984

935985
function itemMeta(item: CodexThreadItem): string | undefined {
936986
if (item.type === "commandExecution" && typeof item.command === "string") {
937-
return item.command;
987+
return inferToolMetaFromArgs("exec", {
988+
command: item.command,
989+
cwd: typeof item.cwd === "string" ? item.cwd : undefined,
990+
});
938991
}
939992
if (item.type === "webSearch" && typeof item.query === "string") {
940993
return item.query;
@@ -995,11 +1048,30 @@ function formatToolSummary(toolName: string, meta?: string): string {
9951048
}
9961049

9971050
/**
 * Build the verbose progress message for a tool: the tool summary line,
 * followed by the prepared output inside a fenced `txt` block.
 *
 * The raw output is first passed through `formatToolProgressOutput`; when that
 * yields nothing, only the summary line is returned. The fence is sized by
 * `markdownFenceForText` from the prepared output.
 *
 * @param toolName - Display name of the tool.
 * @param meta - Optional detail forwarded to the summary line.
 * @param output - Raw tool output text.
 * @returns The summary alone, or the summary plus a fenced output block.
 */
function formatToolOutput(toolName: string, meta: string | undefined, output: string): string {
  const body = formatToolProgressOutput(output);
  if (!body) {
    return formatToolSummary(toolName, meta);
  }
  const fence = markdownFenceForText(body);
  const summary = formatToolSummary(toolName, meta);
  return [summary, `${fence}txt`, body, fence].join("\n");
}
1058+
1059+
/**
 * Build a backtick fence for wrapping `text` in a Markdown code block.
 *
 * The fence is at least three backticks long and always one backtick longer
 * than the longest backtick run found in `text`.
 *
 * @param text - The content that will be placed inside the fence.
 * @returns A string consisting only of backticks.
 */
function markdownFenceForText(text: string): string {
  const fenceLength = Math.max(3, longestBacktickRun(text) + 1);
  return "`".repeat(fenceLength);
}
1062+
1063+
/**
 * Measure the longest run of consecutive backtick characters in `value`.
 *
 * @param value - Text to scan.
 * @returns The length of the longest backtick run, or 0 when there are none.
 */
function longestBacktickRun(value: string): number {
  const runs = value.match(/`+/g);
  if (runs === null) {
    return 0;
  }
  // Fold instead of spreading into Math.max so many runs cannot overflow the argument list.
  return runs.reduce((longest, run) => Math.max(longest, run.length), 0);
}
10041076

10051077
function readItemString(item: CodexThreadItem, key: string): string | undefined {

src/auto-reply/tool-meta.test.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ describe("tool meta formatting", () => {
4545
expect(out).toContain("`~/dir/a.txt`");
4646
});
4747

48+
it("uses a longer inline code delimiter when meta contains backticks", () => {
49+
const out = formatToolAggregate("fs", ["name `with` ticks"], { markdown: true });
50+
expect(out).toBe("🧩 Fs: ``name `with` ticks``");
51+
});
52+
4853
it("keeps exec flags outside markdown and moves them to the front", () => {
4954
vi.stubEnv("HOME", home);
5055
const out = formatToolAggregate("exec", [`cd ${home}/dir && gemini 2>&1 · elevated`], {

src/auto-reply/tool-meta.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,21 @@ function maybeWrapMarkdown(value: string, markdown?: boolean): string {
137137
if (!markdown) {
138138
return value;
139139
}
140-
if (value.includes("`")) {
141-
return value;
140+
const delimiter = "`".repeat(longestBacktickRun(value) + 1);
141+
const padding = value.startsWith("`") || value.endsWith("`") || value.includes("\n") ? " " : "";
142+
return `${delimiter}${padding}${value}${padding}${delimiter}`;
143+
}
144+
145+
/**
 * Return the length of the longest sequence of adjacent backticks in `value`.
 *
 * @param value - Text to scan.
 * @returns 0 when `value` contains no backtick, otherwise the longest run length.
 */
function longestBacktickRun(value: string): number {
  let best = 0;
  let start = value.indexOf("`");
  while (start !== -1) {
    // Walk to the end of the run that begins at `start`.
    let end = start + 1;
    while (end < value.length && value.charAt(end) === "`") {
      end += 1;
    }
    best = Math.max(best, end - start);
    start = value.indexOf("`", end);
  }
  return best;
}

src/plugin-sdk/agent-harness-runtime.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
// register quickly inside gateway startup and Docker e2e runs.
44

55
import { formatToolDetail, resolveToolDisplay } from "../agents/tool-display.js";
6+
import { redactToolDetail } from "../logging/redact.js";
7+
import { truncateUtf16Safe } from "../utils.js";
8+
9+
export const TOOL_PROGRESS_OUTPUT_MAX_CHARS = 8_000;
610

711
export type {
812
AgentHarness,
@@ -96,3 +100,22 @@ export function inferToolMetaFromArgs(toolName: string, args: unknown): string |
96100
const display = resolveToolDisplay({ name: toolName, args });
97101
return formatToolDetail(display);
98102
}
103+
104+
/**
 * Prepare verbose tool output for user-facing progress messages.
 *
 * Line endings are normalized to `\n` and surrounding whitespace is trimmed.
 * The remaining text is passed through `redactToolDetail`, and the length
 * limit is applied to the redacted text. Text over the limit is cut with
 * `truncateUtf16Safe` and suffixed with a `...(truncated)...` marker line.
 *
 * @param output - Raw tool output text.
 * @param options - Optional overrides; `maxChars` defaults to
 *   `TOOL_PROGRESS_OUTPUT_MAX_CHARS`.
 * @returns The prepared text, or `undefined` when nothing remains after trimming.
 */
export function formatToolProgressOutput(
  output: string,
  options?: { maxChars?: number },
): string | undefined {
  const normalized = output.replace(/\r\n?/g, "\n").trim();
  if (normalized.length === 0) {
    return undefined;
  }
  const safeText = redactToolDetail(normalized);
  const limit = options?.maxChars ?? TOOL_PROGRESS_OUTPUT_MAX_CHARS;
  return safeText.length > limit
    ? `${truncateUtf16Safe(safeText, limit)}\n...(truncated)...`
    : safeText;
}

0 commit comments

Comments
 (0)