fix: harden codex verbose tool progress (#70966) (thanks @jalehman)

steipete · steipete · commit 50e36983bb2d · 2026-04-24T08:10:04.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,7 +30,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 
 - Voice-call/Telnyx: preserve inbound/outbound callback metadata and read transcription text from Telnyx's current `transcription_data` payload.
-- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior.
+- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior. (#70966) Thanks @jalehman.
 - Codex harness: route native `request_user_input` prompts back to the originating chat, preserve queued follow-up answers, and honor newer app-server command approval amendment decisions.
 - Codex status: report Codex CLI OAuth as `oauth (codex-cli)` for native `codex/*` sessions instead of showing unknown auth. Fixes #70688. Thanks @jb510.
 - Codex harness/context-engine: redact context-engine assembly failures before logging, so fallback warnings do not serialize raw error objects. (#70809) Thanks @jalehman.
diff --git a/extensions/codex/src/app-server/event-projector.test.ts b/extensions/codex/src/app-server/event-projector.test.ts
@@ -529,10 +529,41 @@ describe("CodexAppServerEventProjector", () => {
 
     expect(onToolResult).toHaveBeenCalledTimes(1);
     expect(onToolResult).toHaveBeenCalledWith({
-      text: "🛠️ Bash: `pnpm test extensions/codex`",
+      text: "🛠️ Bash: `` run tests (in /workspace), `pnpm test extensions/codex` ``",
     });
   });
 
+  it("redacts secrets in verbose command summaries", async () => {
+    const onToolResult = vi.fn();
+    const projector = await createProjector({
+      ...(await createParams()),
+      verboseLevel: "on",
+      onToolResult,
+    });
+
+    await projector.handleNotification(
+      forCurrentTurn("item/started", {
+        item: {
+          type: "commandExecution",
+          id: "cmd-1",
+          command: "OPENAI_API_KEY=sk-1234567890abcdefZZZZ pnpm test", // fake API key embedded in the command line to exercise redaction
+          cwd: "/workspace",
+          processId: null,
+          source: "agent",
+          status: "inProgress",
+          commandActions: [],
+          aggregatedOutput: null,
+          exitCode: null,
+          durationMs: null,
+        },
+      }),
+    );
+
+    const text = onToolResult.mock.calls[0]?.[0]?.text; // text of the first emitted tool-result message
+    expect(text).toContain("sk-123…ZZZZ"); // masked form: start and end of the key kept, middle elided
+    expect(text).not.toContain("sk-1234567890abcdefZZZZ"); // the raw key must not appear in the emitted text
+  });
+
   it("uses argument details instead of lifecycle status in verbose tool summaries", async () => {
     const onToolResult = vi.fn();
     const projector = await createProjector({
@@ -596,6 +627,76 @@ describe("CodexAppServerEventProjector", () => {
     });
   });
 
+  it("uses a safe markdown fence for verbose tool output", async () => {
+    const onToolResult = vi.fn();
+    const projector = await createProjector({
+      ...(await createParams()),
+      verboseLevel: "full",
+      onToolResult,
+    });
+
+    await projector.handleNotification(
+      turnCompleted([
+        {
+          type: "dynamicToolCall",
+          id: "tool-1",
+          namespace: null,
+          tool: "read",
+          arguments: { path: "README.md" },
+          status: "completed",
+          contentItems: [{ type: "inputText", text: "line\n```\nMEDIA:/tmp/secret.png" }], // output carries its own triple-backtick fence line
+          success: true,
+          durationMs: 12,
+        },
+      ]),
+    );
+
+    expect(onToolResult).toHaveBeenNthCalledWith(2, { // second call; the first is presumably the tool summary — confirm
+      text: "📖 Read: `from README.md`\n````txt\nline\n```\nMEDIA:/tmp/secret.png\n````", // wrapped in a four-backtick fence so the inner fence cannot close it
+    });
+  });
+
+  it("bounds streamed verbose tool output", async () => {
+    const onToolResult = vi.fn();
+    const projector = await createProjector({
+      ...(await createParams()),
+      verboseLevel: "full",
+      onToolResult,
+    });
+
+    for (let i = 0; i < 25; i += 1) { // 25 deltas: more than the 20-message per-item cap in event-projector.ts
+      await projector.handleNotification(
+        forCurrentTurn("item/commandExecution/outputDelta", {
+          itemId: "cmd-1",
+          delta: `line ${i}\n`,
+        }),
+      );
+    }
+    await projector.handleNotification(
+      turnCompleted([
+        {
+          type: "commandExecution",
+          id: "cmd-1",
+          command: "pnpm test",
+          cwd: "/workspace",
+          processId: null,
+          source: "agent",
+          status: "completed",
+          commandActions: [],
+          aggregatedOutput: "final output should not duplicate streamed output", // sentinel text asserted absent below
+          exitCode: 0,
+          durationMs: 12,
+        },
+      ]),
+    );
+
+    expect(onToolResult).toHaveBeenCalledTimes(21); // 20 streamed delta messages plus one more call — presumably the completion summary; confirm
+    expect(onToolResult.mock.calls[19]?.[0]?.text).toContain("...(truncated)..."); // the 20th call (index 19) carries the truncation marker
+    expect(JSON.stringify(onToolResult.mock.calls)).not.toContain( // aggregated output is not re-sent once deltas were streamed
+      "final output should not duplicate",
+    );
+  });
+
   it("continues projecting turn completion when an event consumer throws", async () => {
     const onAgentEvent = vi.fn(() => {
       throw new Error("consumer failed");
diff --git a/extensions/codex/src/app-server/event-projector.ts b/extensions/codex/src/app-server/event-projector.ts
@@ -3,12 +3,14 @@ import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
 import { SessionManager } from "@mariozechner/pi-coding-agent";
 import {
   formatErrorMessage,
+  formatToolProgressOutput,
   inferToolMetaFromArgs,
   normalizeUsage,
   runAgentHarnessAfterCompactionHook,
   runAgentHarnessBeforeCompactionHook,
   type EmbeddedRunAttemptParams,
   type EmbeddedRunAttemptResult,
+  TOOL_PROGRESS_OUTPUT_MAX_CHARS,
   formatToolAggregate,
   type MessagingToolSend,
 } from "openclaw/plugin-sdk/agent-harness-runtime";
@@ -56,6 +58,8 @@ const CURRENT_TOKEN_USAGE_KEYS = [
   "last_token_usage",
 ] as const;
 
+const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20; // upper bound on streamed output-delta messages emitted for a single tool item
+
 export class CodexAppServerEventProjector {
   private readonly assistantTextByItem = new Map<string, string>();
   private readonly assistantItemOrder: string[] = [];
@@ -66,6 +70,11 @@ export class CodexAppServerEventProjector {
   private readonly activeCompactionItemIds = new Set<string>();
   private readonly toolResultSummaryItemIds = new Set<string>();
   private readonly toolResultOutputItemIds = new Set<string>();
+  private readonly toolResultOutputStreamedItemIds = new Set<string>(); // item ids that already had an output delta emitted; their final output is skipped
+  private readonly toolResultOutputDeltaState = new Map<
+    string,
+    { chars: number; messages: number; truncated: boolean }
+  >(); // per-item streaming budget: characters and messages emitted so far, and whether streaming was already cut off
   private readonly toolMetas = new Map<string, { toolName: string; meta?: string }>();
   private assistantStarted = false;
   private reasoningStarted = false;
@@ -489,10 +498,44 @@ export class CodexAppServerEventProjector {
     if (!itemId || !delta || !this.shouldEmitToolOutput()) {
       return;
     }
+    const state = this.toolResultOutputDeltaState.get(itemId) ?? {
+      chars: 0,
+      messages: 0,
+      truncated: false,
+    };
+    if (state.truncated) {
+      return;
+    }
+    const remainingChars = Math.max(0, TOOL_PROGRESS_OUTPUT_MAX_CHARS - state.chars);
+    const remainingMessages = Math.max(0, MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM - state.messages);
+    if (remainingChars === 0 || remainingMessages === 0) {
+      state.truncated = true;
+      this.toolResultOutputDeltaState.set(itemId, state);
+      this.emitToolResultMessage({
+        itemId,
+        text: formatToolOutput(toolName, undefined, "(output truncated)"),
+      });
+      return;
+    }
+    const chunk = delta.length > remainingChars ? delta.slice(0, remainingChars) : delta;
+    state.chars += chunk.length;
+    state.messages += 1;
+    const reachedLimit =
+      delta.length > remainingChars ||
+      state.chars >= TOOL_PROGRESS_OUTPUT_MAX_CHARS ||
+      state.messages >= MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM;
+    if (reachedLimit) {
+      state.truncated = true;
+    }
+    this.toolResultOutputDeltaState.set(itemId, state);
+    this.toolResultOutputStreamedItemIds.add(itemId);
     this.emitToolResultMessage({
       itemId,
-      text: formatToolOutput(toolName, undefined, delta),
-      output: true,
+      text: formatToolOutput(
+        toolName,
+        undefined,
+        reachedLimit ? `${chunk}\n...(truncated)...` : chunk,
+      ),
     });
   }
 
@@ -588,6 +631,9 @@ export class CodexAppServerEventProjector {
     if (this.toolResultOutputItemIds.has(itemId)) {
       return;
     }
+    if (this.toolResultOutputStreamedItemIds.has(itemId)) {
+      return;
+    }
     const toolName = itemName(item);
     const output = itemOutputText(item);
     if (!toolName || !output) {
@@ -596,12 +642,16 @@ export class CodexAppServerEventProjector {
     this.emitToolResultMessage({
       itemId,
       text: formatToolOutput(toolName, itemMeta(item), output),
-      output: true,
+      finalOutput: true,
     });
   }
 
-  private emitToolResultMessage(params: { itemId: string; text: string; output?: boolean }): void {
-    if (params.output) {
+  private emitToolResultMessage(params: {
+    itemId: string;
+    text: string;
+    finalOutput?: boolean;
+  }): void {
+    if (params.finalOutput) {
       this.toolResultOutputItemIds.add(params.itemId);
     }
     try {
@@ -934,7 +984,10 @@ function itemName(item: CodexThreadItem): string | undefined {
 
 function itemMeta(item: CodexThreadItem): string | undefined {
   if (item.type === "commandExecution" && typeof item.command === "string") {
-    return item.command;
+    return inferToolMetaFromArgs("exec", {
+      command: item.command,
+      cwd: typeof item.cwd === "string" ? item.cwd : undefined,
+    });
   }
   if (item.type === "webSearch" && typeof item.query === "string") {
     return item.query;
@@ -995,11 +1048,30 @@ function formatToolSummary(toolName: string, meta?: string): string {
 }
 
 function formatToolOutput(toolName: string, meta: string | undefined, output: string): string {
-  const trimmed = output.trim();
-  if (!trimmed) {
+  const formattedOutput = formatToolProgressOutput(output); // SDK helper; yields undefined when the output is blank
+  if (!formattedOutput) { // blank output: emit only the summary line
     return formatToolSummary(toolName, meta);
   }
-  return `${formatToolSummary(toolName, meta)}\n\`\`\`txt\n${trimmed}\n\`\`\``;
+  const fence = markdownFenceForText(formattedOutput); // fence sized so backticks inside the output cannot close the block early
+  return `${formatToolSummary(toolName, meta)}\n${fence}txt\n${formattedOutput}\n${fence}`; // summary line followed by the output in a txt-tagged fenced block
+}
+
+function markdownFenceForText(text: string): string { // returns the backtick fence string to wrap `text` with
+  return "`".repeat(Math.max(3, longestBacktickRun(text) + 1)); // at least three backticks, and one more than the longest run found in the text
+}
+
+function longestBacktickRun(value: string): number { // length of the longest run of consecutive backticks in `value` (0 when there are none)
+  let longest = 0; // longest run seen so far
+  let current = 0; // length of the run currently being scanned
+  for (const char of value) {
+    if (char === "`") {
+      current += 1;
+      longest = Math.max(longest, current);
+      continue;
+    }
+    current = 0; // any other character ends the current run
+  }
+  return longest;
 }
 
 function readItemString(item: CodexThreadItem, key: string): string | undefined {
diff --git a/src/auto-reply/tool-meta.test.ts b/src/auto-reply/tool-meta.test.ts
@@ -45,6 +45,11 @@ describe("tool meta formatting", () => {
     expect(out).toContain("`~/dir/a.txt`");
   });
 
+  it("uses a longer inline code delimiter when meta contains backticks", () => {
+    const out = formatToolAggregate("fs", ["name `with` ticks"], { markdown: true }); // meta contains single-backtick runs
+    expect(out).toBe("🧩 Fs: ``name `with` ticks``"); // delimiter is two backticks: one more than the longest run inside the meta
+  });
+
   it("keeps exec flags outside markdown and moves them to the front", () => {
     vi.stubEnv("HOME", home);
     const out = formatToolAggregate("exec", [`cd ${home}/dir && gemini 2>&1 · elevated`], {
diff --git a/src/auto-reply/tool-meta.ts b/src/auto-reply/tool-meta.ts
@@ -137,8 +137,21 @@ function maybeWrapMarkdown(value: string, markdown?: boolean): string {
   if (!markdown) {
     return value;
   }
-  if (value.includes("`")) {
-    return value;
+  const delimiter = "`".repeat(longestBacktickRun(value) + 1);
+  const padding = value.startsWith("`") || value.endsWith("`") || value.includes("\n") ? " " : "";
+  return `${delimiter}${padding}${value}${padding}${delimiter}`;
+}
+
+function longestBacktickRun(value: string): number { // length of the longest run of consecutive backticks in `value` (0 when there are none)
+  let longest = 0; // longest run seen so far
+  let current = 0; // length of the run currently being scanned
+  for (const char of value) {
+    if (char === "`") {
+      current += 1;
+      longest = Math.max(longest, current);
+      continue;
+    }
+    current = 0; // any other character ends the current run
   }
-  return `\`${value}\``;
+  return longest;
 }
diff --git a/src/plugin-sdk/agent-harness-runtime.ts b/src/plugin-sdk/agent-harness-runtime.ts
@@ -3,6 +3,10 @@
 // register quickly inside gateway startup and Docker e2e runs.
 
 import { formatToolDetail, resolveToolDisplay } from "../agents/tool-display.js";
+import { redactToolDetail } from "../logging/redact.js";
+import { truncateUtf16Safe } from "../utils.js";
+
+export const TOOL_PROGRESS_OUTPUT_MAX_CHARS = 8_000; // default character cap applied to verbose tool output in progress messages
 
 export type {
   AgentHarness,
@@ -96,3 +100,22 @@ export function inferToolMetaFromArgs(toolName: string, args: unknown): string |
   const display = resolveToolDisplay({ name: toolName, args });
   return formatToolDetail(display);
 }
+
+/**
+ * Prepare verbose tool output for user-facing progress messages: normalize line endings to LF, trim, pass the text through `redactToolDetail`, and cap it at `options.maxChars` (default `TOOL_PROGRESS_OUTPUT_MAX_CHARS`); returns `undefined` when nothing remains after trimming.
+ */
+export function formatToolProgressOutput(
+  output: string,
+  options?: { maxChars?: number },
+): string | undefined {
+  const trimmed = output.replace(/\r\n/g, "\n").replace(/\r/g, "\n").trim(); // CRLF and lone CR become LF before trimming
+  if (!trimmed) {
+    return undefined;
+  }
+  const redacted = redactToolDetail(trimmed);
+  const maxChars = options?.maxChars ?? TOOL_PROGRESS_OUTPUT_MAX_CHARS;
+  if (redacted.length <= maxChars) { // length is measured after redaction
+    return redacted;
+  }
+  return `${truncateUtf16Safe(redacted, maxChars)}\n...(truncated)...`; // marker is appended on top of the maxChars budget
+}