fix(logging): redact persisted transcript text

vincentkoc · vincentkoc · commit 406ae72fd278 · 2026-04-26T12:12:44.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Logging/sessions: apply configured redaction patterns to persisted session transcript text and accept escaped character classes in safe custom redaction regexes, so transcript JSONL no longer keeps matching sensitive text in the clear. Fixes #42982. Thanks @panpan0000.
 - Auto-reply: poison inbound message dedupe after replay-unsafe provider/runtime failures so retries stay safe before visible progress but cannot duplicate messages after block output, tool side effects, or session progress. Fixes #69303; keeps #58549 and #64606 as duplicate validation. Thanks @martingarramon, @NikolaFC, and @zeroth-blip.
 - Agents/model fallback: jump directly to a known later live-session model redirect instead of walking unrelated fallback candidates, while preserving the already-landed live-session/fallback loop guard. Fixes #57471; related loop family already closed via #58496. Thanks @yuxiaoyang2007-prog.
 - Gateway/Bonjour: keep @homebridge/ciao cancellation handlers registered across advertiser restarts so late probing cancellations cannot crash Linux and other mDNS-churned gateways. Thanks @codex.
diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256
@@ -1,4 +1,4 @@
-7fa6e35bb9f9d3096d6281f141488be0dcfe15de40dc4f5c0305eb1ff2bc60b6  config-baseline.json
-5f5fb87fd46f9cbb84d8af17e00ae3c4b74062e8ad517bc2260ba83da2e9014f  config-baseline.core.json
+4d1995e41b659e484afb5a48d6fca0558337123200a4a537f556ca38e8e829e7  config-baseline.json
+3245c9a013c55ee8a24db52d5e88c42bc86e26f822d4a144fc7f37fc71e05fa8  config-baseline.core.json
 7cd9c908f066c143eab2a201efbc9640f483ab28bba92ddeca1d18cc2b528bc3  config-baseline.channel.json
 f9e0174988718959fe1923a54496ec5b9262721fe1e7306f32ccb1316d9d9c3f  config-baseline.plugin.json
diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md
@@ -859,6 +859,7 @@ Notes:
 - Set `logging.file` for a stable path.
 - `consoleLevel` bumps to `debug` when `--verbose`.
 - `maxFileBytes`: maximum active log file size in bytes before rotation (positive integer; default: `104857600` = 100 MB). OpenClaw keeps up to five numbered archives beside the active file.
+- `redactSensitive` / `redactPatterns`: best-effort masking for console output, file logs, OTLP log records, and persisted session transcript text.
 
 ---
 
diff --git a/docs/gateway/logging.md b/docs/gateway/logging.md
@@ -54,9 +54,10 @@ You can tune console verbosity independently via:
 
 ## Redaction
 
-OpenClaw can mask sensitive tokens before log output leaves the process. The
-same redaction policy is applied at console and file-log sinks, so matching
-secret values are masked before JSONL lines are written to disk.
+OpenClaw can mask sensitive tokens before log or transcript output leaves the
+process. The same redaction policy is applied at console, file-log, OTLP
+log-record, and session transcript text sinks, so matching secret values are
+masked before JSONL lines or messages are written to disk.
 
 - `logging.redactSensitive`: `off` | `tools` (default: `tools`)
 - `logging.redactPatterns`: array of regex strings (overrides defaults)
diff --git a/docs/gateway/security/index.md b/docs/gateway/security/index.md
@@ -999,7 +999,7 @@ Logs and transcripts can leak sensitive info even when access controls are corre
 
 Recommendations:
 
-- Keep tool summary redaction on (`logging.redactSensitive: "tools"`; default).
+- Keep log and transcript redaction on (`logging.redactSensitive: "tools"`; default).
 - Add custom patterns for your environment via `logging.redactPatterns` (tokens, hostnames, internal URLs).
 - When sharing diagnostics, prefer `openclaw status --all` (pasteable, secrets redacted) over raw logs.
 - Prune old session transcripts and log files if you don’t need long retention.
diff --git a/docs/logging.md b/docs/logging.md
@@ -167,14 +167,16 @@ file log levels.
 
 ### Redaction
 
-Tool summaries can redact sensitive tokens before they hit the console:
+OpenClaw can redact sensitive tokens before they hit console output, file logs,
+OTLP log records, or persisted session transcript text:
 
 - `logging.redactSensitive`: `off` | `tools` (default: `tools`)
 - `logging.redactPatterns`: list of regex strings to override the default set
 
-Redaction applies at the logging sinks for **console output**, **stderr-routed
-console diagnostics**, and **file logs**. File logs stay JSONL, but matching
-secret values are masked before the line is written to disk.
+File logs and session transcripts stay JSONL, but matching secret values are
+masked before the line or message is written to disk. Redaction is best-effort:
+it covers text-bearing message content and log strings, not every identifier,
+structured tool-call argument, or binary payload field.
 
 ## Diagnostics and OpenTelemetry
 
diff --git a/src/agents/pi-embedded-runner.guard.test.ts b/src/agents/pi-embedded-runner.guard.test.ts
@@ -1,6 +1,7 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import { SessionManager } from "@mariozechner/pi-coding-agent";
 import { describe, expect, it } from "vitest";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { guardSessionManager } from "./session-tool-result-guard-wrapper.js";
 import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js";
 
@@ -35,4 +36,46 @@ describe("guardSessionManager integration", () => {
       "assistant",
     ]);
   });
+
+  it("redacts configured text patterns before persisting transcript messages", () => {
+    const cfg = {
+      logging: {
+        redactSensitive: "tools",
+        redactPatterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`],
+      },
+    } satisfies OpenClawConfig;
+    const sm = guardSessionManager(SessionManager.inMemory(), { config: cfg });
+    const appendMessage = sm.appendMessage.bind(sm) as unknown as (message: AgentMessage) => void;
+
+    appendMessage({
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "the email is peter@dc.io", thinkingSignature: "sig" },
+        { type: "text", text: "contact peter@dc.io" },
+        { type: "toolCall", id: "call_1", name: "read", arguments: { path: "/tmp/peter@dc.io" } },
+      ],
+      stopReason: "toolUse",
+    } as AgentMessage);
+    appendMessage({
+      role: "toolResult",
+      toolCallId: "call_1",
+      toolName: "read",
+      content: [{ type: "text", text: "peter@dc.io\n" }],
+      isError: false,
+    } as AgentMessage);
+
+    const messages = sm
+      .getEntries()
+      .filter((e) => e.type === "message")
+      .map((e) => (e as { message: AgentMessage }).message);
+    const serialized = JSON.stringify(messages);
+
+    expect(serialized).not.toContain("the email is peter@dc.io");
+    expect(serialized).not.toContain("contact peter@dc.io");
+    expect(serialized).not.toContain("peter@dc.io\\n");
+    expect(serialized).toContain('"thinking":"the email is peter@d***.io"');
+    expect(serialized).toContain('"text":"contact peter@d***.io"');
+    expect(serialized).toContain('"text":"peter@d***.io\\n"');
+    expect(serialized).toContain('"/tmp/peter@dc.io"');
+  });
 });
diff --git a/src/agents/session-tool-result-guard-wrapper.ts b/src/agents/session-tool-result-guard-wrapper.ts
@@ -1,6 +1,7 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import type { SessionManager } from "@mariozechner/pi-coding-agent";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
+import { redactSensitiveText } from "../logging/redact.js";
 import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
 import {
   applyInputProvenanceToUserMessage,
@@ -16,6 +17,71 @@ export type GuardedSessionManager = SessionManager & {
   clearPendingToolResults?: () => void;
 };
 
+/** Runs `redactSensitiveText` with the configured mode/patterns; "off" returns `value` as-is. */
+function redactTranscriptText(value: string, cfg?: OpenClawConfig): string {
+  const logging = cfg?.logging;
+  const mode = logging?.redactSensitive;
+  if (mode === "off") {
+    return value;
+  }
+  return redactSensitiveText(value, { mode, patterns: logging?.redactPatterns });
+}
+
+/**
+ * Redacts the string-valued `text`, `thinking`, and `partialJson` fields of a
+ * content block. Non-object and array inputs pass through; a shallow copy is
+ * made only when a field changes, otherwise the same reference is returned.
+ */
+function redactTranscriptContentBlock(block: unknown, cfg?: OpenClawConfig): unknown {
+  const isPlainBlock = Boolean(block) && typeof block === "object" && !Array.isArray(block);
+  if (!isPlainBlock) {
+    return block;
+  }
+  const original = block as Record<string, unknown>;
+  let copy: Record<string, unknown> | undefined;
+  for (const field of ["text", "thinking", "partialJson"]) {
+    const raw = original[field];
+    if (typeof raw !== "string") {
+      continue;
+    }
+    const masked = redactTranscriptText(raw, cfg);
+    if (masked !== raw) {
+      // Copy lazily so unchanged blocks keep their identity.
+      copy ??= { ...original };
+      copy[field] = masked;
+    }
+  }
+  return copy ?? block;
+}
+
+/**
+ * Redacts string content directly, or each block of array content. Returns the
+ * input reference itself when nothing was masked.
+ */
+function redactTranscriptContent(content: unknown, cfg?: OpenClawConfig): unknown {
+  if (typeof content === "string") {
+    return redactTranscriptText(content, cfg);
+  }
+  if (!Array.isArray(content)) {
+    return content;
+  }
+  const masked = content.map((entry) => redactTranscriptContentBlock(entry, cfg));
+  const touched = masked.some((entry, index) => entry !== content[index]);
+  return touched ? masked : content;
+}
+
+/** Returns `message` itself when its content needs no redaction, else a shallow copy. */
+function redactTranscriptMessage(message: AgentMessage, cfg?: OpenClawConfig): AgentMessage {
+  const fields = message as unknown as Record<string, unknown>;
+  const { content } = fields;
+  const masked = redactTranscriptContent(content, cfg);
+  if (masked !== content) {
+    // Only `content` is rewritten; every other field is carried over as-is.
+    return { ...fields, content: masked } as unknown as AgentMessage;
+  }
+  return message;
+}
+
 /**
  * Apply the tool-result guard to a SessionManager exactly once and expose
  * a flush method on the instance for easy teardown handling.
@@ -38,14 +104,31 @@ export function guardSessionManager(
   }
 
   const hookRunner = getGlobalHookRunner();
-  const beforeMessageWrite = hookRunner?.hasHooks("before_message_write")
-    ? (event: { message: import("@mariozechner/pi-agent-core").AgentMessage }) => {
-        return hookRunner.runBeforeMessageWrite(event, {
-          agentId: opts?.agentId,
-          sessionKey: opts?.sessionKey,
-        });
+  const beforeMessageWrite = (event: {
+    message: import("@mariozechner/pi-agent-core").AgentMessage;
+  }) => {
+    let message = event.message;
+    let changed = false;
+    if (hookRunner?.hasHooks("before_message_write")) {
+      const result = hookRunner.runBeforeMessageWrite(event, {
+        agentId: opts?.agentId,
+        sessionKey: opts?.sessionKey,
+      });
+      if (result?.block) {
+        return result;
       }
-    : undefined;
+      if (result?.message) {
+        message = result.message;
+        changed = true;
+      }
+    }
+    const redacted = redactTranscriptMessage(message, opts?.config);
+    if (redacted !== message) {
+      message = redacted;
+      changed = true;
+    }
+    return changed ? { message } : undefined;
+  };
 
   const transform = hookRunner?.hasHooks("tool_result_persist")
     ? (
diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts
@@ -466,7 +466,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
             ],
             title: "Sensitive Data Redaction Mode",
             description:
-              'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.',
+              'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.',
           },
           redactPatterns: {
             type: "array",
@@ -475,7 +475,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
             },
             title: "Custom Redaction Patterns",
             description:
-              "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
+              "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
           },
         },
         additionalProperties: false,
@@ -23982,12 +23982,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
     },
     "logging.redactSensitive": {
       label: "Sensitive Data Redaction Mode",
-      help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.',
+      help: 'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.',
       tags: ["privacy", "observability"],
     },
     "logging.redactPatterns": {
       label: "Custom Redaction Patterns",
-      help: "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
+      help: "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
       tags: ["privacy", "observability"],
     },
     "cli.banner": {
diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts
@@ -43,9 +43,9 @@ export const FIELD_HELP: Record<string, string> = {
   "logging.consoleStyle":
     'Console output format style: "pretty", "compact", or "json" based on operator and ingestion needs. Use json for machine parsing pipelines and pretty/compact for human-first terminal workflows.',
   "logging.redactSensitive":
-    'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields. Keep "tools" in shared logs unless you have isolated secure log sinks.',
+    'Sensitive redaction mode: "off" disables built-in masking, while "tools" redacts sensitive tool/config payload fields in log sinks and persisted transcript text. Keep "tools" enabled unless logs and transcripts are isolated.',
   "logging.redactPatterns":
-    "Additional custom redact regex patterns applied to log output before emission/storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
+    "Additional custom redact regex patterns applied to log output and persisted transcript text before storage. Use this to mask org-specific tokens and identifiers not covered by built-in redaction rules.",
   cli: "CLI presentation controls for local command output behavior such as banner and tagline style. Use this section to keep startup output aligned with operator preference without changing runtime behavior.",
   "cli.banner":
     "CLI startup banner controls for title/version line and tagline style behavior. Keep banner enabled for fast version/context checks, then tune tagline mode to your preferred noise level.",
diff --git a/src/config/types.base.ts b/src/config/types.base.ts
@@ -225,9 +225,9 @@ export type LoggingConfig = {
   maxFileBytes?: number;
   consoleLevel?: "silent" | "fatal" | "error" | "warn" | "info" | "debug" | "trace";
   consoleStyle?: "pretty" | "compact" | "json";
-  /** Redact sensitive tokens in tool summaries. Default: "tools". */
+  /** Redact sensitive tokens in log sinks and persisted transcript text. Default: "tools". */
   redactSensitive?: "off" | "tools";
-  /** Regex patterns used to redact sensitive tokens (defaults apply when unset). */
+  /** Regex patterns for redacting logs and transcripts (defaults apply when unset). */
   redactPatterns?: string[];
 };
 
diff --git a/src/logging/redact.test.ts b/src/logging/redact.test.ts
@@ -132,6 +132,16 @@ describe("redactSensitiveText", () => {
     expect(output).toBe("token=abcdef…ghij");
   });
 
+  it("honors escaped character classes in custom patterns", () => {
+    const input = "contact peter@dc.io";
+    const output = redactSensitiveText(input, {
+      mode: "tools",
+      patterns: [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`],
+    });
+    expect(output).toBe("contact peter@d***.io");
+    expect(output).not.toContain("peter@dc.io");
+  });
+
   it("ignores unsafe nested-repetition custom patterns", () => {
     const input = `${"a".repeat(28)}!`;
     const output = redactSensitiveText(input, {
diff --git a/src/security/safe-regex.test.ts b/src/security/safe-regex.test.ts
@@ -12,6 +12,7 @@ describe("safe regex", () => {
     ["(a|aa)+$", true],
     ["^(?:foo|bar)$", false],
     ["^(ab|cd)+$", false],
+    [String.raw`([\w]|[-.])+@([\w]|[-.])+\.\w+`, false],
   ] as const)("classifies nested repetition for %s", (pattern, expected) => {
     expect(hasNestedRepetition(pattern)).toBe(expected);
   });
diff --git a/src/security/safe-regex.ts b/src/security/safe-regex.ts
@@ -140,19 +140,23 @@ function tokenizePattern(source: string): PatternToken[] {
   for (let i = 0; i < source.length; i += 1) {
     const ch = source[i];
 
-    if (ch === "\\") {
-      i += 1;
-      tokens.push({ kind: "simple-token" });
-      continue;
-    }
-
     if (inCharClass) {
+      if (ch === "\\") {
+        i += 1;
+        continue;
+      }
       if (ch === "]") {
         inCharClass = false;
       }
       continue;
     }
 
+    if (ch === "\\") {
+      i += 1;
+      tokens.push({ kind: "simple-token" });
+      continue;
+    }
+
     if (ch === "[") {
       inCharClass = true;
       tokens.push({ kind: "simple-token" });