fix: preserve omitted thinking replay turns

steipete · steipete · commit a018db771d8b · 2026-04-25T19:54:28.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -80,6 +80,9 @@ Docs: https://docs.openclaw.ai
 - Agents/Bedrock: prevent empty assistant stream-error turns from poisoning
   Converse replay by persisting, repairing, and replaying a non-empty fallback
   block. Fixes #71572. (#71627) Thanks @openperf.
+- Agents/Anthropic/Bedrock: replace stripped thinking-only assistant replay
+  turns with non-empty omitted-reasoning text instead of dropping them, so
+  provider adapters keep strict user/assistant turn shape. Thanks @wujiaming88.
 - Browser/CDP: make readiness diagnostics use the same discovery-first fallback as reachability for bare `ws://` Browserless and Browserbase CDP URLs. Fixes #69532.
 - Browser/CDP: explain that loopback Browserless or other externally managed CDP services need `attachOnly: true` and matching Browserless `EXTERNAL` endpoint when reporting local port ownership conflicts, and fall back to the configured bare WebSocket root when a discovered Browserless endpoint rejects CDP. Fixes #49815.
 - Gateway/reload: preserve indefinite `gateway.reload.deferralTimeoutMs: 0` semantics for channel hot reload deferrals so active agent runs are not interrupted by a forced channel restart. (#71637) Thanks @Poo-Squirry.
diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md
@@ -133,13 +133,18 @@ external end-user instructions.
 
 - Tool result pairing repair and synthetic tool results.
 - Turn validation (merge consecutive user turns to satisfy strict alternation).
+- Older thinking-only assistant turns that must be stripped are replaced with
+  non-empty omitted-reasoning text so provider adapters do not drop the replay
+  turn.
 
 **Amazon Bedrock (Converse API)**
 
 - Empty assistant stream-error turns are repaired to a non-empty fallback text block
   before replay. Bedrock Converse rejects assistant messages with `content: []`, so
   persisted assistant turns with `stopReason: "error"` and empty content are also
   repaired on disk before load.
+- Older thinking-only assistant turns that must be stripped are replaced with
+  non-empty omitted-reasoning text so the Converse replay keeps strict turn shape.
 - Replay filters OpenClaw delivery-mirror and gateway-injected assistant turns.
 - Image sanitization applies through the global rule.
 
diff --git a/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts b/src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts
@@ -7,6 +7,7 @@ import {
 } from "./live-cache-test-support.js";
 import { isLiveTestEnabled } from "./live-test-helpers.js";
 import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js";
+import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
 import { buildAssistantMessageWithZeroUsage } from "./stream-message-shared.js";
 
 const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
@@ -33,7 +34,7 @@ function buildLiveAnthropicModel(): {
       name: modelId,
       api: "anthropic-messages" as const,
       provider: "anthropic",
-      baseUrl: "https://api.anthropic.com/v1",
+      baseUrl: "https://api.anthropic.com",
       reasoning: true,
       input: ["text"] as const,
       cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -44,6 +45,94 @@ function buildLiveAnthropicModel(): {
 }
 
 describeLive("pi embedded anthropic replay sanitization (live)", () => {
+  it(
+    "accepts regular text-only assistant replay history",
+    async () => {
+      const { apiKey, model } = buildLiveAnthropicModel();
+      const messages: Message[] = [
+        {
+          role: "user",
+          content: "Remember the marker REGULAR_ANTHROPIC_REPLAY_OK.",
+          timestamp: Date.now(),
+        },
+        buildAssistantMessageWithZeroUsage({
+          model: { api: model.api, provider: model.provider, id: model.id },
+          content: [{ type: "text", text: "I remember REGULAR_ANTHROPIC_REPLAY_OK." }],
+          stopReason: "stop",
+        }),
+        {
+          role: "user",
+          content: "Reply with a short confirmation if this replay history is valid.",
+          timestamp: Date.now(),
+        },
+      ];
+
+      logLiveCache(`anthropic regular replay live model=${model.provider}/${model.id}`);
+      const response = await completeSimpleWithLiveTimeout(
+        model,
+        { messages },
+        {
+          apiKey,
+          cacheRetention: "none",
+          sessionId: "anthropic-regular-replay-live",
+          maxTokens: 64,
+          temperature: 0,
+        },
+        "anthropic regular text replay live synthetic transcript",
+        ANTHROPIC_TIMEOUT_MS,
+      );
+
+      const text = extractAssistantText(response);
+      logLiveCache(`anthropic regular replay live result=${JSON.stringify(text)}`);
+      expect(text.trim().length).toBeGreaterThan(0);
+    },
+    6 * 60_000,
+  );
+
+  it(
+    "accepts omitted-reasoning placeholder assistant replay history",
+    async () => {
+      const { apiKey, model } = buildLiveAnthropicModel();
+      const messages: Message[] = [
+        {
+          role: "user",
+          content: "Remember that the previous assistant reasoning was omitted.",
+          timestamp: Date.now(),
+        },
+        buildAssistantMessageWithZeroUsage({
+          model: { api: model.api, provider: model.provider, id: model.id },
+          content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
+          stopReason: "stop",
+        }),
+        {
+          role: "user",
+          content: "Reply with exactly OK if this placeholder replay history is valid.",
+          timestamp: Date.now(),
+        },
+      ];
+
+      logLiveCache(`anthropic omitted-reasoning replay live model=${model.provider}/${model.id}`);
+      const response = await completeSimpleWithLiveTimeout(
+        model,
+        { messages },
+        {
+          apiKey,
+          cacheRetention: "none",
+          sessionId: "anthropic-omitted-reasoning-replay-live",
+          maxTokens: 64,
+          temperature: 0,
+        },
+        "anthropic omitted reasoning replay live synthetic transcript",
+        ANTHROPIC_TIMEOUT_MS,
+      );
+
+      const text = extractAssistantText(response);
+      logLiveCache(`anthropic omitted-reasoning replay live result=${JSON.stringify(text)}`);
+      expect(text.trim().length).toBeGreaterThan(0);
+    },
+    6 * 60_000,
+  );
+
   it(
     "preserves toolCall replay history that Anthropic accepts end-to-end",
     async () => {
diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -16,6 +16,7 @@ import {
   TEST_SESSION_ID,
 } from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
 import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js";
+import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
 import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js";
 import { extractToolCallsFromAssistant } from "./tool-call-id.js";
 import type { TranscriptPolicy } from "./transcript-policy.js";
@@ -1176,6 +1177,92 @@ describe("sanitizeSessionHistory", () => {
     ]);
   });
 
+  it("keeps regular latest Anthropic thinking replay while preserving older stripped turns", async () => {
+    setNonGoogleModelApi();
+
+    const messages = castAgentMessages([
+      makeUserMessage("first"),
+      makeAssistantMessage([
+        {
+          type: "thinking",
+          thinking: "old private reasoning",
+          thinkingSignature: "sig_old",
+        },
+      ]),
+      makeUserMessage("second"),
+      makeAssistantMessage([
+        {
+          type: "thinking",
+          thinking: "latest private reasoning",
+          thinkingSignature: "sig_latest",
+        },
+        { type: "text", text: "latest visible answer" },
+      ]),
+    ]);
+
+    const result = await sanitizeAnthropicHistory({
+      messages,
+      modelId: "claude-3-7-sonnet-20250219",
+    });
+
+    expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+    ]);
+    expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      {
+        type: "thinking",
+        thinking: "latest private reasoning",
+        thinkingSignature: "sig_latest",
+      },
+      { type: "text", text: "latest visible answer" },
+    ]);
+  });
+
+  it.each([
+    {
+      provider: "anthropic",
+      modelApi: "anthropic-messages",
+      label: "anthropic",
+    },
+    {
+      provider: "amazon-bedrock",
+      modelApi: "bedrock-converse-stream",
+      label: "bedrock",
+    },
+  ])(
+    "preserves older stripped thinking-only assistant turns for $label replay",
+    async ({ provider, modelApi }) => {
+      setNonGoogleModelApi();
+
+      const messages = castAgentMessages([
+        makeUserMessage("first"),
+        makeAssistantMessage([
+          {
+            type: "thinking",
+            thinking: "old private reasoning",
+            thinkingSignature: "sig_old",
+          },
+        ]),
+        makeUserMessage("second"),
+        makeAssistantMessage([{ type: "text", text: "latest visible answer" }]),
+      ]);
+
+      const result = await sanitizeAnthropicHistory({
+        provider,
+        modelApi,
+        messages,
+        modelId: "claude-3-7-sonnet-20250219",
+      });
+
+      expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+        { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+      ]);
+      expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+        { type: "text", text: "latest visible answer" },
+      ]);
+    },
+  );
+
   it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => {
     setNonGoogleModelApi();
 
diff --git a/src/agents/pi-embedded-runner/thinking.test.ts b/src/agents/pi-embedded-runner/thinking.test.ts
@@ -3,6 +3,7 @@ import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
 import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-message-fixtures.js";
 import {
+  OMITTED_ASSISTANT_REASONING_TEXT,
   assessLastAssistantMessage,
   dropThinkingBlocks,
   isAssistantMessageWithContent,
@@ -103,6 +104,56 @@ describe("dropThinkingBlocks", () => {
       { type: "text", text: "latest text" },
     ]);
   });
+
+  it("uses non-empty omitted-reasoning text when an older assistant turn is thinking-only", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({ role: "user", content: "first" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "thinking", thinking: "old", thinkingSignature: "sig_old" }],
+      }),
+      castAgentMessage({ role: "user", content: "second" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
+          { type: "text", text: "latest text" },
+        ],
+      }),
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
+    const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
+    const originalLatestAssistant = messages[3] as Extract<AgentMessage, { role: "assistant" }>;
+
+    expect(oldAssistant.content).toEqual([
+      { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+    ]);
+    expect(latestAssistant.content).toEqual(originalLatestAssistant.content);
+  });
+
+  it("uses non-empty omitted-reasoning text when an older assistant turn is redacted-thinking-only", () => {
+    const messages: AgentMessage[] = [
+      castAgentMessage({ role: "user", content: "first" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "redacted_thinking", data: "opaque" }],
+      }),
+      castAgentMessage({ role: "user", content: "second" }),
+      castAgentMessage({
+        role: "assistant",
+        content: [{ type: "text", text: "latest text" }],
+      }),
+    ];
+
+    const result = dropThinkingBlocks(messages);
+    const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
+
+    expect(oldAssistant.content).toEqual([
+      { type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
+    ]);
+  });
 });
 
 describe("sanitizeThinkingForRecovery", () => {
@@ -191,11 +242,13 @@ describe("wrapAnthropicStreamWithRecovery", () => {
     "thinking or redacted_thinking blocks in the latest assistant message cannot be modified",
   );
 
-  it("retries once when the request is rejected before streaming", async () => {
+  it("retries once with omitted-reasoning text when the request is rejected before streaming", async () => {
     let callCount = 0;
+    const contexts: Array<{ messages?: AgentMessage[] }> = [];
     const wrapped = wrapAnthropicStreamWithRecovery(
-      (() => {
+      ((_model, context) => {
         callCount += 1;
+        contexts.push(context as { messages?: AgentMessage[] });
         return Promise.reject(anthropicThinkingError);
       }) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
       { id: "test-session" },
@@ -216,6 +269,44 @@ describe("wrapAnthropicStreamWithRecovery", () => {
       ),
     ).rejects.toBe(anthropicThinkingError);
     expect(callCount).toBe(2);
+    expect(contexts[1]?.messages?.[0]).toMatchObject({
+      role: "assistant",
+      content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
+    });
+  });
+
+  it("retries with visible assistant text when stripping thinking leaves content", async () => {
+    const contexts: Array<{ messages?: AgentMessage[] }> = [];
+    const wrapped = wrapAnthropicStreamWithRecovery(
+      ((_model, context) => {
+        contexts.push(context as { messages?: AgentMessage[] });
+        return Promise.reject(anthropicThinkingError);
+      }) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
+      { id: "test-session" },
+    );
+
+    await expect(
+      wrapped(
+        {} as never,
+        {
+          messages: castAgentMessages([
+            {
+              role: "assistant",
+              content: [
+                { type: "thinking", thinking: "secret", thinkingSignature: "sig" },
+                { type: "text", text: "visible answer" },
+              ],
+            },
+          ]),
+        } as never,
+        {} as never,
+      ),
+    ).rejects.toBe(anthropicThinkingError);
+
+    expect(contexts[1]?.messages?.[0]).toMatchObject({
+      role: "assistant",
+      content: [{ type: "text", text: "visible answer" }],
+    });
   });
 
   it("does not retry when the stream fails after yielding a chunk", async () => {
diff --git a/src/agents/pi-embedded-runner/thinking.ts b/src/agents/pi-embedded-runner/thinking.ts