Skip to content

Commit a018db7

Browse files
committed
fix: preserve omitted thinking replay turns
1 parent 690c98a commit a018db7

6 files changed

Lines changed: 288 additions & 12 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,9 @@ Docs: https://docs.openclaw.ai
8080
- Agents/Bedrock: prevent empty assistant stream-error turns from poisoning
8181
Converse replay by persisting, repairing, and replaying a non-empty fallback
8282
block. Fixes #71572. (#71627) Thanks @openperf.
83+
- Agents/Anthropic/Bedrock: preserve stripped thinking-only assistant replay
84+
turns with non-empty omitted-reasoning text so provider adapters keep strict
85+
user/assistant turn shape. Thanks @wujiaming88.
8386
- Browser/CDP: make readiness diagnostics use the same discovery-first fallback as reachability for bare `ws://` Browserless and Browserbase CDP URLs. Fixes #69532.
8487
- Browser/CDP: explain that loopback Browserless or other externally managed CDP services need `attachOnly: true` and matching Browserless `EXTERNAL` endpoint when reporting local port ownership conflicts, and fall back to the configured bare WebSocket root when a discovered Browserless endpoint rejects CDP. Fixes #49815.
8588
- Gateway/reload: preserve indefinite `gateway.reload.deferralTimeoutMs: 0` semantics for channel hot reload deferrals so active agent runs are not interrupted by a forced channel restart. (#71637) Thanks @Poo-Squirry.

docs/reference/transcript-hygiene.md

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,13 +133,18 @@ external end-user instructions.
133133

134134
- Tool result pairing repair and synthetic tool results.
135135
- Turn validation (merge consecutive user turns to satisfy strict alternation).
136+
- Older thinking-only assistant turns that must be stripped are replaced with
137+
non-empty omitted-reasoning text so provider adapters do not drop the replay
138+
turn.
136139

137140
**Amazon Bedrock (Converse API)**
138141

139142
- Empty assistant stream-error turns are repaired to a non-empty fallback text block
140143
before replay. Bedrock Converse rejects assistant messages with `content: []`, so
141144
persisted assistant turns with `stopReason: "error"` and empty content are also
142145
repaired on disk before load.
146+
- Older thinking-only assistant turns that must be stripped are replaced with
147+
non-empty omitted-reasoning text so the Converse replay keeps strict turn shape.
143148
- Replay filters OpenClaw delivery-mirror and gateway-injected assistant turns.
144149
- Image sanitization applies through the global rule.
145150

src/agents/pi-embedded-runner.anthropic-tool-replay.live.test.ts

Lines changed: 90 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import {
77
} from "./live-cache-test-support.js";
88
import { isLiveTestEnabled } from "./live-test-helpers.js";
99
import { wrapStreamFnSanitizeMalformedToolCalls } from "./pi-embedded-runner/run/attempt.tool-call-normalization.js";
10+
import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
1011
import { buildAssistantMessageWithZeroUsage } from "./stream-message-shared.js";
1112

1213
const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
@@ -33,7 +34,7 @@ function buildLiveAnthropicModel(): {
3334
name: modelId,
3435
api: "anthropic-messages" as const,
3536
provider: "anthropic",
36-
baseUrl: "https://api.anthropic.com/v1",
37+
baseUrl: "https://api.anthropic.com",
3738
reasoning: true,
3839
input: ["text"] as const,
3940
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -44,6 +45,94 @@ function buildLiveAnthropicModel(): {
4445
}
4546

4647
describeLive("pi embedded anthropic replay sanitization (live)", () => {
48+
it(
49+
"accepts regular text-only assistant replay history",
50+
async () => {
51+
const { apiKey, model } = buildLiveAnthropicModel();
52+
const messages: Message[] = [
53+
{
54+
role: "user",
55+
content: "Remember the marker REGULAR_ANTHROPIC_REPLAY_OK.",
56+
timestamp: Date.now(),
57+
},
58+
buildAssistantMessageWithZeroUsage({
59+
model: { api: model.api, provider: model.provider, id: model.id },
60+
content: [{ type: "text", text: "I remember REGULAR_ANTHROPIC_REPLAY_OK." }],
61+
stopReason: "stop",
62+
}),
63+
{
64+
role: "user",
65+
content: "Reply with a short confirmation if this replay history is valid.",
66+
timestamp: Date.now(),
67+
},
68+
];
69+
70+
logLiveCache(`anthropic regular replay live model=${model.provider}/${model.id}`);
71+
const response = await completeSimpleWithLiveTimeout(
72+
model,
73+
{ messages },
74+
{
75+
apiKey,
76+
cacheRetention: "none",
77+
sessionId: "anthropic-regular-replay-live",
78+
maxTokens: 64,
79+
temperature: 0,
80+
},
81+
"anthropic regular text replay live synthetic transcript",
82+
ANTHROPIC_TIMEOUT_MS,
83+
);
84+
85+
const text = extractAssistantText(response);
86+
logLiveCache(`anthropic regular replay live result=${JSON.stringify(text)}`);
87+
expect(text.trim().length).toBeGreaterThan(0);
88+
},
89+
6 * 60_000,
90+
);
91+
92+
it(
93+
"accepts omitted-reasoning placeholder assistant replay history",
94+
async () => {
95+
const { apiKey, model } = buildLiveAnthropicModel();
96+
const messages: Message[] = [
97+
{
98+
role: "user",
99+
content: "Remember that the previous assistant reasoning was omitted.",
100+
timestamp: Date.now(),
101+
},
102+
buildAssistantMessageWithZeroUsage({
103+
model: { api: model.api, provider: model.provider, id: model.id },
104+
content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
105+
stopReason: "stop",
106+
}),
107+
{
108+
role: "user",
109+
content: "Reply with exactly OK if this placeholder replay history is valid.",
110+
timestamp: Date.now(),
111+
},
112+
];
113+
114+
logLiveCache(`anthropic omitted-reasoning replay live model=${model.provider}/${model.id}`);
115+
const response = await completeSimpleWithLiveTimeout(
116+
model,
117+
{ messages },
118+
{
119+
apiKey,
120+
cacheRetention: "none",
121+
sessionId: "anthropic-omitted-reasoning-replay-live",
122+
maxTokens: 64,
123+
temperature: 0,
124+
},
125+
"anthropic omitted reasoning replay live synthetic transcript",
126+
ANTHROPIC_TIMEOUT_MS,
127+
);
128+
129+
const text = extractAssistantText(response);
130+
logLiveCache(`anthropic omitted-reasoning replay live result=${JSON.stringify(text)}`);
131+
expect(text.trim().length).toBeGreaterThan(0);
132+
},
133+
6 * 60_000,
134+
);
135+
47136
it(
48137
"preserves toolCall replay history that Anthropic accepts end-to-end",
49138
async () => {

src/agents/pi-embedded-runner.sanitize-session-history.test.ts

Lines changed: 87 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ import {
1616
TEST_SESSION_ID,
1717
} from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
1818
import { validateReplayTurns } from "./pi-embedded-runner/replay-history.js";
19+
import { OMITTED_ASSISTANT_REASONING_TEXT } from "./pi-embedded-runner/thinking.js";
1920
import { castAgentMessage, castAgentMessages } from "./test-helpers/agent-message-fixtures.js";
2021
import { extractToolCallsFromAssistant } from "./tool-call-id.js";
2122
import type { TranscriptPolicy } from "./transcript-policy.js";
@@ -1176,6 +1177,92 @@ describe("sanitizeSessionHistory", () => {
11761177
]);
11771178
});
11781179

1180+
it("keeps regular latest Anthropic thinking replay while preserving older stripped turns", async () => {
1181+
setNonGoogleModelApi();
1182+
1183+
const messages = castAgentMessages([
1184+
makeUserMessage("first"),
1185+
makeAssistantMessage([
1186+
{
1187+
type: "thinking",
1188+
thinking: "old private reasoning",
1189+
thinkingSignature: "sig_old",
1190+
},
1191+
]),
1192+
makeUserMessage("second"),
1193+
makeAssistantMessage([
1194+
{
1195+
type: "thinking",
1196+
thinking: "latest private reasoning",
1197+
thinkingSignature: "sig_latest",
1198+
},
1199+
{ type: "text", text: "latest visible answer" },
1200+
]),
1201+
]);
1202+
1203+
const result = await sanitizeAnthropicHistory({
1204+
messages,
1205+
modelId: "claude-3-7-sonnet-20250219",
1206+
});
1207+
1208+
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
1209+
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
1210+
]);
1211+
expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
1212+
{
1213+
type: "thinking",
1214+
thinking: "latest private reasoning",
1215+
thinkingSignature: "sig_latest",
1216+
},
1217+
{ type: "text", text: "latest visible answer" },
1218+
]);
1219+
});
1220+
1221+
it.each([
1222+
{
1223+
provider: "anthropic",
1224+
modelApi: "anthropic-messages",
1225+
label: "anthropic",
1226+
},
1227+
{
1228+
provider: "amazon-bedrock",
1229+
modelApi: "bedrock-converse-stream",
1230+
label: "bedrock",
1231+
},
1232+
])(
1233+
"preserves older stripped thinking-only assistant turns for $label replay",
1234+
async ({ provider, modelApi }) => {
1235+
setNonGoogleModelApi();
1236+
1237+
const messages = castAgentMessages([
1238+
makeUserMessage("first"),
1239+
makeAssistantMessage([
1240+
{
1241+
type: "thinking",
1242+
thinking: "old private reasoning",
1243+
thinkingSignature: "sig_old",
1244+
},
1245+
]),
1246+
makeUserMessage("second"),
1247+
makeAssistantMessage([{ type: "text", text: "latest visible answer" }]),
1248+
]);
1249+
1250+
const result = await sanitizeAnthropicHistory({
1251+
provider,
1252+
modelApi,
1253+
messages,
1254+
modelId: "claude-3-7-sonnet-20250219",
1255+
});
1256+
1257+
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
1258+
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
1259+
]);
1260+
expect((result[3] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
1261+
{ type: "text", text: "latest visible answer" },
1262+
]);
1263+
},
1264+
);
1265+
11791266
it("uses immutable thinking replay for anthropic-compatible providers when policy preserves signatures", async () => {
11801267
setNonGoogleModelApi();
11811268

src/agents/pi-embedded-runner/thinking.test.ts

Lines changed: 93 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
33
import { describe, expect, it } from "vitest";
44
import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-message-fixtures.js";
55
import {
6+
OMITTED_ASSISTANT_REASONING_TEXT,
67
assessLastAssistantMessage,
78
dropThinkingBlocks,
89
isAssistantMessageWithContent,
@@ -103,6 +104,56 @@ describe("dropThinkingBlocks", () => {
103104
{ type: "text", text: "latest text" },
104105
]);
105106
});
107+
108+
it("uses non-empty omitted-reasoning text when an older assistant turn is thinking-only", () => {
109+
const messages: AgentMessage[] = [
110+
castAgentMessage({ role: "user", content: "first" }),
111+
castAgentMessage({
112+
role: "assistant",
113+
content: [{ type: "thinking", thinking: "old", thinkingSignature: "sig_old" }],
114+
}),
115+
castAgentMessage({ role: "user", content: "second" }),
116+
castAgentMessage({
117+
role: "assistant",
118+
content: [
119+
{ type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
120+
{ type: "text", text: "latest text" },
121+
],
122+
}),
123+
];
124+
125+
const result = dropThinkingBlocks(messages);
126+
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
127+
const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
128+
const originalLatestAssistant = messages[3] as Extract<AgentMessage, { role: "assistant" }>;
129+
130+
expect(oldAssistant.content).toEqual([
131+
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
132+
]);
133+
expect(latestAssistant.content).toEqual(originalLatestAssistant.content);
134+
});
135+
136+
it("uses non-empty omitted-reasoning text when an older assistant turn is redacted-thinking-only", () => {
137+
const messages: AgentMessage[] = [
138+
castAgentMessage({ role: "user", content: "first" }),
139+
castAgentMessage({
140+
role: "assistant",
141+
content: [{ type: "redacted_thinking", data: "opaque" }],
142+
}),
143+
castAgentMessage({ role: "user", content: "second" }),
144+
castAgentMessage({
145+
role: "assistant",
146+
content: [{ type: "text", text: "latest text" }],
147+
}),
148+
];
149+
150+
const result = dropThinkingBlocks(messages);
151+
const oldAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
152+
153+
expect(oldAssistant.content).toEqual([
154+
{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT },
155+
]);
156+
});
106157
});
107158

108159
describe("sanitizeThinkingForRecovery", () => {
@@ -191,11 +242,13 @@ describe("wrapAnthropicStreamWithRecovery", () => {
191242
"thinking or redacted_thinking blocks in the latest assistant message cannot be modified",
192243
);
193244

194-
it("retries once when the request is rejected before streaming", async () => {
245+
it("retries once with omitted-reasoning text when the request is rejected before streaming", async () => {
195246
let callCount = 0;
247+
const contexts: Array<{ messages?: AgentMessage[] }> = [];
196248
const wrapped = wrapAnthropicStreamWithRecovery(
197-
(() => {
249+
((_model, context) => {
198250
callCount += 1;
251+
contexts.push(context as { messages?: AgentMessage[] });
199252
return Promise.reject(anthropicThinkingError);
200253
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
201254
{ id: "test-session" },
@@ -216,6 +269,44 @@ describe("wrapAnthropicStreamWithRecovery", () => {
216269
),
217270
).rejects.toBe(anthropicThinkingError);
218271
expect(callCount).toBe(2);
272+
expect(contexts[1]?.messages?.[0]).toMatchObject({
273+
role: "assistant",
274+
content: [{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }],
275+
});
276+
});
277+
278+
it("retries with visible assistant text when stripping thinking leaves content", async () => {
279+
const contexts: Array<{ messages?: AgentMessage[] }> = [];
280+
const wrapped = wrapAnthropicStreamWithRecovery(
281+
((_model, context) => {
282+
contexts.push(context as { messages?: AgentMessage[] });
283+
return Promise.reject(anthropicThinkingError);
284+
}) as Parameters<typeof wrapAnthropicStreamWithRecovery>[0],
285+
{ id: "test-session" },
286+
);
287+
288+
await expect(
289+
wrapped(
290+
{} as never,
291+
{
292+
messages: castAgentMessages([
293+
{
294+
role: "assistant",
295+
content: [
296+
{ type: "thinking", thinking: "secret", thinkingSignature: "sig" },
297+
{ type: "text", text: "visible answer" },
298+
],
299+
},
300+
]),
301+
} as never,
302+
{} as never,
303+
),
304+
).rejects.toBe(anthropicThinkingError);
305+
306+
expect(contexts[1]?.messages?.[0]).toMatchObject({
307+
role: "assistant",
308+
content: [{ type: "text", text: "visible answer" }],
309+
});
219310
});
220311

221312
it("does not retry when the stream fails after yielding a chunk", async () => {

0 commit comments

Comments
 (0)