Skip to content

Commit b180b8a

Browse files
committed
fix: strip workflow function responses from replies
1 parent a099acc commit b180b8a

8 files changed

Lines changed: 201 additions & 10 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
3636

3737
- Agents/Azure OpenAI Responses: default unset Azure OpenAI API versions to `preview` so `/openai/v1/responses` calls use Azure's current Responses API route. (#82026) Thanks @leoge007.
3838
- Agents: retry empty final turns for generic `anthropic-messages` providers instead of limiting non-visible recovery to Kimi, so custom/proxied Anthropic-compatible routes can recover with a visible answer. Addresses #46080. Thanks @wmgx, @w1tv, and @iFwu.
39+
- Agents/replies: strip workflow `<function_response>` scaffolding from user-visible sanitizer paths so raw tool output does not leak into chat history, transcript mirrors, or channel replies. Fixes #47444. Thanks @5toCode.
3940
- Control UI: rotate browser service-worker caches per build so updated Gateways are less likely to keep serving stale dashboard bundles that trigger protocol mismatch errors.
4041
- Discord: report unresolved configured bot-token SecretRefs during startup instead of treating the account as unconfigured. (#82009) Thanks @giodl73-repo.
4142
- CLI/config: preserve numeric-looking object keys such as Discord guild IDs during `config patch` recursive merges. (#81999) Thanks @giodl73-repo.

src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -280,6 +280,19 @@ describe("sanitizeUserFacingText", () => {
280280
expect(sanitizeUserFacingText(input)).toBe("Before\n\nAfter");
281281
});
282282

283+
it("strips workflow function response wrappers before user-facing delivery", () => {
284+
const input = [
285+
"Before",
286+
"<function_response>",
287+
'Searching for: "what skills matter most in the age of AI"',
288+
"...",
289+
"</function_response>",
290+
"After",
291+
].join("\n");
292+
293+
expect(sanitizeUserFacingText(input)).toBe("Before\n\nAfter");
294+
});
295+
283296
it("preserves literal tool-call tag examples in user-facing prose", () => {
284297
const input = "Use `<tool_call>` to describe the XML tag in docs.";
285298
expect(sanitizeUserFacingText(input)).toBe(input);

src/agents/pi-embedded-utils.test.ts

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -468,6 +468,28 @@ File contents here`,
468468
expect(extractAssistantText(msg)).toBe("Prefix\n\nSuffix");
469469
});
470470

471+
it("strips raw <function_response> workflow blocks from assistant text", () => {
472+
const msg = makeAssistantMessage({
473+
role: "assistant",
474+
content: [
475+
{
476+
type: "text",
477+
text: [
478+
"Prefix",
479+
"<function_response>",
480+
'Searching for: "what skills matter most in the age of AI"',
481+
"...",
482+
"</function_response>",
483+
"Suffix",
484+
].join("\n"),
485+
},
486+
],
487+
timestamp: Date.now(),
488+
});
489+
490+
expect(extractAssistantText(msg)).toBe("Prefix\n\nSuffix");
491+
});
492+
471493
it("strips dangling <tool_call> XML content to end-of-string", () => {
472494
const msg = makeAssistantMessage({
473495
role: "assistant",

src/auto-reply/reply/agent-runner-payloads.test.ts

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import { createTestRegistry } from "../../test-utils/channel-plugins.js";
44
import {
55
getReplyPayloadMetadata,
66
markReplyPayloadForSourceSuppressionDelivery,
7+
setReplyPayloadMetadata,
78
} from "../reply-payload.js";
89
import { buildReplyPayloads } from "./agent-runner-payloads.js";
910

@@ -87,6 +88,37 @@ describe("buildReplyPayloads media filter integration", () => {
8788
});
8889
});
8990

91+
it("sanitizes source reply transcript mirror text with final payload text", async () => {
92+
const text = [
93+
"Visible",
94+
"<function_response>",
95+
'Searching for: "what skills matter most in the age of AI"',
96+
"...",
97+
"</function_response>",
98+
"Done",
99+
].join("\n");
100+
const payload = setReplyPayloadMetadata(
101+
{ text },
102+
{
103+
sourceReplyTranscriptMirror: {
104+
sessionKey: "agent:main",
105+
text,
106+
},
107+
},
108+
);
109+
110+
const { replyPayloads } = await buildReplyPayloads({
111+
...baseParams,
112+
payloads: [payload],
113+
});
114+
115+
expect(replyPayloads).toHaveLength(1);
116+
expect(replyPayloads[0]?.text).toBe("Visible\n\nDone");
117+
expect(getReplyPayloadMetadata(replyPayloads[0])?.sourceReplyTranscriptMirror?.text).toBe(
118+
"Visible\n\nDone",
119+
);
120+
});
121+
90122
it("strips media URL from payload when in messagingToolSentMediaUrls", async () => {
91123
const { replyPayloads } = await buildReplyPayloads({
92124
...baseParams,

src/auto-reply/reply/agent-runner-payloads.ts

Lines changed: 46 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,16 @@
11
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
2+
import { sanitizeUserFacingText } from "../../agents/pi-embedded-helpers/sanitize-user-facing-text.js";
23
import type { MessagingToolSend } from "../../agents/pi-embedded-messaging.types.js";
34
import type { ReplyToMode } from "../../config/types.js";
45
import { logVerbose } from "../../globals.js";
56
import { createLazyImportLoader } from "../../shared/lazy-promise.js";
67
import { stripLegacyBracketToolCallBlocks } from "../../shared/text/assistant-visible-text.js";
78
import { stripHeartbeatToken } from "../heartbeat.js";
8-
import { copyReplyPayloadMetadata } from "../reply-payload.js";
9+
import {
10+
copyReplyPayloadMetadata,
11+
getReplyPayloadMetadata,
12+
setReplyPayloadMetadata,
13+
} from "../reply-payload.js";
914
import type { OriginatingChannelType } from "../templating.js";
1015
import { SILENT_REPLY_TOKEN } from "../tokens.js";
1116
import type { ReplyPayload, ReplyThreadingPolicy } from "../types.js";
@@ -97,17 +102,52 @@ function shouldKeepPayloadDuringSilentTurn(payload: ReplyPayload): boolean {
97102
return payload.audioAsVoice === true && resolveSendableOutboundReplyParts(payload).hasMedia;
98103
}
99104

105+
function sanitizeFinalReplyText(
106+
payload: ReplyPayload,
107+
text: string | undefined,
108+
): string | undefined {
109+
if (!text) {
110+
return text;
111+
}
112+
return sanitizeUserFacingText(text, { errorContext: Boolean(payload.isError) });
113+
}
114+
100115
function sanitizeHeartbeatPayload(payload: ReplyPayload): ReplyPayload {
101116
const text = payload.text;
102117
if (!text) {
103118
return payload;
104119
}
105-
const cleaned = stripLegacyBracketToolCallBlocks(text);
120+
const withoutLegacyBlocks = stripLegacyBracketToolCallBlocks(text);
121+
const cleaned = sanitizeFinalReplyText(payload, withoutLegacyBlocks);
106122
if (cleaned === text) {
107123
return payload;
108124
}
109-
logVerbose("Stripped legacy tool-call block from heartbeat reply");
110-
return copyReplyPayloadMetadata(payload, { ...payload, text: cleaned });
125+
if (withoutLegacyBlocks !== text) {
126+
logVerbose("Stripped legacy tool-call block from heartbeat reply");
127+
}
128+
return copyPayloadWithSanitizedText(payload, cleaned);
129+
}
130+
131+
function copyPayloadWithSanitizedText(
132+
payload: ReplyPayload,
133+
text: string | undefined,
134+
): ReplyPayload {
135+
const sanitizedText = sanitizeFinalReplyText(payload, text);
136+
const next = copyReplyPayloadMetadata(payload, {
137+
...payload,
138+
text: sanitizedText,
139+
});
140+
const mirror = getReplyPayloadMetadata(payload)?.sourceReplyTranscriptMirror;
141+
if (!mirror?.text) {
142+
return next;
143+
}
144+
setReplyPayloadMetadata(next, {
145+
sourceReplyTranscriptMirror: {
146+
...mirror,
147+
text: sanitizeFinalReplyText(payload, mirror.text) || undefined,
148+
},
149+
});
150+
return next;
111151
}
112152

113153
export async function buildReplyPayloads(params: {
@@ -148,7 +188,7 @@ export async function buildReplyPayloads(params: {
148188
}
149189

150190
if (!text || !text.includes("HEARTBEAT_OK")) {
151-
sanitizedPayloads.push(copyReplyPayloadMetadata(payload, { ...payload, text }));
191+
sanitizedPayloads.push(copyPayloadWithSanitizedText(payload, text));
152192
continue;
153193
}
154194
const stripped = stripHeartbeatToken(text, { mode: "message" });
@@ -160,9 +200,7 @@ export async function buildReplyPayloads(params: {
160200
if (stripped.shouldSkip && !hasMedia) {
161201
continue;
162202
}
163-
sanitizedPayloads.push(
164-
copyReplyPayloadMetadata(payload, { ...payload, text: stripped.text }),
165-
);
203+
sanitizedPayloads.push(copyPayloadWithSanitizedText(payload, stripped.text));
166204
}
167205
}
168206

src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -762,6 +762,34 @@ describe("runReplyAgent typing (heartbeat)", () => {
762762
expect(blockOptions.timeoutMs).toBeTypeOf("number");
763763
});
764764

765+
it("strips workflow function response scaffolding from final delivery", async () => {
766+
state.runEmbeddedPiAgentMock.mockImplementationOnce(async () => ({
767+
payloads: [
768+
{
769+
text: [
770+
"Visible intro.",
771+
"<function_calls>",
772+
'<invoke name="exec"><parameter name="command">node scripts/search.mjs</parameter></invoke>',
773+
"</function_calls>",
774+
"<function_response>",
775+
'Searching for: "what skills matter most in the age of AI"',
776+
"...",
777+
"</function_response>",
778+
"Visible answer.",
779+
].join("\n"),
780+
},
781+
],
782+
meta: {},
783+
}));
784+
785+
const { run } = createMinimalRun();
786+
const res = await run();
787+
const payloads = Array.isArray(res) ? res : res ? [res] : [];
788+
789+
expect(payloads).toHaveLength(1);
790+
expect(payloads[0]?.text).toBe("Visible intro.\n\n\nVisible answer.");
791+
});
792+
765793
it("handles typing for normal and silent tool results", async () => {
766794
const cases = [
767795
{

src/shared/text/assistant-visible-text.test.ts

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,24 @@ describe("stripAssistantInternalScaffolding", () => {
133133
expectVisibleText('Result:\n<tool_result>\n{"output": "data"}\n', "Result:\n");
134134
});
135135

136+
it("strips workflow <function_response> blocks with plain output", () => {
137+
expectVisibleText(
138+
[
139+
"Before",
140+
"<function_response>",
141+
'Searching for: "what skills matter most in the age of AI"',
142+
"...",
143+
"</function_response>",
144+
"After",
145+
].join("\n"),
146+
"Before\n\nAfter",
147+
);
148+
});
149+
150+
it("strips dangling workflow <function_response> content to end-of-string", () => {
151+
expectVisibleText("Before\n<function_response>\nraw command output\n", "Before\n");
152+
});
153+
136154
it("strips <tool_result> closed with mismatched </tool_call> and preserves trailing text", () => {
137155
expectVisibleText(
138156
'Prefix\n<tool_result> {"output": "data"} </tool_call>\nSuffix',
@@ -386,6 +404,20 @@ describe("stripAssistantInternalScaffolding", () => {
386404
);
387405
});
388406

407+
it("preserves inline function_response examples in prose", () => {
408+
expectVisibleText(
409+
"Use <function_response> to describe the response wrapper.",
410+
"Use <function_response> to describe the response wrapper.",
411+
);
412+
});
413+
414+
it("preserves line-leading function_response prose examples", () => {
415+
expectVisibleText(
416+
"<function_response> is the response wrapper.",
417+
"<function_response> is the response wrapper.",
418+
);
419+
});
420+
389421
it("preserves non-tool tag names that share the tool_call prefix", () => {
390422
expectVisibleText(
391423
'prefix <tool_call-example>{"name":"read"}</tool_call-example> suffix',

src/shared/text/assistant-visible-text.ts

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,13 @@ const LEGACY_BRACKET_TOOL_BLOCK_QUICK_RE = /\[\s*\/?\s*TOOL_(?:CALL|RESULT)\s*\]
1818
* closing tag, or to end-of-string if the stream was truncated mid-tag.
1919
*/
2020
const TOOL_CALL_QUICK_RE =
21-
/<\s*\/?\s*(?:tool_call|tool_result|function_calls?|function|tool_calls)\b/i;
21+
/<\s*\/?\s*(?:tool_call|tool_result|function_calls?|function_response|function|tool_calls)\b/i;
2222
const TOOL_CALL_TAG_NAMES = new Set([
2323
"tool_call",
2424
"tool_result",
2525
"function_call",
2626
"function_calls",
27+
"function_response",
2728
"function",
2829
"tool_calls",
2930
]);
@@ -168,6 +169,25 @@ function isLikelyStandaloneFunctionToolCall(
168169
return idx < 0 || text[idx] === "\n" || text[idx] === "\r" || /[.!?:]/.test(text[idx]);
169170
}
170171

172+
function isStandaloneOpeningTagLine(
173+
text: string,
174+
tagStart: number,
175+
tag: ParsedToolCallTag,
176+
): boolean {
177+
let idx = tagStart - 1;
178+
while (idx >= 0 && (text[idx] === " " || text[idx] === "\t")) {
179+
idx -= 1;
180+
}
181+
if (!(idx < 0 || text[idx] === "\n" || text[idx] === "\r")) {
182+
return false;
183+
}
184+
let after = tag.end;
185+
while (after < text.length && (text[after] === " " || text[after] === "\t")) {
186+
after += 1;
187+
}
188+
return after >= text.length || text[after] === "\n" || text[after] === "\r";
189+
}
190+
171191
function parseToolCallTagAt(text: string, start: number): ParsedToolCallTag | null {
172192
if (text[start] !== "<") {
173193
return null;
@@ -288,7 +308,12 @@ export function stripToolCallXmlTags(
288308
: null;
289309
const shouldStripStandaloneFunction =
290310
tag.tagName !== "function" || isLikelyStandaloneFunctionToolCall(text, idx, tag);
291-
if (!tag.isClose && payloadKind && shouldStripStandaloneFunction) {
311+
const shouldStripStandaloneResult =
312+
tag.tagName === "function_response" && isStandaloneOpeningTagLine(text, idx, tag);
313+
if (
314+
!tag.isClose &&
315+
((payloadKind && shouldStripStandaloneFunction) || shouldStripStandaloneResult)
316+
) {
292317
inToolCallBlock = true;
293318
toolCallBlockContentStart = tag.end;
294319
toolCallBlockNeedsQuoteBalance =

0 commit comments

Comments
 (0)