Skip to content

Commit 4f54861

Browse files
steipete and ooiuuii committed
fix: bound aggregate prompt tool results
Keep aggregate tool-result truncation on the prompt history boundary instead of rewriting the just-appended persisted branch entry.

Co-authored-by: luyifan <al3060388206@gmail.com>
1 parent 6fd4aa8 commit 4f54861

4 files changed

Lines changed: 240 additions & 30 deletions

File tree

src/agents/embedded-agent-runner/run/attempt.spawn-workspace.context-engine.test.ts

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,32 @@ function requireRecords(value: unknown, label: string): Array<Record<string, unk
6666
return value as Array<Record<string, unknown>>;
6767
}
6868

69+
/**
 * Test helper: totals the text length of every `toolResult` message in a list.
 *
 * Only messages whose `role` is `"toolResult"` and whose `content` is an array
 * contribute. Within such a message, only blocks shaped like
 * `{ type: "text", text: string }` are counted; every other block adds 0.
 * Length is measured with `String.prototype.length`.
 *
 * @param messages - Messages to scan; the array is read but not modified.
 * @returns Sum of `text.length` across all qualifying text blocks (0 when none qualify).
 */
function sumToolResultTextChars(messages: AgentMessage[]): number {
70+
return messages.reduce((sum, message) => {
71+
// Messages with any other role leave the running total unchanged.
if (message.role !== "toolResult") {
72+
return sum;
73+
}
74+
const content = (message as { content?: unknown }).content;
75+
// Non-array content (for example a plain string) is not counted.
if (!Array.isArray(content)) {
76+
return sum;
77+
}
78+
return (
79+
sum +
80+
content.reduce((blockSum, block) => {
81+
// Narrow the untyped block at runtime before reading `text`.
if (
82+
block &&
83+
typeof block === "object" &&
84+
(block as { type?: unknown }).type === "text" &&
85+
typeof (block as { text?: unknown }).text === "string"
86+
) {
87+
return blockSum + (block as { text: string }).text.length;
88+
}
89+
return blockSum;
90+
}, 0)
91+
);
92+
}, 0);
93+
}
94+
6995
function findRecord(
7096
records: Array<Record<string, unknown>>,
7197
predicate: (record: Record<string, unknown>) => boolean,
@@ -2376,4 +2402,84 @@ describe("runEmbeddedAttempt tool-result guard budget wiring", () => {
23762402
.contextWindowTokens,
23772403
).toBe(1_000_000);
23782404
});
2405+
2406+
it("bounds aggregate tool-result prompt history without rewriting append results", async () => {
2407+
const toolText = "process output ".repeat(70);
2408+
const sessionMessages: AgentMessage[] = [{ role: "user", content: "seed", timestamp: 1 }];
2409+
for (let index = 0; index < 8; index += 1) {
2410+
const toolCallId = `call_${index}`;
2411+
sessionMessages.push({
2412+
role: "assistant",
2413+
content: [{ type: "toolCall", id: toolCallId, name: "process", input: {} }],
2414+
timestamp: 2 + index * 2,
2415+
} as unknown as AgentMessage);
2416+
sessionMessages.push({
2417+
role: "toolResult",
2418+
toolCallId,
2419+
toolName: "process",
2420+
content: [{ type: "text", text: `${index}: ${toolText}` }],
2421+
isError: false,
2422+
timestamp: 3 + index * 2,
2423+
} as AgentMessage);
2424+
}
2425+
let submittedMessages: AgentMessage[] = [];
2426+
let promptHandlerMessages: AgentMessage[] = [];
2427+
let afterTurnMessages: AgentMessage[] = [];
2428+
const afterTurn = vi.fn(async ({ messages }: { messages: AgentMessage[] }) => {
2429+
afterTurnMessages = messages;
2430+
});
2431+
2432+
await createContextEngineAttemptRunner({
2433+
contextEngine: {
2434+
...createContextEngineBootstrapAndAssemble(),
2435+
afterTurn,
2436+
},
2437+
sessionKey,
2438+
tempPaths,
2439+
sessionMessages,
2440+
attemptOverrides: {
2441+
contextTokenBudget: 128_000,
2442+
config: {
2443+
agents: {
2444+
defaults: {
2445+
contextLimits: {
2446+
toolResultMaxChars: 1_000,
2447+
},
2448+
},
2449+
list: [{ id: "main" }],
2450+
},
2451+
} as OpenClawConfig,
2452+
},
2453+
createSession: () => {
2454+
const session = createDefaultEmbeddedSession({ initialMessages: sessionMessages });
2455+
session.agent.streamFn = async (_model, context) => {
2456+
const providerMessages = (context as { messages?: AgentMessage[] } | undefined)?.messages;
2457+
submittedMessages = providerMessages ?? [];
2458+
return {
2459+
async result() {
2460+
return doneMessage;
2461+
},
2462+
[Symbol.asyncIterator]() {
2463+
return (async function* () {})();
2464+
},
2465+
};
2466+
};
2467+
session.prompt = async (_prompt, options) => {
2468+
promptHandlerMessages = session.messages.map((message) => message as AgentMessage);
2469+
options?.preflightResult?.(true);
2470+
await session.agent.streamFn?.({} as never, { messages: session.messages } as never, {});
2471+
session.messages = [...session.messages, doneMessage];
2472+
};
2473+
return session;
2474+
},
2475+
});
2476+
2477+
expect(sumToolResultTextChars(sessionMessages)).toBeGreaterThan(4_000);
2478+
expect(sumToolResultTextChars(promptHandlerMessages)).toBeGreaterThan(4_000);
2479+
expect(sumToolResultTextChars(submittedMessages)).toBeLessThanOrEqual(4_000);
2480+
expect(JSON.stringify(submittedMessages)).toContain("truncated");
2481+
expect(afterTurn).toHaveBeenCalledTimes(1);
2482+
expect(sumToolResultTextChars(afterTurnMessages)).toBeGreaterThan(4_000);
2483+
expect(JSON.stringify(afterTurnMessages)).not.toContain("truncated");
2484+
});
23792485
});

src/agents/embedded-agent-runner/run/attempt.ts

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,7 @@ import {
179179
buildEmptyExplicitToolAllowlistError,
180180
collectExplicitToolAllowlistSources,
181181
} from "../../tool-allowlist-guard.js";
182-
import {
183-
filterRuntimeCompatibleTools,
184-
} from "../../tool-schema-projection.js";
182+
import { filterRuntimeCompatibleTools } from "../../tool-schema-projection.js";
185183
import { logRuntimeToolSchemaQuarantine } from "../../tool-schema-quarantine.js";
186184
import {
187185
addClientToolsToToolSearchCatalog,
@@ -266,6 +264,7 @@ import {
266264
} from "../tool-result-context-guard.js";
267265
import {
268266
resolveLiveToolResultMaxChars,
267+
truncateOversizedToolResultsInMessages,
269268
truncateOversizedToolResultsInSessionManager,
270269
} from "../tool-result-truncation.js";
271270
import { splitSdkTools } from "../tool-split.js";
@@ -460,6 +459,7 @@ export {
460459
};
461460

462461
const MAX_BTW_SNAPSHOT_MESSAGES = 100;
462+
const PROMPT_TOOL_RESULT_AGGREGATE_CAP_MULTIPLIER = 4;
463463

464464
function summarizeMessagePayload(msg: AgentMessage): { textChars: number; imageBlocks: number } {
465465
const content = (msg as { content?: unknown }).content;
@@ -3434,6 +3434,31 @@ export async function runEmbeddedAttempt(
34343434
activeSession.agent.state.messages = filteredMessages;
34353435
}
34363436
prePromptMessageCount = activeSession.messages.length;
3437+
const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
3438+
const promptToolResultMaxChars = resolveLiveToolResultMaxChars({
3439+
contextWindowTokens: contextTokenBudget,
3440+
cfg: params.config,
3441+
agentId: sessionAgentId,
3442+
});
3443+
let promptHistoryMessages = activeSession.messages;
3444+
const promptToolResultTruncation = truncateOversizedToolResultsInMessages(
3445+
activeSession.messages,
3446+
contextTokenBudget,
3447+
promptToolResultMaxChars,
3448+
promptToolResultMaxChars * PROMPT_TOOL_RESULT_AGGREGATE_CAP_MULTIPLIER,
3449+
);
3450+
if (promptToolResultTruncation.truncatedCount > 0) {
3451+
promptHistoryMessages = promptToolResultTruncation.messages;
3452+
log.info(
3453+
`[tool-result-truncation] Truncated ${promptToolResultTruncation.truncatedCount} ` +
3454+
`tool result(s) for prompt history ` +
3455+
`(maxChars=${promptToolResultMaxChars} ` +
3456+
`aggregateBudgetChars=${
3457+
promptToolResultMaxChars * PROMPT_TOOL_RESULT_AGGREGATE_CAP_MULTIPLIER
3458+
}) ` +
3459+
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
3460+
);
3461+
}
34373462

34383463
const promptSubmission = resolveRuntimeContextPromptParts({
34393464
effectivePrompt: promptForRuntimeContextSplit,
@@ -3470,8 +3495,8 @@ export async function runEmbeddedAttempt(
34703495
const runtimeContextMessageForCurrentTurn =
34713496
buildRuntimeContextCustomMessage(runtimeContextForHook);
34723497
const messagesForCurrentPrompt = runtimeContextMessageForCurrentTurn
3473-
? [...activeSession.messages, runtimeContextMessageForCurrentTurn]
3474-
: activeSession.messages;
3498+
? [...promptHistoryMessages, runtimeContextMessageForCurrentTurn]
3499+
: promptHistoryMessages;
34753500
const hookMessagesForCurrentPrompt = normalizeMessagesForCurrentPromptBoundary({
34763501
messages: messagesForCurrentPrompt,
34773502
prompt: promptForModel,
@@ -3705,7 +3730,6 @@ export async function runEmbeddedAttempt(
37053730
const promptLen = effectivePrompt.length;
37063731
const sessionSummary = summarizeSessionContext(activeSession.messages);
37073732
const reserveTokens = settingsManager.getCompactionReserveTokens();
3708-
const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
37093733
emitTrustedDiagnosticEvent({
37103734
type: "context.assembled",
37113735
runId: params.runId,
@@ -3789,11 +3813,7 @@ export async function runEmbeddedAttempt(
37893813
prompt: promptForModel,
37903814
contextTokenBudget,
37913815
reserveTokens,
3792-
toolResultMaxChars: resolveLiveToolResultMaxChars({
3793-
contextWindowTokens: contextTokenBudget,
3794-
cfg: params.config,
3795-
agentId: sessionAgentId,
3796-
}),
3816+
toolResultMaxChars: promptToolResultMaxChars,
37973817
});
37983818
if (preemptiveCompaction) {
37993819
contextBudgetStatus = buildPrePromptContextBudgetStatus({
@@ -3901,6 +3921,29 @@ export async function runEmbeddedAttempt(
39013921
if (normalizedReplayMessages !== activeSession.messages) {
39023922
activeSession.agent.state.messages = normalizedReplayMessages;
39033923
}
3924+
// Temporarily replaces `activeSession.agent.streamFn` with a wrapper built by
// `wrapStreamFnWithMessageTransform`, and returns a cleanup function that
// undoes the replacement.
//
// The callback handed to the wrapper runs `truncateOversizedToolResultsInMessages`
// on the messages it receives and yields the truncated list only when at least
// one tool result was truncated; otherwise it yields the input array itself.
// NOTE(review): presumably the wrapper applies this callback to the provider-bound
// messages before delegating to `baseStreamFn` — confirm against
// `wrapStreamFnWithMessageTransform`, which is defined outside this view.
const installProviderPromptHistoryTransform = (): (() => void) => {
3925+
// Remember whatever stream function is installed right now so it can be restored.
const baseStreamFn = activeSession.agent.streamFn;
3926+
const providerPromptStreamFn = wrapStreamFnWithMessageTransform(
3927+
baseStreamFn,
3928+
(messages) => {
3929+
const providerPromptHistoryTruncation = truncateOversizedToolResultsInMessages(
3930+
messages,
3931+
contextTokenBudget,
3932+
promptToolResultMaxChars,
3933+
// Fourth argument: the per-result cap scaled by the aggregate multiplier.
promptToolResultMaxChars * PROMPT_TOOL_RESULT_AGGREGATE_CAP_MULTIPLIER,
3934+
);
3935+
// Hand back the original array when nothing was truncated.
return providerPromptHistoryTruncation.truncatedCount > 0
3936+
? providerPromptHistoryTruncation.messages
3937+
: messages;
3938+
},
3939+
);
3940+
activeSession.agent.streamFn = providerPromptStreamFn;
3941+
return () => {
3942+
// Restore only if our wrapper is still the installed stream function, so a
// stream function assigned after this one is not overwritten.
if (activeSession.agent.streamFn === providerPromptStreamFn) {
3943+
activeSession.agent.streamFn = baseStreamFn;
3944+
}
3945+
};
3946+
};
39043947
finalPromptText = promptForSession;
39053948
trajectoryRecorder?.recordEvent("prompt.submitted", {
39063949
prompt: promptForModel,
@@ -3928,6 +3971,7 @@ export async function runEmbeddedAttempt(
39283971
captureCurrentPromptForModel = true;
39293972
}
39303973
};
3974+
const cleanupProviderPromptHistoryTransform = installProviderPromptHistoryTransform();
39313975
try {
39323976
if (promptSubmission.runtimeOnly) {
39333977
await promptActiveSession(promptForSession, {
@@ -3956,6 +4000,7 @@ export async function runEmbeddedAttempt(
39564000
}
39574001
}
39584002
} finally {
4003+
cleanupProviderPromptHistoryTransform();
39594004
cleanupModelPromptTransform();
39604005
}
39614006
}

src/agents/embedded-agent-runner/tool-result-truncation.test.ts

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,37 @@ describe("truncateOversizedToolResultsInMessages", () => {
422422
expect(text.length).toBeLessThan(500_000);
423423
}
424424
});
425+
426+
it("bounds aggregate tool-result text in prompt history without rewriting callers", () => {
427+
const medium = "alpha beta gamma delta epsilon ".repeat(800);
428+
const messages: AgentMessage[] = [
429+
makeUserMessage("hello"),
430+
makeAssistantMessage("calling tools"),
431+
makeToolResult(medium, "call_1"),
432+
makeToolResult(medium, "call_2"),
433+
makeToolResult(medium, "call_3"),
434+
];
435+
436+
const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
437+
messages,
438+
128_000,
439+
12_000,
440+
12_000,
441+
);
442+
443+
const totalChars = result.reduce(
444+
(sum, message) =>
445+
sum + (message.role === "toolResult" ? getToolResultTextLength(message) : 0),
446+
0,
447+
);
448+
expect(truncatedCount).toBeGreaterThan(0);
449+
expect(totalChars).toBeLessThanOrEqual(12_000);
450+
expect(result[0]).toBe(messages[0]);
451+
expect(result[1]).toBe(messages[1]);
452+
expect(messages.reduce((sum, message) => sum + getToolResultTextLength(message), 0)).toBe(
453+
medium.length * 3,
454+
);
455+
});
425456
});
426457

427458
describe("truncateOversizedToolResultsInSession", () => {
@@ -490,7 +521,7 @@ describe("truncateOversizedToolResultsInSession", () => {
490521
).toBe(false);
491522
});
492523

493-
it("prefers truncating newer aggregate tool-result entries before older larger ones", async () => {
524+
it("prefers truncating older aggregate tool-result entries before newer results", async () => {
494525
const dir = await createTmpDir();
495526
const sm = SessionManager.create(dir, dir);
496527
sm.appendMessage(makeUserMessage("hello"));
@@ -526,9 +557,9 @@ describe("truncateOversizedToolResultsInSession", () => {
526557
entry.type === "message" ? getFirstToolResultText(entry.message) : "",
527558
);
528559

529-
expect(afterTexts[0]).toBe(beforeTexts[0]);
530-
expect(afterTexts[1]).not.toBe(beforeTexts[1]);
531-
expect(afterTexts[1]).toContain("truncated");
560+
expect(afterTexts[0]).not.toBe(beforeTexts[0]);
561+
expect(afterTexts[0]).toContain("truncated");
562+
expect(afterTexts[1]).toBe(beforeTexts[1]);
532563
});
533564

534565
it("allows persisted-session recovery truncation to shrink below the old 2k floor", async () => {

0 commit comments

Comments
 (0)