Skip to content

Commit 64f2890

Browse files
authored
fix(agents): split system prompt cache prefix by transport (#59054)
* fix(agents): restore Anthropic prompt cache seam
* fix(agents): strip cache boundary for completions
* fix(agents): strip cache boundary for cli backends
* chore(changelog): note cross-transport cache boundary rollout
* fix(agents): route default stream fallbacks through boundary shapers
* fix(agents): strip cache boundary for provider streams
1 parent b0e1551 commit 64f2890

18 files changed

Lines changed: 480 additions & 34 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
2121
- Tests/runtime: trim local unit-test import/runtime fan-out across browser, WhatsApp, cron, task, and reply flows so owner suites start faster with lower shared-worker overhead while preserving the same focused behavior coverage. (#60249) Thanks @shakkernerd.
2222
- Tests/secrets runtime: restore split secrets suite cache and env isolation cleanup so broader runs do not leak stale plugin or provider snapshot state. (#60395) Thanks @shakkernerd.
2323
- Memory/dreaming (experimental): add opt-in weighted short-term recall promotion to `MEMORY.md`, managed dreaming modes (`off|core|rem|deep`), and a `/dreaming` command plus Dreams UI so durable memory promotion can run on background cadence without manual scheduling. (#60569) Thanks @vignesh07.
24+
- Agents/system prompts: add an internal cache-prefix boundary across Anthropic-family, OpenAI-family, Google, and CLI transport shaping so stable system-prompt prefixes stay reusable without leaking internal cache markers to provider payloads. (#59054)
2425

2526
### Fixes
2627

src/agents/anthropic-payload-policy.test.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
applyAnthropicPayloadPolicyToParams,
44
resolveAnthropicPayloadPolicy,
55
} from "./anthropic-payload-policy.js";
6+
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
67

78
type TestPayload = {
89
messages: Array<{ role: string; content: unknown }>;
@@ -102,4 +103,65 @@ describe("anthropic payload policy", () => {
102103
content: [{ type: "text", text: "Hello", cache_control: { type: "ephemeral" } }],
103104
});
104105
});
106+
107+
it("splits cached stable system content from uncached dynamic content", () => {
108+
const policy = resolveAnthropicPayloadPolicy({
109+
provider: "anthropic",
110+
api: "anthropic-messages",
111+
baseUrl: "https://api.anthropic.com/v1",
112+
cacheRetention: "long",
113+
enableCacheControl: true,
114+
});
115+
const payload: TestPayload = {
116+
system: [
117+
{
118+
type: "text",
119+
text: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic lab suffix`,
120+
},
121+
],
122+
messages: [{ role: "user", content: "Hello" }],
123+
};
124+
125+
applyAnthropicPayloadPolicyToParams(payload, policy);
126+
127+
expect(payload.system).toEqual([
128+
{
129+
type: "text",
130+
text: "Stable prefix",
131+
cache_control: { type: "ephemeral", ttl: "1h" },
132+
},
133+
{
134+
type: "text",
135+
text: "Dynamic lab suffix",
136+
},
137+
]);
138+
});
139+
140+
it("strips the boundary even when cache retention is disabled", () => {
141+
const policy = resolveAnthropicPayloadPolicy({
142+
provider: "anthropic",
143+
api: "anthropic-messages",
144+
baseUrl: "https://api.anthropic.com/v1",
145+
cacheRetention: "none",
146+
enableCacheControl: true,
147+
});
148+
const payload: TestPayload = {
149+
system: [
150+
{
151+
type: "text",
152+
text: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic lab suffix`,
153+
},
154+
],
155+
messages: [{ role: "user", content: "Hello" }],
156+
};
157+
158+
applyAnthropicPayloadPolicyToParams(payload, policy);
159+
160+
expect(payload.system).toEqual([
161+
{
162+
type: "text",
163+
text: "Stable prefix\nDynamic lab suffix",
164+
},
165+
]);
166+
});
105167
});

src/agents/anthropic-payload-policy.ts

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
import { resolveProviderRequestCapabilities } from "./provider-attribution.js";
2+
import {
3+
splitSystemPromptCacheBoundary,
4+
stripSystemPromptCacheBoundary,
5+
} from "./system-prompt-cache-boundary.js";
26

37
export type AnthropicServiceTier = "auto" | "standard_only";
48

@@ -46,13 +50,57 @@ function applyAnthropicCacheControlToSystem(
4650
return;
4751
}
4852

53+
const normalizedBlocks: Array<unknown> = [];
4954
for (const block of system) {
5055
if (!block || typeof block !== "object") {
56+
normalizedBlocks.push(block);
5157
continue;
5258
}
5359
const record = block as Record<string, unknown>;
54-
if (record.type === "text" && record.cache_control === undefined) {
55-
record.cache_control = cacheControl;
60+
if (record.type !== "text" || typeof record.text !== "string") {
61+
normalizedBlocks.push(block);
62+
continue;
63+
}
64+
const split = splitSystemPromptCacheBoundary(record.text);
65+
if (!split) {
66+
if (record.cache_control === undefined) {
67+
record.cache_control = cacheControl;
68+
}
69+
normalizedBlocks.push(record);
70+
continue;
71+
}
72+
73+
const { cache_control: existingCacheControl, ...rest } = record;
74+
if (split.stablePrefix) {
75+
normalizedBlocks.push({
76+
...rest,
77+
text: split.stablePrefix,
78+
cache_control: existingCacheControl ?? cacheControl,
79+
});
80+
}
81+
if (split.dynamicSuffix) {
82+
normalizedBlocks.push({
83+
...rest,
84+
text: split.dynamicSuffix,
85+
});
86+
}
87+
}
88+
89+
system.splice(0, system.length, ...normalizedBlocks);
90+
}
91+
92+
function stripAnthropicSystemPromptBoundary(system: unknown): void {
93+
if (!Array.isArray(system)) {
94+
return;
95+
}
96+
97+
for (const block of system) {
98+
if (!block || typeof block !== "object") {
99+
continue;
100+
}
101+
const record = block as Record<string, unknown>;
102+
if (record.type === "text" && typeof record.text === "string") {
103+
record.text = stripSystemPromptCacheBoundary(record.text);
56104
}
57105
}
58106
}
@@ -136,11 +184,16 @@ export function applyAnthropicPayloadPolicyToParams(
136184
payloadObj.service_tier = policy.serviceTier;
137185
}
138186

187+
if (policy.cacheControl) {
188+
applyAnthropicCacheControlToSystem(payloadObj.system, policy.cacheControl);
189+
} else {
190+
stripAnthropicSystemPromptBoundary(payloadObj.system);
191+
}
192+
139193
if (!policy.cacheControl) {
140194
return;
141195
}
142196

143-
applyAnthropicCacheControlToSystem(payloadObj.system, policy.cacheControl);
144197
// Preserve Anthropic cache-write scope by only tagging the trailing user turn.
145198
applyAnthropicCacheControlToMessages(payloadObj.messages, policy.cacheControl);
146199
}

src/agents/cli-runner.helpers.test.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { MAX_IMAGE_BYTES } from "../media/constants.js";
44
import { buildCliArgs, loadPromptRefImages } from "./cli-runner/helpers.js";
55
import * as promptImageUtils from "./pi-embedded-runner/run/images.js";
66
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
7+
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
78
import * as toolImages from "./tool-images.js";
89

910
describe("loadPromptRefImages", () => {
@@ -117,4 +118,19 @@ describe("buildCliArgs", () => {
117118
}),
118119
).toEqual(["exec", "resume", "thread-123", "--model", "gpt-5.4"]);
119120
});
121+
122+
it("strips the internal cache boundary from CLI system prompt args", () => {
123+
expect(
124+
buildCliArgs({
125+
backend: {
126+
command: "claude",
127+
systemPromptArg: "--append-system-prompt",
128+
},
129+
baseArgs: ["-p"],
130+
modelId: "claude-sonnet-4-6",
131+
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
132+
useResume: false,
133+
}),
134+
).toEqual(["-p", "--append-system-prompt", "Stable prefix\nDynamic suffix"]);
135+
});
120136
});

src/agents/cli-runner/helpers.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import type { EmbeddedContextFile } from "../pi-embedded-helpers.js";
1717
import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js";
1818
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
1919
import { detectRuntimeShell } from "../shell-utils.js";
20+
import { stripSystemPromptCacheBoundary } from "../system-prompt-cache-boundary.js";
2021
import { buildSystemPromptParams } from "../system-prompt-params.js";
2122
import { buildAgentSystemPrompt } from "../system-prompt.js";
2223
import { sanitizeImageBlocks } from "../tool-images.js";
@@ -253,7 +254,7 @@ export function buildCliArgs(params: {
253254
args.push(params.backend.modelArg, params.modelId);
254255
}
255256
if (!params.useResume && params.systemPrompt && params.backend.systemPromptArg) {
256-
args.push(params.backend.systemPromptArg, params.systemPrompt);
257+
args.push(params.backend.systemPromptArg, stripSystemPromptCacheBoundary(params.systemPrompt));
257258
}
258259
if (!params.useResume && params.sessionId) {
259260
if (params.backend.sessionArgs && params.backend.sessionArgs.length > 0) {

src/agents/google-transport-stream.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
import { parseGeminiAuth } from "../infra/gemini-auth.js";
1111
import { normalizeGoogleApiBaseUrl } from "../infra/google-api-base-url.js";
1212
import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
13+
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
1314
import { transformTransportMessages } from "./transport-message-transform.js";
1415
import {
1516
createEmptyTransportUsage,
@@ -445,7 +446,11 @@ export function buildGoogleGenerativeAiParams(
445446
}
446447
if (context.systemPrompt) {
447448
params.systemInstruction = {
448-
parts: [{ text: sanitizeTransportPayloadText(context.systemPrompt) }],
449+
parts: [
450+
{
451+
text: sanitizeTransportPayloadText(stripSystemPromptCacheBoundary(context.systemPrompt)),
452+
},
453+
],
449454
};
450455
}
451456
if (context.tools?.length) {

src/agents/openai-transport-stream.test.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ import {
1010
import { attachModelProviderRequestTransport } from "./provider-request-config.js";
1111
import {
1212
buildTransportAwareSimpleStreamFn,
13+
createBoundaryAwareStreamFnForModel,
1314
isTransportAwareApiSupported,
1415
prepareTransportAwareSimpleModel,
1516
resolveTransportAwareSimpleApi,
1617
} from "./provider-transport-stream.js";
18+
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
1719

1820
describe("openai transport stream", () => {
1921
it("reports the supported transport-aware APIs", () => {
@@ -24,6 +26,51 @@ describe("openai transport stream", () => {
2426
expect(isTransportAwareApiSupported("google-generative-ai")).toBe(true);
2527
});
2628

29+
it("builds boundary-aware stream shapers for supported default agent transports", () => {
30+
expect(
31+
createBoundaryAwareStreamFnForModel({
32+
id: "gpt-5.4",
33+
name: "GPT-5.4",
34+
api: "openai-responses",
35+
provider: "openai",
36+
baseUrl: "https://api.openai.com/v1",
37+
reasoning: true,
38+
input: ["text"],
39+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
40+
contextWindow: 200000,
41+
maxTokens: 8192,
42+
} satisfies Model<"openai-responses">),
43+
).toBeTypeOf("function");
44+
expect(
45+
createBoundaryAwareStreamFnForModel({
46+
id: "claude-sonnet-4-6",
47+
name: "Claude Sonnet 4.6",
48+
api: "anthropic-messages",
49+
provider: "anthropic",
50+
baseUrl: "https://api.anthropic.com",
51+
reasoning: true,
52+
input: ["text"],
53+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
54+
contextWindow: 200000,
55+
maxTokens: 8192,
56+
} satisfies Model<"anthropic-messages">),
57+
).toBeTypeOf("function");
58+
expect(
59+
createBoundaryAwareStreamFnForModel({
60+
id: "gemini-3.1-pro-preview",
61+
name: "Gemini 3.1 Pro Preview",
62+
api: "google-generative-ai",
63+
provider: "google",
64+
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
65+
reasoning: true,
66+
input: ["text"],
67+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
68+
contextWindow: 200000,
69+
maxTokens: 8192,
70+
} satisfies Model<"google-generative-ai">),
71+
).toBeTypeOf("function");
72+
});
73+
2774
it("prepares a custom simple-completion api alias when transport overrides are attached", () => {
2875
const model = attachModelProviderRequestTransport(
2976
{
@@ -439,6 +486,31 @@ describe("openai transport stream", () => {
439486
expect(params.input?.[0]).toMatchObject({ role: "developer" });
440487
});
441488

489+
it("strips the internal cache boundary from OpenAI system prompts", () => {
490+
const params = buildOpenAIResponsesParams(
491+
{
492+
id: "gpt-5.4",
493+
name: "GPT-5.4",
494+
api: "openai-responses",
495+
provider: "openai",
496+
baseUrl: "https://api.openai.com/v1",
497+
reasoning: true,
498+
input: ["text"],
499+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
500+
contextWindow: 200000,
501+
maxTokens: 8192,
502+
} satisfies Model<"openai-responses">,
503+
{
504+
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
505+
messages: [],
506+
tools: [],
507+
} as never,
508+
undefined,
509+
) as { input?: Array<{ content?: string }> };
510+
511+
expect(params.input?.[0]?.content).toBe("Stable prefix\nDynamic suffix");
512+
});
513+
442514
it("defaults responses tool schemas to strict on native OpenAI routes", () => {
443515
const params = buildOpenAIResponsesParams(
444516
{
@@ -689,6 +761,31 @@ describe("openai transport stream", () => {
689761
expect(params.messages?.[0]).toMatchObject({ role: "system" });
690762
});
691763

764+
it("strips the internal cache boundary from OpenAI completions system prompts", () => {
765+
const params = buildOpenAICompletionsParams(
766+
{
767+
id: "gpt-4.1",
768+
name: "GPT-4.1",
769+
api: "openai-completions",
770+
provider: "openai",
771+
baseUrl: "https://api.openai.com/v1",
772+
reasoning: false,
773+
input: ["text"],
774+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
775+
contextWindow: 200000,
776+
maxTokens: 8192,
777+
} satisfies Model<"openai-completions">,
778+
{
779+
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
780+
messages: [],
781+
tools: [],
782+
} as never,
783+
undefined,
784+
) as { messages?: Array<{ content?: string }> };
785+
786+
expect(params.messages?.[0]?.content).toBe("Stable prefix\nDynamic suffix");
787+
});
788+
692789
it("uses system role and streaming usage compat for native ModelStudio completions providers", () => {
693790
const params = buildOpenAICompletionsParams(
694791
{

src/agents/openai-transport-stream.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import {
2929
} from "./openai-responses-payload-policy.js";
3030
import { resolveProviderRequestCapabilities } from "./provider-attribution.js";
3131
import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
32+
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
3233
import { transformTransportMessages } from "./transport-message-transform.js";
3334
import { mergeTransportMetadata, sanitizeTransportPayloadText } from "./transport-stream-shared.js";
3435

@@ -225,7 +226,7 @@ function convertResponsesMessages(
225226
if (includeSystemPrompt && context.systemPrompt) {
226227
messages.push({
227228
role: model.reasoning && options?.supportsDeveloperRole !== false ? "developer" : "system",
228-
content: sanitizeTransportPayloadText(context.systemPrompt),
229+
content: sanitizeTransportPayloadText(stripSystemPromptCacheBoundary(context.systemPrompt)),
229230
});
230231
}
231232
let msgIndex = 0;
@@ -1294,9 +1295,15 @@ export function buildOpenAICompletionsParams(
12941295
options: OpenAICompletionsOptions | undefined,
12951296
) {
12961297
const compat = getCompat(model);
1298+
const completionsContext = context.systemPrompt
1299+
? {
1300+
...context,
1301+
systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt),
1302+
}
1303+
: context;
12971304
const params: Record<string, unknown> = {
12981305
model: model.id,
1299-
messages: convertMessages(model as never, context, compat as never),
1306+
messages: convertMessages(model as never, completionsContext, compat as never),
13001307
stream: true,
13011308
};
13021309
if (compat.supportsUsageInStreaming) {

0 commit comments

Comments
 (0)