Skip to content

Commit eef24d4

Browse files
authored
fix(models): preserve provider prompt cache boundaries
Split Anthropic system prompts at the cache boundary so only stable prefixes get cache_control, strip the internal marker when cache control is disabled, and keep OpenAI-compatible Anthropic cache-control routes from caching dynamic suffixes.

Fixes #89386.
1 parent c3baec7 commit eef24d4

4 files changed

Lines changed: 216 additions & 24 deletions

File tree

src/llm/providers/anthropic.test.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { beforeEach, describe, expect, it, vi } from "vitest";
2+
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "../../agents/system-prompt-cache-boundary.js";
23
import type { Context, Model } from "../types.js";
34

45
const anthropicMockState = vi.hoisted(() => ({
@@ -216,4 +217,66 @@ describe("Anthropic provider", () => {
216217
expect(result.stopReason).toBe("error");
217218
expect((capturedPayload as { stop_sequences?: unknown }).stop_sequences).toEqual(["STOP"]);
218219
});
220+
221+
it("splits the system prompt cache boundary into cached and uncached Anthropic blocks", async () => {
222+
let capturedPayload: unknown;
223+
const stream = streamSimpleAnthropic(
224+
makeAnthropicModel(),
225+
{
226+
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
227+
messages: [{ role: "user", content: "hello", timestamp: 0 }],
228+
},
229+
{
230+
apiKey: "sk-ant-provider",
231+
onPayload: (payload) => {
232+
capturedPayload = payload;
233+
throw new Error("stop before network");
234+
},
235+
},
236+
);
237+
238+
const result = await stream.result();
239+
240+
expect(result.stopReason).toBe("error");
241+
expect((capturedPayload as { system?: unknown }).system).toEqual([
242+
{
243+
type: "text",
244+
text: "Stable prefix",
245+
cache_control: { type: "ephemeral" },
246+
},
247+
{
248+
type: "text",
249+
text: "Dynamic suffix",
250+
},
251+
]);
252+
});
253+
254+
it("strips the internal cache boundary when Anthropic cache control is disabled", async () => {
255+
let capturedPayload: unknown;
256+
const stream = streamSimpleAnthropic(
257+
makeAnthropicModel(),
258+
{
259+
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
260+
messages: [{ role: "user", content: "hello", timestamp: 0 }],
261+
},
262+
{
263+
apiKey: "sk-ant-provider",
264+
cacheRetention: "none",
265+
onPayload: (payload) => {
266+
capturedPayload = payload;
267+
throw new Error("stop before network");
268+
},
269+
},
270+
);
271+
272+
const result = await stream.result();
273+
274+
expect(result.stopReason).toBe("error");
275+
expect((capturedPayload as { system?: unknown }).system).toEqual([
276+
{
277+
type: "text",
278+
text: "Stable prefix\nDynamic suffix",
279+
},
280+
]);
281+
});
219282
});

src/llm/providers/anthropic.ts

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@ import type {
55
MessageCreateParamsStreaming,
66
MessageParam,
77
RawMessageStreamEvent,
8+
TextBlockParam,
89
} from "@anthropic-ai/sdk/resources/messages.js";
10+
import {
11+
splitSystemPromptCacheBoundary,
12+
stripSystemPromptCacheBoundary,
13+
} from "../../agents/system-prompt-cache-boundary.js";
914
import { getEnvApiKey } from "../env-api-keys.js";
1015
import { calculateCost, clampThinkingLevel } from "../model-utils.js";
1116
import type {
@@ -951,21 +956,10 @@ function buildParams(
951956
},
952957
];
953958
if (context.systemPrompt) {
954-
params.system.push({
955-
type: "text",
956-
text: sanitizeSurrogates(context.systemPrompt),
957-
...(cacheControl ? { cache_control: cacheControl } : {}),
958-
});
959+
params.system.push(...buildSystemPromptBlocks(context.systemPrompt, cacheControl));
959960
}
960961
} else if (context.systemPrompt) {
961-
// Add cache control to system prompt for non-OAuth tokens
962-
params.system = [
963-
{
964-
type: "text",
965-
text: sanitizeSurrogates(context.systemPrompt),
966-
...(cacheControl ? { cache_control: cacheControl } : {}),
967-
},
968-
];
962+
params.system = buildSystemPromptBlocks(context.systemPrompt, cacheControl);
969963
}
970964

971965
// Temperature is incompatible with extended thinking (adaptive or budget-based).
@@ -1220,6 +1214,41 @@ function convertMessages(
12201214
return params;
12211215
}
12221216

1217+
function buildSystemPromptBlocks(
1218+
systemPrompt: string,
1219+
cacheControl: CacheControlEphemeral | undefined,
1220+
): TextBlockParam[] {
1221+
if (!cacheControl) {
1222+
return [
1223+
{ type: "text", text: sanitizeSurrogates(stripSystemPromptCacheBoundary(systemPrompt)) },
1224+
];
1225+
}
1226+
1227+
const split = splitSystemPromptCacheBoundary(systemPrompt);
1228+
if (!split) {
1229+
return [
1230+
{
1231+
type: "text",
1232+
text: sanitizeSurrogates(systemPrompt),
1233+
cache_control: cacheControl,
1234+
},
1235+
];
1236+
}
1237+
1238+
const blocks: TextBlockParam[] = [];
1239+
if (split.stablePrefix) {
1240+
blocks.push({
1241+
type: "text",
1242+
text: sanitizeSurrogates(split.stablePrefix),
1243+
cache_control: cacheControl,
1244+
});
1245+
}
1246+
if (split.dynamicSuffix) {
1247+
blocks.push({ type: "text", text: sanitizeSurrogates(split.dynamicSuffix) });
1248+
}
1249+
return blocks.length > 0 ? blocks : [{ type: "text", text: "" }];
1250+
}
1251+
12231252
function shouldUseFineGrainedToolStreamingBeta(
12241253
model: Model<"anthropic-messages">,
12251254
context: Context,

src/llm/providers/openai-completions.test.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import type { ChatCompletionChunk } from "openai/resources/chat/completions.js";
22
import { describe, expect, it, vi } from "vitest";
3+
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "../../agents/system-prompt-cache-boundary.js";
34
import type { Context, Model } from "../types.js";
45

56
type DeepPartial<T> = { [P in keyof T]?: DeepPartial<T[P]> };
@@ -224,6 +225,75 @@ describe("OpenAI-compatible completions params", () => {
224225
expect(capturedCacheKey).toBeUndefined();
225226
expect(capturedRetention).toBe("24h");
226227
});
228+
229+
it("strips the internal cache boundary from OpenAI-compatible system prompts", async () => {
230+
let capturedMessages: unknown;
231+
const stream = streamOpenAICompletions(
232+
createModel(32_000),
233+
{
234+
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
235+
messages: [{ role: "user", content: "hi", timestamp: 1 }],
236+
},
237+
{
238+
apiKey: "sk-test",
239+
onPayload(payload) {
240+
capturedMessages = (payload as { messages?: unknown }).messages;
241+
throw new Error("stop before network");
242+
},
243+
},
244+
);
245+
246+
const result = await stream.result();
247+
248+
expect(result.stopReason).toBe("error");
249+
const messages = capturedMessages as Array<{ role: string; content: unknown }>;
250+
expect(messages[0]).toEqual({
251+
role: "system",
252+
content: "Stable prefix\nDynamic suffix",
253+
});
254+
});
255+
256+
it("splits the cache boundary before applying Anthropic cache control for OpenRouter Anthropic models", async () => {
257+
let capturedMessages: unknown;
258+
const stream = streamOpenAICompletions(
259+
{
260+
...createModel(32_000),
261+
id: "anthropic/claude-sonnet-4.6",
262+
provider: "openrouter",
263+
baseUrl: "https://openrouter.ai/api/v1",
264+
},
265+
{
266+
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
267+
messages: [{ role: "user", content: "hi", timestamp: 1 }],
268+
},
269+
{
270+
apiKey: "sk-test",
271+
onPayload(payload) {
272+
capturedMessages = (payload as { messages?: unknown }).messages;
273+
throw new Error("stop before network");
274+
},
275+
},
276+
);
277+
278+
const result = await stream.result();
279+
280+
expect(result.stopReason).toBe("error");
281+
const messages = capturedMessages as Array<{ role: string; content: unknown }>;
282+
expect(messages[0]).toEqual({
283+
role: "system",
284+
content: [
285+
{
286+
type: "text",
287+
text: "Stable prefix",
288+
cache_control: { type: "ephemeral" },
289+
},
290+
{
291+
type: "text",
292+
text: "Dynamic suffix",
293+
},
294+
],
295+
});
296+
});
227297
});
228298

229299
describe("openai-completions stop-reason tool-call guard", () => {

src/llm/providers/openai-completions.ts

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ import type {
1010
ChatCompletionSystemMessageParam,
1111
ChatCompletionToolMessageParam,
1212
} from "openai/resources/chat/completions.js";
13+
import {
14+
splitSystemPromptCacheBoundary,
15+
stripSystemPromptCacheBoundary,
16+
} from "../../agents/system-prompt-cache-boundary.js";
1317
import { createReasoningTagTextPartitioner } from "../../shared/text/reasoning-tag-text-partitioner.js";
1418
import { getEnvApiKey } from "../env-api-keys.js";
1519
import { calculateCost, clampThinkingLevel } from "../model-utils.js";
@@ -584,8 +588,10 @@ function buildParams(
584588
compat: ResolvedOpenAICompletionsCompat = getCompat(model),
585589
cacheRetention: CacheRetention = resolveCacheRetention(options?.cacheRetention),
586590
) {
587-
const messages = convertMessages(model, context, compat);
588591
const cacheControl = getCompatCacheControl(compat, cacheRetention);
592+
const messages = convertMessages(model, context, compat, {
593+
preserveSystemPromptCacheBoundary: cacheControl !== undefined,
594+
});
589595

590596
type ChatCompletionRequestParams = Omit<
591597
OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming,
@@ -835,13 +841,7 @@ function addCacheControlToTextContent(
835841
if (content.length === 0) {
836842
return false;
837843
}
838-
message.content = [
839-
{
840-
type: "text",
841-
text: content,
842-
cache_control: cacheControl,
843-
},
844-
] as ChatCompletionTextPartWithCacheControl[];
844+
message.content = buildCacheControlledTextParts(content, cacheControl);
845845
return true;
846846
}
847847

@@ -852,19 +852,43 @@ function addCacheControlToTextContent(
852852
for (let i = content.length - 1; i >= 0; i--) {
853853
const part = content[i];
854854
if (part?.type === "text") {
855-
const textPart = part as ChatCompletionTextPartWithCacheControl;
856-
textPart.cache_control = cacheControl;
855+
const text = (part as ChatCompletionTextPartWithCacheControl).text;
856+
content.splice(i, 1, ...buildCacheControlledTextParts(text, cacheControl));
857857
return true;
858858
}
859859
}
860860

861861
return false;
862862
}
863863

864+
function buildCacheControlledTextParts(
865+
text: string,
866+
cacheControl: OpenAICompatCacheControl,
867+
): ChatCompletionTextPartWithCacheControl[] {
868+
const split = splitSystemPromptCacheBoundary(text);
869+
if (!split) {
870+
return [{ type: "text", text, cache_control: cacheControl }];
871+
}
872+
873+
const parts: ChatCompletionTextPartWithCacheControl[] = [];
874+
if (split.stablePrefix) {
875+
parts.push({
876+
type: "text",
877+
text: split.stablePrefix,
878+
cache_control: cacheControl,
879+
});
880+
}
881+
if (split.dynamicSuffix) {
882+
parts.push({ type: "text", text: split.dynamicSuffix });
883+
}
884+
return parts.length > 0 ? parts : [{ type: "text", text: "" }];
885+
}
886+
864887
export function convertMessages(
865888
model: Model<"openai-completions">,
866889
context: Context,
867890
compat: ResolvedOpenAICompletionsCompat,
891+
options: { preserveSystemPromptCacheBoundary?: boolean } = {},
868892
): ChatCompletionMessageParam[] {
869893
const params: ChatCompletionMessageParam[] = [];
870894

@@ -892,7 +916,13 @@ export function convertMessages(
892916
if (context.systemPrompt) {
893917
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
894918
const role = useDeveloperRole ? "developer" : "system";
895-
params.push({ role, content: sanitizeSurrogates(context.systemPrompt) });
919+
const systemPrompt = options.preserveSystemPromptCacheBoundary
920+
? context.systemPrompt
921+
: stripSystemPromptCacheBoundary(context.systemPrompt);
922+
params.push({
923+
role,
924+
content: sanitizeSurrogates(systemPrompt),
925+
});
896926
}
897927

898928
let lastRole: string | null = null;

0 commit comments

Comments
 (0)