Skip to content

Commit 535a019

Browse files
committed
fix(cache): honour explicit cacheRetention for OpenRouter→Anthropic models
cacheRetention: "long" was silently ignored for models routed through OpenRouter, producing only 5-minute ephemeral cache markers instead of the expected 1-hour TTL. cacheRetention: "none" was also ignored. There were two issues in the chain: 1. resolveCacheRetention() bailed out early when the provider didn't match a known Anthropic/Google cache family, even when the operator had explicitly configured cacheRetention in model params. The explicit-config checks now run before the family-based early return and are honoured when the route is a known Anthropic cache family, is Google-cache eligible, or is an OpenRouter→Anthropic route (provider "openrouter" with an Anthropic model ref); explicit config on any other route still resolves to undefined. The application of cache markers is additionally gated by endpoint-class and model-ref checks in createOpenRouterSystemCacheWrapper, so unsupported routes skip marker insertion. 2. applyAnthropicEphemeralCacheControlMarkers() always hardcoded { type: "ephemeral" }, discarding any TTL the wrapper resolved. It now accepts an optional cacheControl parameter (defaults to { type: "ephemeral" } for backward compatibility) and a skipMarkerInsertion flag for the "none" case. When cacheRetention is "none", no new cache_control markers are inserted on system/developer messages, but the thinking-block sanitizer (stripping stale cache_control from thinking/redacted_thinking blocks) still runs. Before: OpenRouter→Anthropic requests always got cache_control without ttl (or with markers despite "none"), regardless of config. After: cacheRetention: "long" → { type: "ephemeral", ttl: "1h" }; cacheRetention: "none" → no new markers, sanitizer still active.
1 parent 9ba545a commit 535a019

7 files changed

Lines changed: 227 additions & 41 deletions

src/agents/anthropic-payload-policy.ts

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ function isLongTtlEligibleEndpoint(baseUrl: string | undefined): boolean {
4949
);
5050
}
5151

52-
function resolveAnthropicEphemeralCacheControl(
52+
export function resolveAnthropicEphemeralCacheControl(
5353
baseUrl: string | undefined,
5454
cacheRetention: AnthropicPayloadPolicyInput["cacheRetention"],
5555
): AnthropicEphemeralCacheControl | undefined {
@@ -225,6 +225,8 @@ export function applyAnthropicPayloadPolicyToParams(
225225

226226
export function applyAnthropicEphemeralCacheControlMarkers(
227227
payloadObj: Record<string, unknown>,
228+
cacheControl: AnthropicEphemeralCacheControl = { type: "ephemeral" },
229+
skipMarkerInsertion: boolean = false,
228230
): void {
229231
const messages = payloadObj.messages;
230232
if (!Array.isArray(messages)) {
@@ -233,18 +235,18 @@ export function applyAnthropicEphemeralCacheControlMarkers(
233235

234236
for (const message of messages as Array<{ role?: string; content?: unknown }>) {
235237
if (message.role === "system" || message.role === "developer") {
236-
if (typeof message.content === "string") {
237-
message.content = [
238-
{ type: "text", text: message.content, cache_control: { type: "ephemeral" } },
239-
];
240-
continue;
241-
}
242-
if (Array.isArray(message.content) && message.content.length > 0) {
243-
const last = message.content[message.content.length - 1];
244-
if (last && typeof last === "object") {
245-
const record = last as Record<string, unknown>;
246-
if (record.type !== "thinking" && record.type !== "redacted_thinking") {
247-
record.cache_control = { type: "ephemeral" };
238+
if (!skipMarkerInsertion) {
239+
if (typeof message.content === "string") {
240+
message.content = [{ type: "text", text: message.content, cache_control: cacheControl }];
241+
continue;
242+
}
243+
if (Array.isArray(message.content) && message.content.length > 0) {
244+
const last = message.content[message.content.length - 1];
245+
if (last && typeof last === "object") {
246+
const record = last as Record<string, unknown>;
247+
if (record.type !== "thinking" && record.type !== "redacted_thinking") {
248+
record.cache_control = cacheControl;
249+
}
248250
}
249251
}
250252
}

src/agents/pi-embedded-runner/anthropic-cache-control-payload.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,29 @@ describe("applyAnthropicEphemeralCacheControlMarkers", () => {
3232
},
3333
]);
3434
});
35+
36+
it("preserves ttl in custom cacheControl when provided", () => {
37+
const payload = {
38+
messages: [
39+
{
40+
role: "system",
41+
content: "system prompt",
42+
},
43+
],
44+
} satisfies Record<string, unknown>;
45+
46+
applyAnthropicEphemeralCacheControlMarkers(payload, {
47+
type: "ephemeral",
48+
ttl: "1h",
49+
});
50+
51+
expect(payload.messages).toEqual([
52+
{
53+
role: "system",
54+
content: [
55+
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral", ttl: "1h" } },
56+
],
57+
},
58+
]);
59+
});
3560
});

src/agents/pi-embedded-runner/extra-params.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,16 @@ function applyPrePluginStreamWrappers(ctx: ApplyExtraParamsContext): void {
683683
function applyPostPluginStreamWrappers(
684684
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
685685
): void {
686-
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
686+
const openRouterCacheRetention = resolveCacheRetention(
687+
ctx.effectiveExtraParams,
688+
ctx.provider,
689+
typeof ctx.model?.api === "string" ? ctx.model.api : undefined,
690+
ctx.modelId,
691+
);
692+
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(
693+
ctx.agent.streamFn,
694+
openRouterCacheRetention,
695+
);
687696
ctx.agent.streamFn = createOpenAIStringContentWrapper(ctx.agent.streamFn);
688697
ctx.agent.streamFn = createOpenAICompletionsToolsCompatWrapper(ctx.agent.streamFn);
689698

src/agents/pi-embedded-runner/prompt-cache-retention.test.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,69 @@ describe("prompt cache retention", () => {
2929
resolveCacheRetention(undefined, "google", "google-generative-ai", "gemini-3.1-pro-preview"),
3030
).toBeUndefined();
3131
});
32+
it("honours explicit cacheRetention for OpenRouter Anthropic models", () => {
33+
expect(
34+
resolveCacheRetention(
35+
{ cacheRetention: "long" },
36+
"openrouter",
37+
"openai-completions",
38+
"anthropic/claude-haiku-4.5",
39+
),
40+
).toBe("long");
41+
});
42+
43+
it('honours explicit cacheRetention "short" for proxy providers', () => {
44+
expect(
45+
resolveCacheRetention(
46+
{ cacheRetention: "short" },
47+
"openrouter",
48+
"openai-completions",
49+
"anthropic/claude-sonnet-4.6",
50+
),
51+
).toBe("short");
52+
});
53+
54+
it('honours explicit cacheRetention "none" for proxy providers', () => {
55+
expect(
56+
resolveCacheRetention(
57+
{ cacheRetention: "none" },
58+
"openrouter",
59+
"openai-completions",
60+
"anthropic/claude-sonnet-4.6",
61+
),
62+
).toBe("none");
63+
});
64+
65+
it("returns undefined for proxy providers without explicit config", () => {
66+
expect(
67+
resolveCacheRetention(
68+
undefined,
69+
"openrouter",
70+
"openai-completions",
71+
"anthropic/claude-sonnet-4.6",
72+
),
73+
).toBeUndefined();
74+
});
75+
it("ignores explicit cacheRetention for non-OpenRouter providers without cache family", () => {
76+
expect(
77+
resolveCacheRetention(
78+
{ cacheRetention: "long" },
79+
"amazon-bedrock",
80+
"openai-completions",
81+
"some/non-anthropic-model",
82+
),
83+
).toBeUndefined();
84+
});
85+
it("ignores explicit cacheRetention for OpenRouter non-Anthropic models", () => {
86+
expect(
87+
resolveCacheRetention(
88+
{ cacheRetention: "long" },
89+
"openrouter",
90+
"openai-completions",
91+
"deepseek/deepseek-r1",
92+
),
93+
).toBeUndefined();
94+
});
3295

3396
it("identifies supported direct Google cache families", () => {
3497
expect(

src/agents/pi-embedded-runner/prompt-cache-retention.ts

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
2-
import { resolveAnthropicCacheRetentionFamily } from "./anthropic-family-cache-semantics.js";
2+
import {
3+
isAnthropicModelRef,
4+
resolveAnthropicCacheRetentionFamily,
5+
} from "./anthropic-family-cache-semantics.js";
36

47
type CacheRetention = "none" | "short" | "long";
58

@@ -22,6 +25,7 @@ export function resolveCacheRetention(
2225
): CacheRetention | undefined {
2326
const hasExplicitCacheConfig =
2427
extraParams?.cacheRetention !== undefined || extraParams?.cacheControlTtl !== undefined;
28+
2529
const family = resolveAnthropicCacheRetentionFamily({
2630
provider,
2731
modelApi,
@@ -30,21 +34,31 @@ export function resolveCacheRetention(
3034
});
3135
const googleEligible = isGooglePromptCacheEligible({ modelApi, modelId });
3236

33-
if (!family && !googleEligible) {
34-
return undefined;
35-
}
37+
// Determine if this is a verified OpenRouter→Anthropic route.
38+
// OpenRouter uses the "openai-completions" API and the model ref
39+
// starts with "anthropic/". This mirrors the endpoint-class +
40+
// model-ref check in createOpenRouterSystemCacheWrapper.
41+
const isOpenRouterAnthropicRoute =
42+
provider === "openrouter" && modelId != null && isAnthropicModelRef(modelId);
3643

37-
const newVal = extraParams?.cacheRetention;
38-
if (newVal === "none" || newVal === "short" || newVal === "long") {
39-
return newVal;
40-
}
44+
const isEligible = !!family || googleEligible || isOpenRouterAnthropicRoute;
4145

42-
const legacy = extraParams?.cacheControlTtl;
43-
if (legacy === "5m") {
44-
return "short";
46+
if (hasExplicitCacheConfig && isEligible) {
47+
const newVal = extraParams?.cacheRetention;
48+
if (newVal === "none" || newVal === "short" || newVal === "long") {
49+
return newVal;
50+
}
51+
const legacy = extraParams?.cacheControlTtl;
52+
if (legacy === "5m") {
53+
return "short";
54+
}
55+
if (legacy === "1h") {
56+
return "long";
57+
}
4558
}
46-
if (legacy === "1h") {
47-
return "long";
59+
60+
if (!family && !googleEligible) {
61+
return undefined;
4862
}
4963

5064
return family === "anthropic-direct" ? "short" : undefined;

src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ import {
77
createOpenRouterWrapper,
88
} from "./proxy-stream-wrappers.js";
99

10-
function runSystemCacheWrapper(model: Partial<Model<"openai-completions">>) {
10+
function runSystemCacheWrapper(
11+
model: Partial<Model<"openai-completions">>,
12+
cacheRetention?: "short" | "long" | "none",
13+
) {
1114
const payload = {
1215
messages: [{ role: "system", content: "system prompt" }],
1316
};
@@ -16,7 +19,7 @@ function runSystemCacheWrapper(model: Partial<Model<"openai-completions">>) {
1619
return createAssistantMessageEventStream();
1720
};
1821

19-
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
22+
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn, cacheRetention);
2023
void wrapped(
2124
{
2225
api: "openai-completions",
@@ -210,4 +213,62 @@ describe("proxy stream wrappers", () => {
210213
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
211214
]);
212215
});
216+
217+
it("includes ttl: 1h in cache_control when cacheRetention is long", () => {
218+
const payload = runSystemCacheWrapper({}, "long");
219+
expect(payload.messages[0]?.content).toEqual([
220+
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral", ttl: "1h" } },
221+
]);
222+
});
223+
224+
it("omits ttl in cache_control when cacheRetention is short", () => {
225+
const payload = runSystemCacheWrapper({}, "short");
226+
expect(payload.messages[0]?.content).toEqual([
227+
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
228+
]);
229+
});
230+
231+
it("omits ttl in cache_control when cacheRetention is not provided", () => {
232+
const payload = runSystemCacheWrapper({});
233+
expect(payload.messages[0]?.content).toEqual([
234+
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
235+
]);
236+
});
237+
238+
it("skips cache_control markers when cacheRetention is none but still strips thinking cache_control", () => {
239+
const payload = {
240+
messages: [
241+
{ role: "system", content: "system prompt" },
242+
{
243+
role: "assistant",
244+
content: [
245+
{ type: "thinking", text: "draft", cache_control: { type: "ephemeral" } },
246+
{ type: "text", text: "answer" },
247+
],
248+
},
249+
],
250+
} satisfies Record<string, unknown>;
251+
const baseStreamFn: StreamFn = (_resolvedModel, _context, options) => {
252+
options?.onPayload?.(payload, {} as any);
253+
return createAssistantMessageEventStream();
254+
};
255+
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn, "none");
256+
wrapped({} as any, {} as any, { onPayload: () => {} } as any);
257+
// System message: no cache_control added
258+
expect(payload.messages[0]?.content).toBe("system prompt");
259+
// Thinking block: cache_control stripped (sanitizer still active)
260+
expect(payload.messages[1]?.content[0]).not.toHaveProperty("cache_control");
261+
});
262+
263+
it("honours cacheRetention on custom provider pointing to openrouter.ai", () => {
264+
const payload = runSystemCacheWrapper(
265+
{ provider: "custom-openrouter", baseUrl: "https://openrouter.ai/api/v1" } as Partial<
266+
Model<"openai-completions">
267+
>,
268+
{ cacheRetention: "long" },
269+
);
270+
expect(payload.messages[0]?.content).toEqual([
271+
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral", ttl: "1h" } },
272+
]);
273+
});
213274
});

src/agents/pi-embedded-runner/proxy-stream-wrappers.ts

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@ import type { StreamFn } from "@mariozechner/pi-agent-core";
22
import { streamSimple } from "@mariozechner/pi-ai";
33
import type { ThinkLevel } from "../../auto-reply/thinking.js";
44
import { normalizeOptionalLowercaseString, readStringValue } from "../../shared/string-coerce.js";
5+
import {
6+
applyAnthropicEphemeralCacheControlMarkers,
7+
resolveAnthropicEphemeralCacheControl,
8+
type AnthropicEphemeralCacheControl,
9+
} from "../anthropic-payload-policy.js";
510
import { resolveProviderRequestPolicy } from "../provider-attribution.js";
611
import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js";
7-
import { applyAnthropicEphemeralCacheControlMarkers } from "./anthropic-cache-control-payload.js";
812
import { isAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
913
import { mapThinkingLevelToReasoningEffort } from "./reasoning-effort-utils.js";
1014
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
@@ -150,7 +154,10 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL
150154
}
151155
}
152156

153-
export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
157+
export function createOpenRouterSystemCacheWrapper(
158+
baseStreamFn: StreamFn | undefined,
159+
cacheRetention?: "short" | "long" | "none",
160+
): StreamFn {
154161
const underlying = baseStreamFn ?? streamSimple;
155162
return (model, context, options) => {
156163
const provider = readStringValue(model.provider);
@@ -164,19 +171,24 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde
164171
capability: "llm",
165172
transport: "stream",
166173
}).endpointClass;
167-
if (
168-
!modelId ||
169-
!isAnthropicModelRef(modelId) ||
170-
!(
171-
endpointClass === "openrouter" ||
172-
(endpointClass === "default" && normalizeOptionalLowercaseString(provider) === "openrouter")
173-
)
174-
) {
174+
const isOpenRouterRoute =
175+
endpointClass === "openrouter" ||
176+
(endpointClass === "default" && normalizeOptionalLowercaseString(provider) === "openrouter");
177+
if (!modelId || !isAnthropicModelRef(modelId) || !isOpenRouterRoute) {
175178
return underlying(model, context, options);
176179
}
177-
180+
// cacheRetention "none" means no new cache markers, but the sanitizer
181+
// (thinking/redacted_thinking cleanup) must still run.
182+
const cacheControl: AnthropicEphemeralCacheControl | undefined =
183+
cacheRetention === "none"
184+
? undefined
185+
: resolveAnthropicEphemeralCacheControl(undefined, cacheRetention);
178186
return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
179-
applyAnthropicEphemeralCacheControlMarkers(payloadObj);
187+
applyAnthropicEphemeralCacheControlMarkers(
188+
payloadObj,
189+
cacheControl ?? { type: "ephemeral" },
190+
cacheRetention === "none",
191+
);
180192
});
181193
};
182194
}

0 commit comments

Comments
 (0)