Skip to content

Commit b27a02d

Browse files
committed
Respect cacheRetention for OpenRouter Anthropic
Anthropic models served via OpenRouter already had system-prompt caching enabled, just like those served directly by Anthropic. However, they did not respect the cacheRetention setting: a 5-minute cache_control marker (i.e. the "short" option) was always added, even when cacheRetention was explicitly set to "none". The setting is now respected: "long" uses a 1h TTL, and "none" disables caching. When cacheRetention is not specified, the default is the "short" cache, matching the direct Anthropic models.
1 parent e6897c8 commit b27a02d

4 files changed

Lines changed: 92 additions & 19 deletions

File tree

src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ const PI_AI_OAUTH_ANTHROPIC_BETAS = [
1919
...PI_AI_DEFAULT_ANTHROPIC_BETAS,
2020
] as const;
2121

22-
type CacheRetention = "none" | "short" | "long";
22+
export type CacheRetention = "none" | "short" | "long";
2323

2424
function isAnthropic1MModel(modelId: string): boolean {
2525
const normalized = modelId.trim().toLowerCase();
@@ -182,13 +182,15 @@ function normalizeOpenAiStringModeAnthropicToolChoice(toolChoice: unknown): unkn
182182
export function resolveCacheRetention(
183183
extraParams: Record<string, unknown> | undefined,
184184
provider: string,
185+
modelId: string,
185186
): CacheRetention | undefined {
186187
const isAnthropicDirect = provider === "anthropic";
187188
const hasBedrockOverride =
188189
extraParams?.cacheRetention !== undefined || extraParams?.cacheControlTtl !== undefined;
189190
const isAnthropicBedrock = provider === "amazon-bedrock" && hasBedrockOverride;
191+
const isAnthropicOpenRouter = provider === "openrouter" && modelId.startsWith("anthropic/");
190192

191-
if (!isAnthropicDirect && !isAnthropicBedrock) {
193+
if (!isAnthropicDirect && !isAnthropicBedrock && !isAnthropicOpenRouter) {
192194
return undefined;
193195
}
194196

@@ -205,7 +207,7 @@ export function resolveCacheRetention(
205207
return "long";
206208
}
207209

208-
return isAnthropicDirect ? "short" : undefined;
210+
return isAnthropicDirect || isAnthropicOpenRouter ? "short" : undefined;
209211
}
210212

211213
export function resolveAnthropicBetas(

src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,25 @@ type StreamPayload = {
1111
}>;
1212
};
1313

14-
function runOpenRouterPayload(payload: StreamPayload, modelId: string) {
14+
function runOpenRouterPayload(
15+
payload: StreamPayload,
16+
modelId: string,
17+
cacheRetention: string | undefined,
18+
) {
1519
const baseStreamFn: StreamFn = (model, _context, options) => {
1620
options?.onPayload?.(payload, model);
1721
return createAssistantMessageEventStream();
1822
};
1923
const agent = { streamFn: baseStreamFn };
2024

21-
applyExtraParamsToAgent(agent, undefined, "openrouter", modelId);
25+
const conf = {
26+
agents: {
27+
defaults: {
28+
models: { [`openrouter/${modelId}`]: { params: { cacheRetention: cacheRetention } } },
29+
},
30+
},
31+
};
32+
applyExtraParamsToAgent(agent, conf, "openrouter", modelId);
2233

2334
const model = {
2435
api: "openai-completions",
@@ -39,14 +50,61 @@ describe("extra-params: OpenRouter Anthropic cache_control", () => {
3950
],
4051
};
4152

42-
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6");
53+
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6", "short");
4354

4455
expect(payload.messages[0].content).toEqual([
4556
{ type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } },
4657
]);
4758
expect(payload.messages[1].content).toBe("Hello");
4859
});
4960

61+
it("injects cache_control by default if cacheRetention is not set", () => {
62+
const payload = {
63+
messages: [
64+
{ role: "system", content: "You are a helpful assistant." },
65+
{ role: "user", content: "Hello" },
66+
],
67+
};
68+
69+
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6", undefined);
70+
71+
expect(payload.messages[0].content).toEqual([
72+
{ type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } },
73+
]);
74+
});
75+
76+
it("doesn't inject cache_control if cacheRetention is none", () => {
77+
const payload = {
78+
messages: [
79+
{ role: "system", content: "You are a helpful assistant." },
80+
{ role: "user", content: "Hello" },
81+
],
82+
};
83+
84+
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6", "none");
85+
86+
expect(payload.messages[0].content).toEqual("You are a helpful assistant.");
87+
});
88+
89+
it("injects cache_control with ttl 1h if cacheRetention is long", () => {
90+
const payload = {
91+
messages: [
92+
{ role: "system", content: "You are a helpful assistant." },
93+
{ role: "user", content: "Hello" },
94+
],
95+
};
96+
97+
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6", "long");
98+
99+
expect(payload.messages[0].content).toEqual([
100+
{
101+
type: "text",
102+
text: "You are a helpful assistant.",
103+
cache_control: { type: "ephemeral", ttl: "1h" },
104+
},
105+
]);
106+
});
107+
50108
it("adds cache_control to last content block when system message is already array", () => {
51109
const payload = {
52110
messages: [
@@ -60,7 +118,7 @@ describe("extra-params: OpenRouter Anthropic cache_control", () => {
60118
],
61119
};
62120

63-
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6");
121+
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6", "short");
64122

65123
const content = payload.messages[0].content as Array<Record<string, unknown>>;
66124
expect(content[0]).toEqual({ type: "text", text: "Part 1" });
@@ -76,7 +134,7 @@ describe("extra-params: OpenRouter Anthropic cache_control", () => {
76134
messages: [{ role: "system", content: "You are a helpful assistant." }],
77135
};
78136

79-
runOpenRouterPayload(payload, "google/gemini-3-pro");
137+
runOpenRouterPayload(payload, "google/gemini-3-pro", "short");
80138

81139
expect(payload.messages[0].content).toBe("You are a helpful assistant.");
82140
});
@@ -86,7 +144,7 @@ describe("extra-params: OpenRouter Anthropic cache_control", () => {
86144
messages: [{ role: "user", content: "Hello" }],
87145
};
88146

89-
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6");
147+
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6", "short");
90148

91149
expect(payload.messages[0].content).toBe("Hello");
92150
});

src/agents/pi-embedded-runner/extra-params.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { streamSimple } from "@mariozechner/pi-ai";
44
import type { ThinkLevel } from "../../auto-reply/thinking.js";
55
import type { OpenClawConfig } from "../../config/config.js";
66
import {
7+
type CacheRetention,
78
createAnthropicBetaHeadersWrapper,
89
createAnthropicToolPayloadCompatibilityWrapper,
910
createBedrockNoCacheWrapper,
@@ -71,14 +72,15 @@ export function resolveExtraParams(params: {
7172
}
7273

7374
type CacheRetentionStreamOptions = Partial<SimpleStreamOptions> & {
74-
cacheRetention?: "none" | "short" | "long";
75+
cacheRetention?: CacheRetention;
7576
openaiWsWarmup?: boolean;
7677
};
7778

7879
function createStreamFnWithExtraParams(
7980
baseStreamFn: StreamFn | undefined,
8081
extraParams: Record<string, unknown> | undefined,
8182
provider: string,
83+
cacheRetention: CacheRetention | undefined,
8284
): StreamFn | undefined {
8385
if (!extraParams || Object.keys(extraParams).length === 0) {
8486
return undefined;
@@ -101,7 +103,6 @@ function createStreamFnWithExtraParams(
101103
if (typeof extraParams.openaiWsWarmup === "boolean") {
102104
streamParams.openaiWsWarmup = extraParams.openaiWsWarmup;
103105
}
104-
const cacheRetention = resolveCacheRetention(extraParams, provider);
105106
if (cacheRetention) {
106107
streamParams.cacheRetention = cacheRetention;
107108
}
@@ -351,7 +352,13 @@ export function applyExtraParamsToAgent(
351352
)
352353
: undefined;
353354
const merged = Object.assign({}, resolvedExtraParams, override);
354-
const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged, provider);
355+
const cacheRetention = resolveCacheRetention(merged, provider, modelId);
356+
const wrappedStreamFn = createStreamFnWithExtraParams(
357+
agent.streamFn,
358+
merged,
359+
provider,
360+
cacheRetention,
361+
);
355362

356363
if (wrappedStreamFn) {
357364
log.debug(`applying extraParams to agent streamFn for ${provider}/${modelId}`);
@@ -404,7 +411,7 @@ export function applyExtraParamsToAgent(
404411
const skipReasoningInjection = modelId === "auto" || isProxyReasoningUnsupported(modelId);
405412
const openRouterThinkingLevel = skipReasoningInjection ? undefined : thinkingLevel;
406413
agent.streamFn = createOpenRouterWrapper(agent.streamFn, openRouterThinkingLevel);
407-
agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
414+
agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn, cacheRetention);
408415
}
409416

410417
if (provider === "kilocode") {

src/agents/pi-embedded-runner/proxy-stream-wrappers.ts

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { StreamFn } from "@mariozechner/pi-agent-core";
22
import { streamSimple } from "@mariozechner/pi-ai";
33
import type { ThinkLevel } from "../../auto-reply/thinking.js";
4+
import type { CacheRetention } from "./anthropic-stream-wrappers.js";
45

56
const OPENROUTER_APP_HEADERS: Record<string, string> = {
67
"HTTP-Referer": "https://openclaw.ai",
@@ -59,18 +60,25 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL
5960
}
6061
}
6162

62-
export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
63+
export function createOpenRouterSystemCacheWrapper(
64+
baseStreamFn: StreamFn | undefined,
65+
cacheRetention: CacheRetention | undefined,
66+
): StreamFn {
6367
const underlying = baseStreamFn ?? streamSimple;
6468
return (model, context, options) => {
6569
if (
6670
typeof model.provider !== "string" ||
6771
typeof model.id !== "string" ||
68-
!isOpenRouterAnthropicModel(model.provider, model.id)
72+
!isOpenRouterAnthropicModel(model.provider, model.id) ||
73+
!cacheRetention ||
74+
!["short", "long"].includes(cacheRetention)
6975
) {
7076
return underlying(model, context, options);
7177
}
7278

7379
const originalOnPayload = options?.onPayload;
80+
const cacheControl =
81+
cacheRetention === "short" ? { type: "ephemeral" } : { type: "ephemeral", ttl: "1h" };
7482
return underlying(model, context, {
7583
...options,
7684
onPayload: (payload) => {
@@ -81,13 +89,11 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde
8189
continue;
8290
}
8391
if (typeof msg.content === "string") {
84-
msg.content = [
85-
{ type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
86-
];
92+
msg.content = [{ type: "text", text: msg.content, cache_control: cacheControl }];
8793
} else if (Array.isArray(msg.content) && msg.content.length > 0) {
8894
const last = msg.content[msg.content.length - 1];
8995
if (last && typeof last === "object") {
90-
(last as Record<string, unknown>).cache_control = { type: "ephemeral" };
96+
(last as Record<string, unknown>).cache_control = cacheControl;
9197
}
9298
}
9399
}

0 commit comments

Comments
 (0)