Skip to content

Commit 0a3211d

Browse files
authored
fix(openrouter): gate prompt cache markers by endpoint (#60761)
* fix(openrouter): gate prompt cache markers by endpoint
* test(openrouter): use claude sonnet 4.6 cache model
1 parent ee742ce commit 0a3211d

5 files changed

Lines changed: 102 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ Docs: https://docs.openclaw.ai
172172
- Exec approvals/channels: decouple initiating-surface approval availability from native delivery enablement so Telegram, Slack, and Discord still expose approvals when approvers exist and native target routing is configured separately. (#59776) Thanks @joelnishanth.
173173
- Agents/logging: keep orphaned-user transcript repair warnings focused on interactive runs, and downgrade background-trigger repairs (`heartbeat`, `cron`, `memory`, `overflow`) to debug logs to reduce false-alarm gateway noise.
174174
- Gateway/node pairing: require `operator.pairing` for node approvals end-to-end, while still requiring `operator.write` or `operator.admin` when the pending node commands need those higher scopes. (#60461) Thanks @eleqtrizit.
175+
- Providers/OpenRouter: only inject Anthropic prompt-cache `cache_control` markers on native/default OpenRouter routes (not on custom proxy base URLs), and keep injecting them for native OpenRouter hosts configured behind custom provider ids. Thanks @vincentkoc.
175176

176177
## 2026.4.1
177178

extensions/openrouter/index.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ export default definePluginEntry({
2323
const {
2424
buildPassthroughGeminiSanitizingReplayPolicy,
2525
composeProviderStreamWrappers,
26-
createOpenRouterSystemCacheWrapper,
2726
createOpenRouterWrapper,
2827
createProviderApiKeyAuthMethod,
2928
DEFAULT_CONTEXT_TOKENS,
@@ -146,7 +145,6 @@ export default definePluginEntry({
146145
? (streamFn) => injectOpenRouterRouting(streamFn, providerRouting)
147146
: undefined,
148147
(streamFn) => createOpenRouterWrapper(streamFn, openRouterThinkingLevel),
149-
(streamFn) => createOpenRouterSystemCacheWrapper(streamFn),
150148
);
151149
},
152150
isCacheTtlEligible: (ctx) => isOpenRouterCacheTtlModel(ctx.modelId),

src/agents/pi-embedded-runner/extra-params.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import {
1717
shouldApplySiliconFlowThinkingOffCompat,
1818
} from "./moonshot-stream-wrappers.js";
1919
import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js";
20+
import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js";
2021
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
2122

2223
const defaultProviderRuntimeDeps = {
@@ -328,6 +329,8 @@ function applyPrePluginStreamWrappers(ctx: ApplyExtraParamsContext): void {
328329
function applyPostPluginStreamWrappers(
329330
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
330331
): void {
332+
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
333+
331334
if (!ctx.providerWrapperHandled) {
332335
// Guard Google-family payloads against invalid negative thinking budgets
333336
// emitted by upstream model-ID heuristics for Gemini 3.1 variants.

src/agents/pi-embedded-runner/proxy-stream-wrappers.test.ts

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ import type { StreamFn } from "@mariozechner/pi-agent-core";
22
import type { Context, Model } from "@mariozechner/pi-ai";
33
import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
44
import { describe, expect, it } from "vitest";
5-
import { createOpenRouterWrapper } from "./proxy-stream-wrappers.js";
5+
import {
6+
createOpenRouterSystemCacheWrapper,
7+
createOpenRouterWrapper,
8+
} from "./proxy-stream-wrappers.js";
69

710
describe("proxy stream wrappers", () => {
811
it("adds OpenRouter attribution headers to stream options", () => {
@@ -35,4 +38,79 @@ describe("proxy stream wrappers", () => {
3538
},
3639
]);
3740
});
41+
42+
it("injects cache_control markers for declared OpenRouter Anthropic models on the default route", () => {
43+
const payload = {
44+
messages: [{ role: "system", content: "system prompt" }],
45+
};
46+
const baseStreamFn: StreamFn = (model, _context, options) => {
47+
options?.onPayload?.(payload, model);
48+
return createAssistantMessageEventStream();
49+
};
50+
51+
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
52+
void wrapped(
53+
{
54+
api: "openai-completions",
55+
provider: "openrouter",
56+
id: "anthropic/claude-sonnet-4.6",
57+
} as Model<"openai-completions">,
58+
{ messages: [] },
59+
{},
60+
);
61+
62+
expect(payload.messages[0]?.content).toEqual([
63+
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
64+
]);
65+
});
66+
67+
it("does not inject cache_control markers for declared OpenRouter providers on custom proxy URLs", () => {
68+
const payload = {
69+
messages: [{ role: "system", content: "system prompt" }],
70+
};
71+
const baseStreamFn: StreamFn = (model, _context, options) => {
72+
options?.onPayload?.(payload, model);
73+
return createAssistantMessageEventStream();
74+
};
75+
76+
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
77+
void wrapped(
78+
{
79+
api: "openai-completions",
80+
provider: "openrouter",
81+
id: "anthropic/claude-sonnet-4.6",
82+
baseUrl: "https://proxy.example.com/v1",
83+
} as Model<"openai-completions">,
84+
{ messages: [] },
85+
{},
86+
);
87+
88+
expect(payload.messages[0]?.content).toBe("system prompt");
89+
});
90+
91+
it("injects cache_control markers for native OpenRouter hosts behind custom provider ids", () => {
92+
const payload = {
93+
messages: [{ role: "system", content: "system prompt" }],
94+
};
95+
const baseStreamFn: StreamFn = (model, _context, options) => {
96+
options?.onPayload?.(payload, model);
97+
return createAssistantMessageEventStream();
98+
};
99+
100+
const wrapped = createOpenRouterSystemCacheWrapper(baseStreamFn);
101+
void wrapped(
102+
{
103+
api: "openai-completions",
104+
provider: "custom-openrouter",
105+
id: "anthropic/claude-sonnet-4.6",
106+
baseUrl: "https://openrouter.ai/api/v1",
107+
} as Model<"openai-completions">,
108+
{ messages: [] },
109+
{},
110+
);
111+
112+
expect(payload.messages[0]?.content).toEqual([
113+
{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } },
114+
]);
115+
});
38116
});

src/agents/pi-embedded-runner/proxy-stream-wrappers.ts

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import type { StreamFn } from "@mariozechner/pi-agent-core";
22
import { streamSimple } from "@mariozechner/pi-ai";
33
import type { ThinkLevel } from "../../auto-reply/thinking.js";
4+
import { resolveProviderRequestPolicy } from "../provider-attribution.js";
45
import { isProxyReasoningUnsupportedModelHint } from "../../plugin-sdk/provider-model-shared.js";
56
import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js";
67
import { applyAnthropicEphemeralCacheControlMarkers } from "./anthropic-cache-control-payload.js";
7-
import { isOpenRouterAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
8+
import { isAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
89
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
910
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
1011
const KILOCODE_FEATURE_DEFAULT = "openclaw";
@@ -58,10 +59,24 @@ function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkL
5859
export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
5960
const underlying = baseStreamFn ?? streamSimple;
6061
return (model, context, options) => {
62+
const provider = typeof model.provider === "string" ? model.provider : undefined;
63+
const modelId = typeof model.id === "string" ? model.id : undefined;
64+
// Keep OpenRouter-specific cache markers on verified OpenRouter routes
65+
// (or the provider's default route), but not on arbitrary OpenAI proxies.
66+
const endpointClass = resolveProviderRequestPolicy({
67+
provider,
68+
api: typeof model.api === "string" ? model.api : undefined,
69+
baseUrl: typeof model.baseUrl === "string" ? model.baseUrl : undefined,
70+
capability: "llm",
71+
transport: "stream",
72+
}).endpointClass;
6173
if (
62-
typeof model.provider !== "string" ||
63-
typeof model.id !== "string" ||
64-
!isOpenRouterAnthropicModelRef(model.provider, model.id)
74+
!modelId ||
75+
!isAnthropicModelRef(modelId) ||
76+
!(
77+
endpointClass === "openrouter" ||
78+
(endpointClass === "default" && provider?.trim().toLowerCase() === "openrouter")
79+
)
6580
) {
6681
return underlying(model, context, options);
6782
}

0 commit comments

Comments
 (0)