Skip to content

Commit b150a44

Browse files
committed
fix(mistral): migrate cached read costs
1 parent cbb4521 commit b150a44

4 files changed

Lines changed: 137 additions & 18 deletions

File tree

docs/reference/prompt-caching.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ Per-agent heartbeat is supported at `agents.list[].heartbeat`.
105105

106106
- Prompt caching is automatic on supported recent models. OpenClaw does not need to inject block-level cache markers.
107107
- OpenClaw uses `prompt_cache_key` to keep cache routing stable across turns. Direct OpenAI hosts use `prompt_cache_retention: "24h"` when `cacheRetention: "long"` is selected.
108-
- OpenAI-compatible Completions providers receive `prompt_cache_key` only when their model config explicitly sets `compat.supportsPromptCacheKey: true`; with that same opt-in, explicit `cacheRetention: "long"` also forwards `prompt_cache_retention: "24h"`, and `cacheRetention: "none"` suppresses both fields.
108+
- OpenAI-compatible Completions providers receive `prompt_cache_key` only when their model config explicitly sets `compat.supportsPromptCacheKey: true`. Long-retention forwarding is a separate capability: explicit `cacheRetention: "long"` sends `prompt_cache_retention: "24h"` only when that compat entry also supports long cache retention. Providers such as Mistral can opt into cache keys while setting `compat.supportsLongCacheRetention: false` to suppress the long-retention field. `cacheRetention: "none"` suppresses both fields.
109109
- OpenAI responses expose cached prompt tokens via `usage.prompt_tokens_details.cached_tokens` (or `input_tokens_details.cached_tokens` on Responses API events). OpenClaw maps that to `cacheRead`.
110110
- OpenAI does not expose a separate cache-write token counter, so `cacheWrite` stays `0` on OpenAI paths even when the provider is warming a cache.
111111
- OpenAI returns useful tracing and rate-limit headers such as `x-request-id`, `openai-processing-ms`, and `x-ratelimit-*`, but cache-hit accounting should come from the usage payload, not from headers.

src/commands/doctor-legacy-config.migrations.test.ts

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,14 +1648,73 @@ describe("normalizeCompatibilityConfigValues", () => {
16481648
res.config.models?.providers?.mistral?.models?.map((model) => ({
16491649
id: model.id,
16501650
maxTokens: model.maxTokens,
1651+
cacheRead: model.cost.cacheRead,
16511652
})),
16521653
).toEqual([
1653-
{ id: "mistral-large-latest", maxTokens: 16384 },
1654-
{ id: "magistral-small", maxTokens: 40000 },
1654+
{ id: "mistral-large-latest", maxTokens: 16384, cacheRead: 0.05 },
1655+
{ id: "magistral-small", maxTokens: 40000, cacheRead: 0.05 },
16551656
]);
16561657
expect(res.changes).toEqual([
16571658
"Normalized models.providers.mistral.models[0].maxTokens (262144 → 16384) to avoid Mistral context-window rejects.",
1659+
"Normalized models.providers.mistral.models[0].cost.cacheRead (0 → 0.05) for Mistral prompt-cache billing.",
16581660
"Normalized models.providers.mistral.models[1].maxTokens (128000 → 40000) to avoid Mistral context-window rejects.",
1661+
"Normalized models.providers.mistral.models[1].cost.cacheRead (0 → 0.05) for Mistral prompt-cache billing.",
1662+
]);
1663+
});
1664+
1665+
it("normalizes old zero Mistral cacheRead costs while preserving custom costs", () => {
  // Three fixtures: a zero cacheRead the test expects to become 0.03, a
  // non-zero cacheRead (0.07) that must be kept, and a zero cacheRead on an id
  // the test expects to remain 0.
  const { config, changes } = normalizeCompatibilityConfigValues({
    models: {
      providers: {
        mistral: {
          baseUrl: "https://api.mistral.ai/v1",
          api: "openai-completions",
          models: [
            {
              id: "codestral-latest",
              name: "Codestral",
              reasoning: false,
              input: ["text"],
              cost: { input: 0.3, output: 0.9, cacheRead: 0, cacheWrite: 0 },
              contextWindow: 256000,
              maxTokens: 32000,
            },
            {
              id: "mistral-medium-3-5",
              name: "Mistral Medium 3.5 Custom",
              reasoning: false,
              input: ["text"],
              cost: { input: 1.5, output: 7.5, cacheRead: 0.07, cacheWrite: 0 },
              contextWindow: 128000,
              maxTokens: 32000,
            },
            {
              id: "custom-mistral-model",
              name: "Custom Mistral",
              reasoning: false,
              input: ["text"],
              cost: { input: 1, output: 2, cacheRead: 0, cacheWrite: 0 },
              contextWindow: 128000,
              maxTokens: 32000,
            },
          ],
        },
      },
    },
  });

  // Project each normalized model down to the two fields under test.
  const cacheReadById = config.models?.providers?.mistral?.models?.map(({ id, cost }) => ({
    id,
    cacheRead: cost.cacheRead,
  }));

  expect(cacheReadById).toEqual([
    { id: "codestral-latest", cacheRead: 0.03 },
    { id: "mistral-medium-3-5", cacheRead: 0.07 },
    { id: "custom-mistral-model", cacheRead: 0 },
  ]);

  // Only the first model was rewritten, so exactly one change is reported.
  expect(changes).toEqual([
    "Normalized models.providers.mistral.models[0].cost.cacheRead (0 → 0.03) for Mistral prompt-cache billing.",
  ]);
});
16611720
});

src/commands/doctor/shared/legacy-config-compatibility-base.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import {
33
normalizeLegacyBrowserConfig,
44
normalizeLegacyCrossContextMessageConfig,
55
normalizeLegacyMediaProviderOptions,
6-
normalizeLegacyMistralModelMaxTokens,
6+
normalizeLegacyMistralModelDefaults,
77
normalizeLegacyOpenAIModelProviderApi,
88
normalizeLegacyOllamaNativeNumCtxParams,
99
normalizeLegacyRuntimeModelRefs,
@@ -44,5 +44,5 @@ export function normalizeBaseCompatibilityConfigValues(
4444
next = normalizeLegacyCrossContextMessageConfig(next, changes);
4545
next = normalizeLegacyMediaProviderOptions(next, changes);
4646
next = normalizeLegacyOllamaNativeNumCtxParams(next, changes);
47-
return normalizeLegacyMistralModelMaxTokens(next, changes);
47+
return normalizeLegacyMistralModelDefaults(next, changes);
4848
}

src/commands/doctor/shared/legacy-config-core-normalizers.ts

Lines changed: 73 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,7 +1356,47 @@ export function normalizeLegacyOllamaNativeNumCtxParams(
13561356
};
13571357
}
13581358

1359-
export function normalizeLegacyMistralModelMaxTokens(
1359+
/**
 * Replacement `cost.cacheRead` values for Mistral models whose stored config
 * still carries a legacy `0`. Keys are lowercase because lookups lowercase the
 * model id before indexing.
 */
const MISTRAL_MODEL_CACHE_READ_COST_BY_ID: Record<string, number> = {
  "codestral-latest": 0.03,
  "devstral-medium-latest": 0.04,
  "magistral-small": 0.05,
  "mistral-large-latest": 0.05,
  "mistral-medium-2508": 0.04,
  "mistral-medium-3-5": 0.15,
  "mistral-small-latest": 0.01,
  "pixtral-large-latest": 0.2,
};

/**
 * Rewrites a model's `cost.cacheRead` from exactly `0` to the table value for
 * its id, recording a human-readable entry in `params.changes`.
 *
 * The model is left untouched when `cost` is not a record, when `cacheRead` is
 * anything other than `0` (custom prices are preserved), or when the id has no
 * entry in the table.
 *
 * @param params.providerId - Provider key used in the change message (sanitized before logging).
 * @param params.model - Model entry to inspect; never mutated.
 * @param params.modelId - Model id; matched case-insensitively against the table.
 * @param params.index - Position of the model in the provider's list, used in the change message.
 * @param params.changes - Accumulator that receives the change description.
 * @returns The original model with `changed: false`, or a shallow copy with the new cost and `changed: true`.
 */
function normalizeLegacyMistralModelCost<T extends Record<string, unknown>>(params: {
  providerId: string;
  model: T;
  modelId: string;
  index: number;
  changes: string[];
}): { model: T; changed: boolean } {
  const cost = params.model.cost;
  if (!isRecord(cost) || cost.cacheRead !== 0) {
    return { model: params.model, changed: false };
  }

  // Own-property check: the table is a plain object, so ids such as
  // "constructor" or "__proto__" would otherwise resolve through
  // Object.prototype and yield a non-number "price".
  const lookupId = params.modelId.toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(MISTRAL_MODEL_CACHE_READ_COST_BY_ID, lookupId)) {
    return { model: params.model, changed: false };
  }

  const normalizedCacheRead = MISTRAL_MODEL_CACHE_READ_COST_BY_ID[lookupId];
  if (normalizedCacheRead === undefined) {
    return { model: params.model, changed: false };
  }

  params.changes.push(
    `Normalized models.providers.${sanitizeForLog(params.providerId)}.models[${params.index}].cost.cacheRead (0 → ${normalizedCacheRead}) for Mistral prompt-cache billing.`,
  );
  return {
    model: {
      ...params.model,
      cost: { ...cost, cacheRead: normalizedCacheRead },
    },
    changed: true,
  };
}
1398+
1399+
export function normalizeLegacyMistralModelDefaults(
13601400
cfg: OpenClawConfig,
13611401
changes: string[],
13621402
): OpenClawConfig {
@@ -1382,6 +1422,12 @@ export function normalizeLegacyMistralModelMaxTokens(
13821422
return model;
13831423
}
13841424
const modelId = normalizeOptionalString(model.id) ?? "";
1425+
if (!modelId) {
1426+
return model;
1427+
}
1428+
1429+
let nextModel = model;
1430+
let modelChanged = false;
13851431
const contextWindow =
13861432
typeof model.contextWindow === "number" && Number.isFinite(model.contextWindow)
13871433
? model.contextWindow
@@ -1390,25 +1436,39 @@ export function normalizeLegacyMistralModelMaxTokens(
13901436
typeof model.maxTokens === "number" && Number.isFinite(model.maxTokens)
13911437
? model.maxTokens
13921438
: null;
1393-
if (!modelId || contextWindow === null || maxTokens === null) {
1394-
return model;
1439+
1440+
if (contextWindow !== null && maxTokens !== null) {
1441+
const normalizedMaxTokens = resolveNormalizedProviderModelMaxTokens({
1442+
providerId,
1443+
modelId,
1444+
contextWindow,
1445+
rawMaxTokens: maxTokens,
1446+
});
1447+
if (normalizedMaxTokens !== maxTokens) {
1448+
nextModel = Object.assign({}, nextModel, { maxTokens: normalizedMaxTokens });
1449+
modelChanged = true;
1450+
changes.push(
1451+
`Normalized models.providers.${providerId}.models[${index}].maxTokens (${maxTokens} → ${normalizedMaxTokens}) to avoid Mistral context-window rejects.`,
1452+
);
1453+
}
13951454
}
13961455

1397-
const normalizedMaxTokens = resolveNormalizedProviderModelMaxTokens({
1456+
const costNormalization = normalizeLegacyMistralModelCost({
13981457
providerId,
1458+
model: nextModel,
13991459
modelId,
1400-
contextWindow,
1401-
rawMaxTokens: maxTokens,
1460+
index,
1461+
changes,
14021462
});
1403-
if (normalizedMaxTokens === maxTokens) {
1404-
return model;
1463+
if (costNormalization.changed) {
1464+
nextModel = costNormalization.model;
1465+
modelChanged = true;
14051466
}
14061467

1407-
modelsChanged = true;
1408-
changes.push(
1409-
`Normalized models.providers.${providerId}.models[${index}].maxTokens (${maxTokens} → ${normalizedMaxTokens}) to avoid Mistral context-window rejects.`,
1410-
);
1411-
return Object.assign({}, model, { maxTokens: normalizedMaxTokens });
1468+
if (modelChanged) {
1469+
modelsChanged = true;
1470+
}
1471+
return modelChanged ? nextModel : model;
14121472
});
14131473

14141474
if (!modelsChanged) {

0 commit comments

Comments
 (0)