fix(mistral): migrate cached read costs

Alix-007 · Alix-007 · commit b150a44ecc8a · 2026-06-02T08:23:36.000+08:00
diff --git a/docs/reference/prompt-caching.md b/docs/reference/prompt-caching.md
@@ -105,7 +105,7 @@ Per-agent heartbeat is supported at `agents.list[].heartbeat`.
 
 - Prompt caching is automatic on supported recent models. OpenClaw does not need to inject block-level cache markers.
 - OpenClaw uses `prompt_cache_key` to keep cache routing stable across turns. Direct OpenAI hosts use `prompt_cache_retention: "24h"` when `cacheRetention: "long"` is selected.
-- OpenAI-compatible Completions providers receive `prompt_cache_key` only when their model config explicitly sets `compat.supportsPromptCacheKey: true`; with that same opt-in, explicit `cacheRetention: "long"` also forwards `prompt_cache_retention: "24h"`, and `cacheRetention: "none"` suppresses both fields.
+- OpenAI-compatible Completions providers receive `prompt_cache_key` only when their model config explicitly sets `compat.supportsPromptCacheKey: true`. Long-retention forwarding is a separate capability: explicit `cacheRetention: "long"` sends `prompt_cache_retention: "24h"` only when that compat entry also supports long cache retention. Providers such as Mistral can opt into cache keys while setting `compat.supportsLongCacheRetention: false` to suppress the long-retention field. `cacheRetention: "none"` suppresses both fields.
 - OpenAI responses expose cached prompt tokens via `usage.prompt_tokens_details.cached_tokens` (or `input_tokens_details.cached_tokens` on Responses API events). OpenClaw maps that to `cacheRead`.
 - OpenAI does not expose a separate cache-write token counter, so `cacheWrite` stays `0` on OpenAI paths even when the provider is warming a cache.
 - OpenAI returns useful tracing and rate-limit headers such as `x-request-id`, `openai-processing-ms`, and `x-ratelimit-*`, but cache-hit accounting should come from the usage payload, not from headers.
diff --git a/src/commands/doctor-legacy-config.migrations.test.ts b/src/commands/doctor-legacy-config.migrations.test.ts
@@ -1648,14 +1648,73 @@ describe("normalizeCompatibilityConfigValues", () => {
       res.config.models?.providers?.mistral?.models?.map((model) => ({
         id: model.id,
         maxTokens: model.maxTokens,
+        cacheRead: model.cost.cacheRead,
       })),
     ).toEqual([
-      { id: "mistral-large-latest", maxTokens: 16384 },
-      { id: "magistral-small", maxTokens: 40000 },
+      { id: "mistral-large-latest", maxTokens: 16384, cacheRead: 0.05 },
+      { id: "magistral-small", maxTokens: 40000, cacheRead: 0.05 },
     ]);
     expect(res.changes).toEqual([
       "Normalized models.providers.mistral.models[0].maxTokens (262144 → 16384) to avoid Mistral context-window rejects.",
+      "Normalized models.providers.mistral.models[0].cost.cacheRead (0 → 0.05) for Mistral prompt-cache billing.",
       "Normalized models.providers.mistral.models[1].maxTokens (128000 → 40000) to avoid Mistral context-window rejects.",
+      "Normalized models.providers.mistral.models[1].cost.cacheRead (0 → 0.05) for Mistral prompt-cache billing.",
+    ]);
+  });
+
+  it("normalizes old zero Mistral cacheRead costs while preserving custom costs", () => {
+    const res = normalizeCompatibilityConfigValues({
+      models: {
+        providers: {
+          mistral: {
+            baseUrl: "https://api.mistral.ai/v1",
+            api: "openai-completions",
+            models: [
+              {
+                id: "codestral-latest",
+                name: "Codestral",
+                reasoning: false,
+                input: ["text"],
+                cost: { input: 0.3, output: 0.9, cacheRead: 0, cacheWrite: 0 }, // legacy zero cacheRead
+                contextWindow: 256000,
+                maxTokens: 32000,
+              },
+              {
+                id: "mistral-medium-3-5",
+                name: "Mistral Medium 3.5 Custom",
+                reasoning: false,
+                input: ["text"],
+                cost: { input: 1.5, output: 7.5, cacheRead: 0.07, cacheWrite: 0 }, // user-set non-zero cacheRead
+                contextWindow: 128000,
+                maxTokens: 32000,
+              },
+              {
+                id: "custom-mistral-model",
+                name: "Custom Mistral",
+                reasoning: false,
+                input: ["text"],
+                cost: { input: 1, output: 2, cacheRead: 0, cacheWrite: 0 }, // zero cacheRead on a custom model id
+                contextWindow: 128000,
+                maxTokens: 32000,
+              },
+            ],
+          },
+        },
+      },
+    });
+
+    expect(
+      res.config.models?.providers?.mistral?.models?.map((model) => ({
+        id: model.id,
+        cacheRead: model.cost.cacheRead,
+      })),
+    ).toEqual([
+      { id: "codestral-latest", cacheRead: 0.03 }, // zero replaced by the normalized value
+      { id: "mistral-medium-3-5", cacheRead: 0.07 }, // configured non-zero value is kept as-is
+      { id: "custom-mistral-model", cacheRead: 0 }, // expected to stay at zero
+    ]);
+    expect(res.changes).toEqual([
+      "Normalized models.providers.mistral.models[0].cost.cacheRead (0 → 0.03) for Mistral prompt-cache billing.",
     ]);
   });
 });
diff --git a/src/commands/doctor/shared/legacy-config-compatibility-base.ts b/src/commands/doctor/shared/legacy-config-compatibility-base.ts
@@ -3,7 +3,7 @@ import {
   normalizeLegacyBrowserConfig,
   normalizeLegacyCrossContextMessageConfig,
   normalizeLegacyMediaProviderOptions,
-  normalizeLegacyMistralModelMaxTokens,
+  normalizeLegacyMistralModelDefaults,
   normalizeLegacyOpenAIModelProviderApi,
   normalizeLegacyOllamaNativeNumCtxParams,
   normalizeLegacyRuntimeModelRefs,
@@ -44,5 +44,5 @@ export function normalizeBaseCompatibilityConfigValues(
   next = normalizeLegacyCrossContextMessageConfig(next, changes);
   next = normalizeLegacyMediaProviderOptions(next, changes);
   next = normalizeLegacyOllamaNativeNumCtxParams(next, changes);
-  return normalizeLegacyMistralModelMaxTokens(next, changes);
+  return normalizeLegacyMistralModelDefaults(next, changes);
 }
diff --git a/src/commands/doctor/shared/legacy-config-core-normalizers.ts b/src/commands/doctor/shared/legacy-config-core-normalizers.ts
@@ -1356,7 +1356,47 @@ export function normalizeLegacyOllamaNativeNumCtxParams(
   };
 }
 
-export function normalizeLegacyMistralModelMaxTokens(
+const MISTRAL_MODEL_CACHE_READ_COST_BY_ID: Record<string, number> = { // keys are lowercase model ids
+  "codestral-latest": 0.03,
+  "devstral-medium-latest": 0.04,
+  "magistral-small": 0.05,
+  "mistral-large-latest": 0.05,
+  "mistral-medium-2508": 0.04,
+  "mistral-medium-3-5": 0.15,
+  "mistral-small-latest": 0.01,
+  "pixtral-large-latest": 0.2,
+};
+/** Migrates a `cost.cacheRead` of exactly 0 to the table value for a known model id; any other cost or id is returned unchanged. */
+function normalizeLegacyMistralModelCost<T extends Record<string, unknown>>(params: {
+  providerId: string;
+  model: T;
+  modelId: string;
+  index: number;
+  changes: string[];
+}): { model: T; changed: boolean } {
+  const cost = params.model.cost;
+  if (!isRecord(cost) || cost.cacheRead !== 0) {
+    return { model: params.model, changed: false };
+  }
+  // Plain-object lookup also sees inherited members (e.g. id "constructor"), so require an actual number.
+  const normalizedCacheRead = MISTRAL_MODEL_CACHE_READ_COST_BY_ID[params.modelId.toLowerCase()];
+  if (typeof normalizedCacheRead !== "number") {
+    return { model: params.model, changed: false };
+  }
+  // Append the change message, then return a copy so the input model object is not mutated.
+  params.changes.push(
+    `Normalized models.providers.${sanitizeForLog(params.providerId)}.models[${params.index}].cost.cacheRead (0 → ${normalizedCacheRead}) for Mistral prompt-cache billing.`,
+  );
+  return {
+    model: {
+      ...params.model,
+      cost: { ...cost, cacheRead: normalizedCacheRead },
+    },
+    changed: true,
+  };
+}
+
+export function normalizeLegacyMistralModelDefaults(
   cfg: OpenClawConfig,
   changes: string[],
 ): OpenClawConfig {
@@ -1382,6 +1422,12 @@ export function normalizeLegacyMistralModelMaxTokens(
         return model;
       }
       const modelId = normalizeOptionalString(model.id) ?? "";
+      if (!modelId) {
+        return model;
+      }
+
+      let nextModel = model;
+      let modelChanged = false;
       const contextWindow =
         typeof model.contextWindow === "number" && Number.isFinite(model.contextWindow)
           ? model.contextWindow
@@ -1390,25 +1436,39 @@ export function normalizeLegacyMistralModelMaxTokens(
         typeof model.maxTokens === "number" && Number.isFinite(model.maxTokens)
           ? model.maxTokens
           : null;
-      if (!modelId || contextWindow === null || maxTokens === null) {
-        return model;
+
+      if (contextWindow !== null && maxTokens !== null) {
+        const normalizedMaxTokens = resolveNormalizedProviderModelMaxTokens({
+          providerId,
+          modelId,
+          contextWindow,
+          rawMaxTokens: maxTokens,
+        });
+        if (normalizedMaxTokens !== maxTokens) {
+          nextModel = Object.assign({}, nextModel, { maxTokens: normalizedMaxTokens });
+          modelChanged = true;
+          changes.push(
+            `Normalized models.providers.${providerId}.models[${index}].maxTokens (${maxTokens} → ${normalizedMaxTokens}) to avoid Mistral context-window rejects.`,
+          );
+        }
       }
 
-      const normalizedMaxTokens = resolveNormalizedProviderModelMaxTokens({
+      const costNormalization = normalizeLegacyMistralModelCost({
         providerId,
+        model: nextModel,
         modelId,
-        contextWindow,
-        rawMaxTokens: maxTokens,
+        index,
+        changes,
       });
-      if (normalizedMaxTokens === maxTokens) {
-        return model;
+      if (costNormalization.changed) {
+        nextModel = costNormalization.model;
+        modelChanged = true;
       }
 
-      modelsChanged = true;
-      changes.push(
-        `Normalized models.providers.${providerId}.models[${index}].maxTokens (${maxTokens} → ${normalizedMaxTokens}) to avoid Mistral context-window rejects.`,
-      );
-      return Object.assign({}, model, { maxTokens: normalizedMaxTokens });
+      if (modelChanged) {
+        modelsChanged = true;
+      }
+      return modelChanged ? nextModel : model;
     });
 
     if (!modelsChanged) {