fix(models): keep user model switches strict

steipete · steipete · commit d2320e4d4b42 · 2026-04-27T23:32:44.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 
 - Control UI/Agents: redact tool-call args, partial/final results, derived exec output, and configured custom secret patterns before streaming tool events to the Control UI, so tool output cannot expose provider or channel credentials. Fixes #72283. (#72319) Thanks @volcano303 and @BunsDev.
+- Models/fallbacks: treat user-selected session models as exact choices, so `/model ollama/...` and model-picker switches fail visibly when the selected provider is unreachable instead of answering from an unrelated configured fallback. Fixes #73023. Thanks @pavelyortho-cyber.
 - CLI/model probes: fail local `infer model run` probes when the provider returns no text output, so unreachable local providers and empty completions no longer look like successful smoke tests. Refs #73023. Thanks @pavelyortho-cyber.
 - CLI/Ollama: run local `infer model run` through the lean provider completion path and skip global model discovery for one-shot local probes, so Ollama smoke tests no longer pay full chat-agent/tool startup cost or hang before the native `/api/chat` request. Fixes #72851. Thanks @TotalRes2020.
 - Doctor/gateway services: ignore launchd/systemd companion services that only reference the gateway as a dependency, suppress inactive Linux extra-service warnings, and avoid rewriting a running systemd gateway command/entrypoint during doctor repair. Carries forward #39118. Thanks @therk.
diff --git a/docs/concepts/model-failover.md b/docs/concepts/model-failover.md
@@ -24,7 +24,7 @@ For a normal text run, OpenClaw evaluates candidates in this order:
     Resolve the active session model and auth-profile preference.
   </Step>
   <Step title="Build candidate chain">
-    Build the model candidate chain from the currently selected session model, then `agents.defaults.model.fallbacks` in order, ending with the configured primary when the run started from an override.
+    Build the model candidate chain from the configured primary model or an auto-selected fallback model, then `agents.defaults.model.fallbacks` in order. Explicit user model selections are strict and do not silently fall back to a different model.
   </Step>
   <Step title="Try the current provider">
     Try the current provider with auth-profile rotation/cooldown rules.
@@ -207,7 +207,7 @@ If all profiles for a provider fail, OpenClaw moves to the next model in `agents
 
 Overloaded and rate-limit errors are handled more aggressively than billing cooldowns. By default, OpenClaw allows one same-provider auth-profile retry, then switches to the next configured model fallback without waiting. Provider-busy signals such as `ModelNotReadyException` land in that overloaded bucket. Tune this with `auth.cooldowns.overloadedProfileRotations`, `auth.cooldowns.overloadedBackoffMs`, and `auth.cooldowns.rateLimitedProfileRotations`.
 
-When a run starts with a model override (hooks or CLI), fallbacks still end at `agents.defaults.model.primary` after trying any configured fallbacks.
+When a run starts from the configured primary or an auto-selected fallback override, OpenClaw can walk the configured fallback chain. Explicit user selections (for example `/model ollama/qwen3.5:27b`, the model picker, or one-off CLI provider/model overrides) are strict: if that provider/model is unreachable or fails before producing a reply, OpenClaw reports the failure instead of answering from an unrelated fallback.
 
 ### Candidate chain rules
 
@@ -264,6 +264,7 @@ That means fallback retries have to coordinate with live model switching:
 
 - Only explicit user-driven model changes mark a pending live switch. That includes `/model`, `session_status(model=...)`, and `sessions.patch`.
 - System-driven model changes such as fallback rotation, heartbeat overrides, or compaction never mark a pending live switch on their own.
+- User-driven model overrides are treated as exact selections for fallback policy, so an unreachable selected provider surfaces as a failure instead of being masked by `agents.defaults.model.fallbacks`.
 - Before a fallback retry starts, the reply runner persists the selected fallback override fields to the session entry.
 - Auto fallback overrides remain selected on subsequent turns so OpenClaw does not probe a known-bad primary on every message. `/new`, `/reset`, and `sessions.reset` clear auto-sourced overrides and return the session to the configured default.
 - `/status` shows the selected model and, when fallback state differs, the active fallback model and reason.
diff --git a/docs/concepts/models.md b/docs/concepts/models.md
@@ -156,6 +156,7 @@ You can switch models for the current session without restarting:
     - If the agent is idle, the next run uses the new model right away.
     - If a run is already active, OpenClaw marks a live switch as pending and only restarts into the new model at a clean retry point.
     - If tool activity or reply output has already started, the pending switch can stay queued until a later retry opportunity or the next user turn.
+    - A user-selected `/model` ref is strict for that session: if the selected provider/model is unreachable, the reply fails visibly instead of silently answering from `agents.defaults.model.fallbacks`.
     - `/model status` is the detailed view (auth candidates and, when configured, provider endpoint `baseUrl` + `api` mode).
   </Accordion>
   <Accordion title="Ref parsing">
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
@@ -210,6 +210,11 @@ transport, but it does not start a chat-agent turn or load MCP/tool context. If
 this succeeds while normal agent replies fail, troubleshoot the model's agent
 prompt/tool capacity next.
 
+When you switch a conversation with `/model ollama/<model>`, OpenClaw treats
+that as an exact user selection. If the configured Ollama `baseUrl` is
+unreachable, the next reply fails with the provider error instead of silently
+answering from another configured fallback model.
+
 Live-verify the local text path, native stream path, and embeddings against
 local Ollama with:
 
diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts
@@ -696,6 +696,9 @@ async function agentCommandInternal(
     const hasStoredOverride = Boolean(
       sessionEntry?.modelOverride || sessionEntry?.providerOverride,
     );
+    let storedModelOverrideSource = hasStoredOverride
+      ? sessionEntry?.modelOverrideSource
+      : undefined;
     const explicitProviderOverride =
       typeof opts.provider === "string"
         ? normalizeExplicitOverrideInput(opts.provider, "provider")
@@ -910,7 +913,9 @@ async function agentCommandInternal(
         const effectiveFallbacksOverride = resolveEffectiveModelFallbacks({
           cfg,
           agentId: sessionAgentId,
-          hasSessionModelOverride: Boolean(storedModelOverride),
+          hasSessionModelOverride:
+            hasExplicitRunOverride || Boolean(storedProviderOverride || storedModelOverride),
+          modelOverrideSource: hasExplicitRunOverride ? "user" : storedModelOverrideSource,
         });
 
         let fallbackAttemptIndex = 0;
@@ -1061,6 +1066,7 @@ async function agentCommandInternal(
             err.provider !== previousProvider
           ) {
             storedModelOverride = err.model;
+            storedModelOverrideSource = "user";
           }
           lifecycleEnded = false;
           log.info(
diff --git a/src/agents/agent-scope.test.ts b/src/agents/agent-scope.test.ts
@@ -225,8 +225,24 @@ describe("resolveAgentConfig", () => {
         cfg,
         agentId: "linus",
         hasSessionModelOverride: true,
+        modelOverrideSource: "auto",
       }),
     ).toEqual(["openai/gpt-5.4"]);
+    expect(
+      resolveEffectiveModelFallbacks({
+        cfg,
+        agentId: "linus",
+        hasSessionModelOverride: true,
+        modelOverrideSource: "user",
+      }),
+    ).toEqual([]);
+    expect(
+      resolveEffectiveModelFallbacks({
+        cfg,
+        agentId: "linus",
+        hasSessionModelOverride: true,
+      }),
+    ).toEqual([]);
     expect(
       resolveEffectiveModelFallbacks({
         cfg: cfgNoOverride,
@@ -257,13 +273,15 @@ describe("resolveAgentConfig", () => {
         cfg: cfgInheritDefaults,
         agentId: "linus",
         hasSessionModelOverride: true,
+        modelOverrideSource: "auto",
       }),
     ).toEqual(["openai/gpt-5.4"]);
     expect(
       resolveEffectiveModelFallbacks({
         cfg: cfgDisable,
         agentId: "linus",
         hasSessionModelOverride: true,
+        modelOverrideSource: "auto",
       }),
     ).toEqual([]);
   });
diff --git a/src/agents/agent-scope.ts b/src/agents/agent-scope.ts
@@ -205,11 +205,15 @@ export function resolveEffectiveModelFallbacks(params: {
   cfg: OpenClawConfig;
   agentId: string;
   hasSessionModelOverride: boolean;
+  modelOverrideSource?: "auto" | "user";
 }): string[] | undefined {
   const agentFallbacksOverride = resolveAgentModelFallbacksOverride(params.cfg, params.agentId);
   if (!params.hasSessionModelOverride) {
     return agentFallbacksOverride;
   }
+  if (params.modelOverrideSource !== "auto") {
+    return [];
+  }
   const defaultFallbacks = resolveAgentModelFallbackValues(params.cfg.agents?.defaults?.model);
   return agentFallbacksOverride ?? defaultFallbacks;
 }
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
@@ -957,7 +957,7 @@ export async function runAgentTurnWithFallback(params: {
       const onToolResult = params.opts?.onToolResult;
       const outcomePlan = buildAgentRuntimeOutcomePlan();
       const fallbackResult = await runWithModelFallback<EmbeddedAgentRunResult>({
-        ...resolveModelFallbackOptions(params.followupRun.run),
+        ...resolveModelFallbackOptions(effectiveRun, runtimeConfig),
         runId,
         classifyResult: async ({ result, provider, model }) => {
           const classification = outcomePlan.classifyRunResult({
diff --git a/src/auto-reply/reply/agent-runner-run-params.ts b/src/auto-reply/reply/agent-runner-run-params.ts
@@ -1,4 +1,4 @@
-import { resolveRunModelFallbacksOverride } from "../../agents/agent-scope.js";
+import { resolveEffectiveModelFallbacks } from "../../agents/agent-scope.js";
 import type { resolveProviderScopedAuthProfile } from "./agent-runner-auth-profile.js";
 import type { FollowupRun } from "./queue.js";
 
@@ -26,17 +26,21 @@ export const resolveEnforceFinalTagWithResolver = (
     }) ||
     false);
 
-export function resolveModelFallbackOptions(run: FollowupRun["run"]) {
-  const config = run.config;
+export function resolveModelFallbackOptions(
+  run: FollowupRun["run"],
+  configOverride: FollowupRun["run"]["config"] = run.config,
+) {
+  const config = configOverride;
   return {
     cfg: config,
     provider: run.provider,
     model: run.model,
     agentDir: run.agentDir,
-    fallbacksOverride: resolveRunModelFallbacksOverride({
+    fallbacksOverride: resolveEffectiveModelFallbacks({
       cfg: config,
       agentId: run.agentId,
-      sessionKey: run.sessionKey,
+      hasSessionModelOverride: run.hasSessionModelOverride === true,
+      modelOverrideSource: run.modelOverrideSource,
     }),
   };
 }
diff --git a/src/auto-reply/reply/agent-runner-utils.test.ts b/src/auto-reply/reply/agent-runner-utils.test.ts
@@ -2,15 +2,15 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
 import type { FollowupRun } from "./queue.js";
 
 const hoisted = vi.hoisted(() => {
-  const resolveRunModelFallbacksOverrideMock = vi.fn();
+  const resolveEffectiveModelFallbacksMock = vi.fn();
   const getChannelPluginMock = vi.fn();
   const isReasoningTagProviderMock = vi.fn();
-  return { resolveRunModelFallbacksOverrideMock, getChannelPluginMock, isReasoningTagProviderMock };
+  return { resolveEffectiveModelFallbacksMock, getChannelPluginMock, isReasoningTagProviderMock };
 });
 
 vi.mock("../../agents/agent-scope.js", () => ({
-  resolveRunModelFallbacksOverride: (...args: unknown[]) =>
-    hoisted.resolveRunModelFallbacksOverrideMock(...args),
+  resolveEffectiveModelFallbacks: (...args: unknown[]) =>
+    hoisted.resolveEffectiveModelFallbacksMock(...args),
 }));
 
 vi.mock("../../channels/plugins/index.js", () => ({
@@ -56,22 +56,23 @@ function makeRun(overrides: Partial<FollowupRun["run"]> = {}): FollowupRun["run"
 
 describe("agent-runner-utils", () => {
   beforeEach(() => {
-    hoisted.resolveRunModelFallbacksOverrideMock.mockClear();
+    hoisted.resolveEffectiveModelFallbacksMock.mockClear();
     hoisted.getChannelPluginMock.mockReset();
     hoisted.isReasoningTagProviderMock.mockReset();
     hoisted.isReasoningTagProviderMock.mockReturnValue(false);
   });
 
   it("resolves model fallback options from run context", () => {
-    hoisted.resolveRunModelFallbacksOverrideMock.mockReturnValue(["fallback-model"]);
-    const run = makeRun();
+    hoisted.resolveEffectiveModelFallbacksMock.mockReturnValue(["fallback-model"]);
+    const run = makeRun({ hasSessionModelOverride: true, modelOverrideSource: "user" });
 
     const resolved = resolveModelFallbackOptions(run);
 
-    expect(hoisted.resolveRunModelFallbacksOverrideMock).toHaveBeenCalledWith({
+    expect(hoisted.resolveEffectiveModelFallbacksMock).toHaveBeenCalledWith({
       cfg: run.config,
       agentId: run.agentId,
-      sessionKey: run.sessionKey,
+      hasSessionModelOverride: true,
+      modelOverrideSource: "user",
     });
     expect(resolved).toEqual({
       cfg: run.config,
@@ -83,15 +84,16 @@ describe("agent-runner-utils", () => {
   });
 
   it("passes through missing agentId for helper-based fallback resolution", () => {
-    hoisted.resolveRunModelFallbacksOverrideMock.mockReturnValue(["fallback-model"]);
+    hoisted.resolveEffectiveModelFallbacksMock.mockReturnValue(["fallback-model"]);
     const run = makeRun({ agentId: undefined });
 
     const resolved = resolveModelFallbackOptions(run);
 
-    expect(hoisted.resolveRunModelFallbacksOverrideMock).toHaveBeenCalledWith({
+    expect(hoisted.resolveEffectiveModelFallbacksMock).toHaveBeenCalledWith({
       cfg: run.config,
       agentId: undefined,
-      sessionKey: run.sessionKey,
+      hasSessionModelOverride: false,
+      modelOverrideSource: undefined,
     });
     expect(resolved.fallbacksOverride).toEqual(["fallback-model"]);
   });
diff --git a/src/auto-reply/reply/directive-handling.impl.ts b/src/auto-reply/reply/directive-handling.impl.ts
@@ -454,6 +454,7 @@ export async function handleDirectiveOnly(
         key: sessionKey,
         nextProvider: modelSelection.provider,
         nextModel: modelSelection.model,
+        nextModelOverrideSource: "user",
         nextAuthProfileId: profileOverride,
         nextAuthProfileIdSource: profileOverride ? "user" : undefined,
       });
diff --git a/src/auto-reply/reply/directive-handling.model.test.ts b/src/auto-reply/reply/directive-handling.model.test.ts
@@ -806,6 +806,7 @@ describe("handleDirectiveOnly model persist behavior (fixes #1435)", () => {
       key: sessionKey,
       nextProvider: "openai",
       nextModel: "gpt-4o",
+      nextModelOverrideSource: "user",
       nextAuthProfileId: undefined,
       nextAuthProfileIdSource: undefined,
     });
@@ -848,6 +849,7 @@ describe("handleDirectiveOnly model persist behavior (fixes #1435)", () => {
       key: sessionKey,
       nextProvider: "anthropic",
       nextModel: "claude-opus-4-6",
+      nextModelOverrideSource: "user",
       nextAuthProfileId: "anthropic:work",
       nextAuthProfileIdSource: "user",
     });
diff --git a/src/auto-reply/reply/followup-runner.ts b/src/auto-reply/reply/followup-runner.ts
@@ -3,7 +3,6 @@ import {
   hasOutboundReplyContent,
   resolveSendableOutboundReplyParts,
 } from "openclaw/plugin-sdk/reply-payload";
-import { resolveRunModelFallbacksOverride } from "../../agents/agent-scope.js";
 import { resolveBootstrapWarningSignaturesSeen } from "../../agents/bootstrap-budget.js";
 import { resolveContextTokensForModel } from "../../agents/context.js";
 import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js";
@@ -27,6 +26,7 @@ import { runPreflightCompactionIfNeeded } from "./agent-runner-memory.js";
 import {
   resolveQueuedReplyExecutionConfig,
   resolveQueuedReplyRuntimeConfig,
+  resolveModelFallbackOptions,
   resolveRunAuthProfile,
 } from "./agent-runner-utils.js";
 import { resolveFollowupDeliveryPayloads } from "./followup-delivery.js";
@@ -263,16 +263,9 @@ export function createFollowupRunner(params: {
       try {
         const outcomePlan = buildAgentRuntimeOutcomePlan();
         const fallbackResult = await runWithModelFallback<EmbeddedAgentRunResult>({
+          ...resolveModelFallbackOptions(run, runtimeConfig),
           cfg: runtimeConfig,
-          provider: run.provider,
-          model: run.model,
           runId,
-          agentDir: run.agentDir,
-          fallbacksOverride: resolveRunModelFallbacksOverride({
-            cfg: runtimeConfig,
-            agentId: run.agentId,
-            sessionKey: run.sessionKey,
-          }),
           classifyResult: ({ result, provider, model }) =>
             outcomePlan.classifyRunResult({ result, provider, model }),
           run: async (provider, model, runOptions) => {
diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts
@@ -769,6 +769,10 @@ export async function runPreparedReply(
     ({ activeSessionId, isActive, isStreaming } = queueState.busyState);
   }
   const authProfileIdSource = preparedSessionState.sessionEntry?.authProfileOverrideSource;
+  const runHasSessionModelOverride = Boolean(
+    normalizeOptionalString(preparedSessionState.sessionEntry?.modelOverride) ||
+    normalizeOptionalString(preparedSessionState.sessionEntry?.providerOverride),
+  );
   const followupRun = {
     prompt: queuedBody,
     transcriptPrompt: transcriptCommandBody,
@@ -816,6 +820,10 @@ export async function runPreparedReply(
       skillsSnapshot,
       provider,
       model,
+      hasSessionModelOverride: runHasSessionModelOverride,
+      modelOverrideSource: runHasSessionModelOverride
+        ? preparedSessionState.sessionEntry?.modelOverrideSource
+        : undefined,
       authProfileId,
       authProfileIdSource,
       thinkLevel: resolvedThinkLevel,
diff --git a/src/auto-reply/reply/queue/state.test.ts b/src/auto-reply/reply/queue/state.test.ts
@@ -59,4 +59,28 @@ describe("refreshQueuedFollowupSession", () => {
       authProfileIdSource: undefined,
     });
   });
+
+  it("retargets queued runs with user model override source", () => {
+    const queue = getFollowupQueue(QUEUE_KEY, { mode: "queue" });
+    const queuedRun: FollowupRun = {
+      prompt: "queued message",
+      enqueuedAt: Date.now(),
+      run: makeRun(),
+    };
+    queue.items.push(queuedRun);
+
+    refreshQueuedFollowupSession({
+      key: QUEUE_KEY,
+      nextProvider: "ollama",
+      nextModel: "qwen3.5:27b",
+      nextModelOverrideSource: "user",
+    });
+
+    expect(queue.items[0]?.run).toMatchObject({
+      provider: "ollama",
+      model: "qwen3.5:27b",
+      hasSessionModelOverride: true,
+      modelOverrideSource: "user",
+    });
+  });
 });
diff --git a/src/auto-reply/reply/queue/state.ts b/src/auto-reply/reply/queue/state.ts
@@ -94,6 +94,7 @@ export function refreshQueuedFollowupSession(params: {
   nextSessionFile?: string;
   nextProvider?: string;
   nextModel?: string;
+  nextModelOverrideSource?: "auto" | "user";
   nextAuthProfileId?: string;
   nextAuthProfileIdSource?: "auto" | "user";
 }): void {
@@ -112,6 +113,7 @@ export function refreshQueuedFollowupSession(params: {
   const shouldRewriteSelection =
     typeof params.nextProvider === "string" ||
     typeof params.nextModel === "string" ||
+    Object.hasOwn(params, "nextModelOverrideSource") ||
     Object.hasOwn(params, "nextAuthProfileId") ||
     Object.hasOwn(params, "nextAuthProfileIdSource");
   if (!shouldRewriteSession && !shouldRewriteSelection) {
@@ -136,6 +138,10 @@ export function refreshQueuedFollowupSession(params: {
       if (typeof params.nextModel === "string") {
         run.model = params.nextModel;
       }
+      if (Object.hasOwn(params, "nextModelOverrideSource")) {
+        run.hasSessionModelOverride = Boolean(run.provider || run.model);
+        run.modelOverrideSource = params.nextModelOverrideSource;
+      }
       if (Object.hasOwn(params, "nextAuthProfileId")) {
         run.authProfileId = normalizeOptionalString(params.nextAuthProfileId);
       }
diff --git a/src/auto-reply/reply/queue/types.ts b/src/auto-reply/reply/queue/types.ts
@@ -71,6 +71,8 @@ export type FollowupRun = {
     skillsSnapshot?: SkillSnapshot;
     provider: string;
     model: string;
+    hasSessionModelOverride?: boolean;
+    modelOverrideSource?: "auto" | "user";
     authProfileId?: string;
     authProfileIdSource?: "auto" | "user";
     thinkLevel?: ThinkLevel;
diff --git a/src/commands/agent.test.ts b/src/commands/agent.test.ts