fix(ollama): propagate supportsTools, disable idle watchdog, fix thinking and keep_alive

velteyn · velteyn · commit ee2e3a3f9630 · 2026-05-14T20:38:08.000Z
- buildOllamaModelDefinition: compat always includes supportsTools,
  defaulting to true when capabilities are unknown (Ollama's /api/chat
  universally supports tool calls)
- AgentModelEntryConfig: add an optional compat field so per-model config
  can override compatibility flags such as supportsTools
- resolveLlmIdleTimeoutMs: move isLocalProviderBaseUrl check to top
  so local providers always get idleTimeout=0 (network silence during
  prompt evaluation is not a hang signal)
- createConfiguredOllamaCompatStreamWrapper: guard think parameter with
  model.reasoning check; non-reasoning models get think:false forced
- buildOllamaChatRequest: default keep_alive to "1h"; requestParams is
  spread after it, so model-level config can override the value
diff --git a/extensions/ollama/src/provider-models.ts b/extensions/ollama/src/provider-models.ts
@@ -247,13 +247,10 @@ export function buildOllamaModelDefinition(
     capabilities === undefined
       ? isReasoningModelHeuristic(modelId)
       : capabilities.includes("thinking");
-  const compat =
-    capabilities === undefined
-      ? { supportsUsageInStreaming: true }
-      : {
-          supportsTools: capabilities.includes("tools"),
-          supportsUsageInStreaming: true,
-        };
+  const compat = {
+    supportsTools: capabilities === undefined ? true : capabilities.includes("tools"),
+    supportsUsageInStreaming: true,
+  };
   return {
     id: modelId,
     name: modelId,
diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts
@@ -394,14 +394,25 @@ export function createConfiguredOllamaCompatStreamWrapper(
   const runtimeThinkValue = isNativeOllamaTransport
     ? resolveOllamaThinkValue(ctx.thinkingLevel)
     : undefined;
-  // "off" is also the implicit agent default. Preserve explicit native Ollama
-  // model config unless the active run requests a non-off thinking level.
-  const ollamaThinkValue =
-    runtimeThinkValue === false && configuredThinkValue !== undefined
-      ? undefined
-      : runtimeThinkValue;
-  if (ollamaThinkValue !== undefined) {
-    streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue);
+  // Non-reasoning Ollama models reject a truthy `think` parameter: Ollama
+  // returns 400 "\"<model>\" does not support thinking". For these models,
+  // force think: false whenever the runtime think level is not off or the
+  // model config sets a think value, so propagated/fallback settings cannot fail.
+  const modelSupportsThinking = model?.reasoning === true;
+  if (!modelSupportsThinking) {
+    if (runtimeThinkValue !== false || configuredThinkValue !== undefined) {
+      streamFn = createOllamaThinkingWrapper(streamFn, false);
+    }
+  } else {
+    // "off" is also the implicit agent default. Preserve explicit native Ollama
+    // model config unless the active run requests a non-off thinking level.
+    const ollamaThinkValue =
+      runtimeThinkValue === false && configuredThinkValue !== undefined
+        ? undefined
+        : runtimeThinkValue;
+    if (ollamaThinkValue !== undefined) {
+      streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue);
+    }
   }
 
   if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
@@ -433,6 +444,7 @@ export function buildOllamaChatRequest(params: {
     stream: params.stream ?? true,
     ...(params.tools && params.tools.length > 0 ? { tools: params.tools } : {}),
     ...(params.options ? { options: params.options } : {}),
+    keep_alive: "1h",
     ...params.requestParams,
   };
 }
diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts
@@ -189,20 +189,18 @@ describe("resolveLlmIdleTimeoutMs", () => {
     expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "" } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
   });
 
-  it("still honors an explicit provider request timeout for local providers", () => {
+  it("disables idle watchdog for local providers even with explicit modelRequestTimeoutMs", () => {
     expect(
       resolveLlmIdleTimeoutMs({
         model: { baseUrl: "http://127.0.0.1:11434" },
         modelRequestTimeoutMs: 600_000,
       }),
-    ).toBe(600_000);
+    ).toBe(0);
   });
 
-  it("still applies agents.defaults.timeoutSeconds cap for local providers", () => {
+  it("disables idle watchdog for local providers even with agents.defaults.timeoutSeconds", () => {
     const cfg = { agents: { defaults: { timeoutSeconds: 30 } } } as OpenClawConfig;
-    expect(resolveLlmIdleTimeoutMs({ cfg, model: { baseUrl: "http://127.0.0.1:11434" } })).toBe(
-      30_000,
-    );
+    expect(resolveLlmIdleTimeoutMs({ cfg, model: { baseUrl: "http://127.0.0.1:11434" } })).toBe(0);
   });
 });
 
diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts
@@ -102,6 +102,17 @@ export function resolveLlmIdleTimeoutMs(params?: {
   modelRequestTimeoutMs?: number;
   model?: { baseUrl?: string };
 }): number {
+  // The default watchdog is a network-silence-as-hang guard for cloud providers.
+  // Local providers can legitimately stream nothing for many minutes during
+  // prompt evaluation or thinking. Return 0 (watchdog disabled) up front, before
+  // any configured timeout is resolved or clamped, so that local models (Ollama,
+  // LM Studio, llama.cpp) are never subject to the network-silence heuristic,
+  // regardless of agent-level timeoutSeconds or an explicit modelRequestTimeoutMs.
+  const baseUrl = params?.model?.baseUrl;
+  if (typeof baseUrl === "string" && baseUrl.length > 0 && isLocalProviderBaseUrl(baseUrl)) {
+    return 0;
+  }
+
   const clampTimeoutMs = (valueMs: number) => Math.min(Math.floor(valueMs), MAX_SAFE_TIMEOUT_MS);
   const clampImplicitTimeoutMs = (valueMs: number) =>
     clampTimeoutMs(Math.min(valueMs, DEFAULT_LLM_IDLE_TIMEOUT_MS));
@@ -152,16 +163,6 @@ export function resolveLlmIdleTimeoutMs(params?: {
     return 0;
   }
 
-  // The default watchdog is a network-silence-as-hang guard for cloud providers.
-  // Local providers can legitimately stream nothing for many minutes during
-  // prompt evaluation or thinking, so falling back to the default would abort
-  // valid local runs. Honor it only when the user has not opted out via the
-  // baseUrl pointing at loopback / private-network / `.local`.
-  const baseUrl = params?.model?.baseUrl;
-  if (typeof baseUrl === "string" && baseUrl.length > 0 && isLocalProviderBaseUrl(baseUrl)) {
-    return 0;
-  }
-
   return DEFAULT_LLM_IDLE_TIMEOUT_MS;
 }
 
diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts
@@ -38,6 +38,8 @@ export type AgentModelEntryConfig = {
   agentRuntime?: AgentRuntimePolicyConfig;
   /** Enable streaming for this model (default: true, false for Ollama to avoid SDK issue #1205). */
   streaming?: boolean;
+  /** Runtime compatibility overrides (e.g., supportsTools for models that support tool calls). */
+  compat?: import("./types.models.js").ModelCompatConfig;
 };
 
 export type AgentModelListConfig = {
diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts
@@ -14,6 +14,7 @@ import {
   BlockStreamingCoalesceSchema,
   CliBackendSchema,
   HumanDelaySchema,
+  ModelCompatSchema,
   TypingModeSchema,
 } from "./zod-schema.core.js";
 
@@ -73,6 +74,8 @@ export const AgentDefaultsSchema = z
             agentRuntime: AgentRuntimePolicySchema,
             /** Enable streaming for this model (default: true, false for Ollama to avoid SDK issue #1205). */
             streaming: z.boolean().optional(),
+            /** Runtime compatibility overrides (e.g., supportsTools for models that support tool calls). */
+            compat: ModelCompatSchema.optional(),
           })
           .strict(),
       )
diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts
@@ -184,7 +184,7 @@ export const SecretsConfigSchema = z
 
 const ModelApiSchema = z.enum(MODEL_APIS);
 
-const ModelCompatSchema = z
+export const ModelCompatSchema = z
   .object({
     supportsStore: z.boolean().optional(),
     supportsPromptCacheKey: z.boolean().optional(),