Skip to content

Commit 2dba9e6

Browse files
committed
fix(ollama): honor configured num_ctx params
1 parent fc3abc1 commit 2dba9e6

13 files changed

Lines changed: 305 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai
2525
- Logging/sessions: apply configured redaction patterns to persisted session transcript text and accept escaped character classes in safe custom redaction regexes, so transcript JSONL no longer keeps matching sensitive text in the clear. Fixes #42982. Thanks @panpan0000.
2626
- Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
2727
- Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
28+
- Providers/Ollama: honor configured model `params.num_ctx` in native and OpenAI-compatible Ollama requests so local models can cap runtime context without rebuilding Modelfiles. Fixes #44550 and #52206; supersedes #69464. Thanks @taitruong, @armi0024, and @LokiCode404.
2829
- Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
2930
- Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
3031
- Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
79fa6b9b9df5e22ac56a7edb9bfc25550131e285ce9f4868f468d957a8768240 config-baseline.json
2-
2722504ab6bd37eea9e7542689bd6dba5fb4e485c0eab9c1915427c49a5c5b66 config-baseline.core.json
1+
502a73267bd7195caf3fc4fb513e51a01bfd1c9567f8c22037ee10a11169a0bf config-baseline.json
2+
2edac1da06bbb3709375bf82ae68890c67634f5ad3200a98a1d008b22c335e79 config-baseline.core.json
33
7cd9c908f066c143eab2a201efbc9640f483ab28bba92ddeca1d18cc2b528bc3 config-baseline.channel.json
44
74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json

docs/providers/ollama.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
401401
<Accordion title="Context windows">
402402
For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, including larger `PARAMETER num_ctx` values from custom Modelfiles. Otherwise it falls back to the default Ollama context window used by OpenClaw.
403403

404-
You can override `contextWindow` and `maxTokens` in explicit provider config:
404+
You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid values (zero, negative, or non-finite) are ignored, and OpenClaw falls back to `contextWindow`.
405405

406406
```json5
407407
{
@@ -413,6 +413,9 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
413413
id: "llama3.3",
414414
contextWindow: 131072,
415415
maxTokens: 65536,
416+
params: {
417+
num_ctx: 32768,
418+
},
416419
}
417420
]
418421
}
@@ -421,6 +424,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
421424
}
422425
```
423426

427+
Per-model `agents.defaults.models["ollama/<model>"].params.num_ctx` also works. If both are configured, the explicit provider model entry takes precedence over the agent default.
428+
424429
</Accordion>
425430

426431
<Accordion title="Reasoning models">

extensions/ollama/ollama.live.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
2626
let payload:
2727
| {
2828
model?: string;
29+
options?: { num_ctx?: number };
2930
tools?: Array<{
3031
function?: {
3132
parameters?: {
@@ -42,6 +43,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
4243
api: "ollama",
4344
provider: PROVIDER_ID,
4445
contextWindow: 8192,
46+
params: { num_ctx: 4096 },
4547
} as never,
4648
{
4749
messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -79,6 +81,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
7981
expect(error).toBeUndefined();
8082
expect(events.some((event) => (event as { type?: string }).type === "done")).toBe(true);
8183
expect(payload?.model).toBe(CHAT_MODEL);
84+
expect(payload?.options?.num_ctx).toBe(4096);
8285
const properties = payload?.tools?.[0]?.function?.parameters?.properties;
8386
expect(properties?.city?.type).toBe("string");
8487
expect(properties?.units?.type).toBe("string");

extensions/ollama/src/stream-runtime.test.ts

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
9494
provider: "ollama",
9595
id: "kimi-k2.5:cloud",
9696
contextWindow: 262144,
97+
params: { num_ctx: 65536 },
9798
};
9899

99100
const wrapped = createConfiguredOllamaCompatStreamWrapper({
@@ -117,7 +118,43 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
117118

118119
expect(patchedPayload).toMatchObject({
119120
thinking: { type: "enabled" },
120-
options: { num_ctx: 262144 },
121+
options: { num_ctx: 65536 },
122+
});
123+
});
124+
125+
it("falls back to contextWindow when configured num_ctx is invalid", async () => {
126+
let patchedPayload: Record<string, unknown> | undefined;
127+
const baseStreamFn = vi.fn((_model, _context, options) => {
128+
options?.onPayload?.({});
129+
return (async function* () {})();
130+
});
131+
const model = {
132+
api: "openai-completions",
133+
provider: "ollama",
134+
id: "qwen3:32b",
135+
contextWindow: 131072,
136+
params: { num_ctx: 0 },
137+
};
138+
139+
const wrapped = createConfiguredOllamaCompatStreamWrapper({
140+
provider: "ollama",
141+
modelId: "qwen3:32b",
142+
model,
143+
streamFn: baseStreamFn,
144+
} as never);
145+
146+
await wrapped?.(
147+
model as never,
148+
{ messages: [] } as never,
149+
{
150+
onPayload: (payload: unknown) => {
151+
patchedPayload = payload as Record<string, unknown>;
152+
},
153+
} as never,
154+
);
155+
156+
expect(patchedPayload).toMatchObject({
157+
options: { num_ctx: 131072 },
121158
});
122159
});
123160

@@ -878,6 +915,7 @@ function getGuardedFetchCall(fetchMock: typeof fetchWithSsrFGuardMock): GuardedF
878915
async function createOllamaTestStream(params: {
879916
baseUrl: string;
880917
defaultHeaders?: Record<string, string>;
918+
model?: Record<string, unknown>;
881919
options?: {
882920
apiKey?: string;
883921
maxTokens?: number;
@@ -892,6 +930,7 @@ async function createOllamaTestStream(params: {
892930
api: "ollama",
893931
provider: "custom-ollama",
894932
contextWindow: 131072,
933+
...params.model,
895934
} as unknown as Parameters<typeof streamFn>[0],
896935
{
897936
messages: [{ role: "user", content: "hello" }],
@@ -1157,6 +1196,33 @@ describe("createOllamaStreamFn", () => {
11571196
);
11581197
});
11591198

1199+
it("uses configured params.num_ctx for native Ollama chat options", async () => {
1200+
await withMockNdjsonFetch(
1201+
[
1202+
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
1203+
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
1204+
],
1205+
async (fetchMock) => {
1206+
const stream = await createOllamaTestStream({
1207+
baseUrl: "http://ollama-host:11434",
1208+
model: { params: { num_ctx: 32768 }, contextWindow: 131072 },
1209+
});
1210+
1211+
const events = await collectStreamEvents(stream);
1212+
expect(events.at(-1)?.type).toBe("done");
1213+
1214+
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
1215+
if (typeof requestInit.body !== "string") {
1216+
throw new Error("Expected string request body");
1217+
}
1218+
const requestBody = JSON.parse(requestInit.body) as {
1219+
options: { num_ctx?: number };
1220+
};
1221+
expect(requestBody.options.num_ctx).toBe(32768);
1222+
},
1223+
);
1224+
});
1225+
11601226
it("uses the default loopback policy when baseUrl is empty", async () => {
11611227
await withMockNdjsonFetch(
11621228
[

extensions/ollama/src/stream.ts

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,19 @@ function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | und
181181
return undefined;
182182
}
183183

184-
function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
185-
return Math.max(1, Math.floor(model.contextWindow ?? model.maxTokens ?? DEFAULT_CONTEXT_TOKENS));
184+
function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
185+
const raw = model.params?.num_ctx;
186+
if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
187+
return undefined;
188+
}
189+
return Math.floor(raw);
190+
}
191+
192+
function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
193+
return (
194+
resolveOllamaConfiguredNumCtx(model) ??
195+
Math.max(1, Math.floor(model.contextWindow ?? model.maxTokens ?? DEFAULT_CONTEXT_TOKENS))
196+
);
186197
}
187198

188199
function isOllamaCloudKimiModelRef(modelId: string): boolean {
@@ -215,7 +226,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
215226
}
216227

217228
if (injectNumCtx && model) {
218-
streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
229+
streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaNumCtx(model));
219230
}
220231

221232
const ollamaThinkValue = isNativeOllamaTransport
@@ -743,7 +754,7 @@ export function createOllamaStreamFn(
743754
);
744755
const ollamaTools = extractOllamaTools(context.tools);
745756

746-
const ollamaOptions: Record<string, unknown> = { num_ctx: model.contextWindow ?? 65536 };
757+
const ollamaOptions: Record<string, unknown> = { num_ctx: resolveOllamaNumCtx(model) };
747758
if (typeof options?.temperature === "number") {
748759
ollamaOptions.temperature = options.temperature;
749760
}

src/agents/pi-embedded-runner/model.test.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,80 @@ describe("resolveModel", () => {
369369
expect(result.model?.maxTokens).toBe(32768);
370370
});
371371

372+
it("merges configured model params with agent defaults for resolved models", () => {
373+
mockDiscoveredModel(discoverModels, {
374+
provider: "ollama",
375+
modelId: "qwen3:32b",
376+
templateModel: {
377+
...makeModel("qwen3:32b"),
378+
provider: "ollama",
379+
params: { num_ctx: 4096, keep_alive: "1m" },
380+
},
381+
});
382+
const cfg = {
383+
agents: {
384+
defaults: {
385+
models: {
386+
"OLLAMA/qwen3:32B": {
387+
params: { num_ctx: 8192, thinking: "low" },
388+
},
389+
},
390+
},
391+
},
392+
models: {
393+
providers: {
394+
ollama: {
395+
baseUrl: "http://localhost:11434",
396+
models: [
397+
{
398+
...makeModel("qwen3:32b"),
399+
params: { num_ctx: 16384 },
400+
},
401+
],
402+
},
403+
},
404+
},
405+
} as unknown as OpenClawConfig;
406+
407+
const result = resolveModelForTest("ollama", "qwen3:32b", "/tmp/agent", cfg);
408+
409+
expect(result.error).toBeUndefined();
410+
expect((result.model as { params?: Record<string, unknown> } | undefined)?.params).toEqual({
411+
num_ctx: 16384,
412+
keep_alive: "1m",
413+
thinking: "low",
414+
});
415+
});
416+
417+
it("applies agent default model params without explicit provider config", () => {
418+
mockDiscoveredModel(discoverModels, {
419+
provider: "ollama",
420+
modelId: "llama3.2",
421+
templateModel: {
422+
...makeModel("llama3.2"),
423+
provider: "ollama",
424+
},
425+
});
426+
const cfg = {
427+
agents: {
428+
defaults: {
429+
models: {
430+
"ollama/llama3.2": {
431+
params: { num_ctx: 32768 },
432+
},
433+
},
434+
},
435+
},
436+
} as unknown as OpenClawConfig;
437+
438+
const result = resolveModelForTest("ollama", "llama3.2", "/tmp/agent", cfg);
439+
440+
expect(result.error).toBeUndefined();
441+
expect((result.model as { params?: Record<string, unknown> } | undefined)?.params).toEqual({
442+
num_ctx: 32768,
443+
});
444+
});
445+
372446
it("propagates reasoning from matching configured fallback model", () => {
373447
const cfg = {
374448
models: {

0 commit comments

Comments
 (0)