Skip to content

Commit 2dba9e6

Browse files
committed
fix(ollama): honor configured num_ctx params
1 parent fc3abc1 commit 2dba9e6

13 files changed

Lines changed: 305 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai
2525
- Logging/sessions: apply configured redaction patterns to persisted session transcript text and accept escaped character classes in safe custom redaction regexes, so transcript JSONL no longer keeps matching sensitive text in the clear. Fixes #42982. Thanks @panpan0000.
2626
- Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
2727
- Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
28+
- Providers/Ollama: honor configured model `params.num_ctx` in native and OpenAI-compatible Ollama requests so local models can cap runtime context without rebuilding Modelfiles. Fixes #44550 and #52206; supersedes #69464. Thanks @taitruong, @armi0024, and @LokiCode404.
2829
- Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
2930
- Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
3031
- Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
79fa6b9b9df5e22ac56a7edb9bfc25550131e285ce9f4868f468d957a8768240 config-baseline.json
2-
2722504ab6bd37eea9e7542689bd6dba5fb4e485c0eab9c1915427c49a5c5b66 config-baseline.core.json
1+
502a73267bd7195caf3fc4fb513e51a01bfd1c9567f8c22037ee10a11169a0bf config-baseline.json
2+
2edac1da06bbb3709375bf82ae68890c67634f5ad3200a98a1d008b22c335e79 config-baseline.core.json
33
7cd9c908f066c143eab2a201efbc9640f483ab28bba92ddeca1d18cc2b528bc3 config-baseline.channel.json
44
74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json

docs/providers/ollama.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
401401
<Accordion title="Context windows">
402402
For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, including larger `PARAMETER num_ctx` values from custom Modelfiles. Otherwise it falls back to the default Ollama context window used by OpenClaw.
403403

404-
You can override `contextWindow` and `maxTokens` in explicit provider config:
404+
You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid values (zero, negative, or non-finite) are ignored, and OpenClaw falls back to `contextWindow`.
405405

406406
```json5
407407
{
@@ -413,6 +413,9 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
413413
id: "llama3.3",
414414
contextWindow: 131072,
415415
maxTokens: 65536,
416+
params: {
417+
num_ctx: 32768,
418+
},
416419
}
417420
]
418421
}
@@ -421,6 +424,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
421424
}
422425
```
423426

427+
Per-model `agents.defaults.models["ollama/<model>"].params.num_ctx` also works. If both are configured, the explicit provider model entry takes precedence over the agent default.
428+
424429
</Accordion>
425430

426431
<Accordion title="Reasoning models">

extensions/ollama/ollama.live.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
2626
let payload:
2727
| {
2828
model?: string;
29+
options?: { num_ctx?: number };
2930
tools?: Array<{
3031
function?: {
3132
parameters?: {
@@ -42,6 +43,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
4243
api: "ollama",
4344
provider: PROVIDER_ID,
4445
contextWindow: 8192,
46+
params: { num_ctx: 4096 },
4547
} as never,
4648
{
4749
messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -79,6 +81,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
7981
expect(error).toBeUndefined();
8082
expect(events.some((event) => (event as { type?: string }).type === "done")).toBe(true);
8183
expect(payload?.model).toBe(CHAT_MODEL);
84+
expect(payload?.options?.num_ctx).toBe(4096);
8285
const properties = payload?.tools?.[0]?.function?.parameters?.properties;
8386
expect(properties?.city?.type).toBe("string");
8487
expect(properties?.units?.type).toBe("string");

extensions/ollama/src/stream-runtime.test.ts

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
9494
provider: "ollama",
9595
id: "kimi-k2.5:cloud",
9696
contextWindow: 262144,
97+
params: { num_ctx: 65536 },
9798
};
9899

99100
const wrapped = createConfiguredOllamaCompatStreamWrapper({
@@ -117,7 +118,43 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
117118

118119
expect(patchedPayload).toMatchObject({
119120
thinking: { type: "enabled" },
120-
options: { num_ctx: 262144 },
121+
options: { num_ctx: 65536 },
122+
});
123+
});
124+
125+
it("falls back to contextWindow when configured num_ctx is invalid", async () => {
126+
let patchedPayload: Record<string, unknown> | undefined;
127+
const baseStreamFn = vi.fn((_model, _context, options) => {
128+
options?.onPayload?.({});
129+
return (async function* () {})();
130+
});
131+
const model = {
132+
api: "openai-completions",
133+
provider: "ollama",
134+
id: "qwen3:32b",
135+
contextWindow: 131072,
136+
params: { num_ctx: 0 },
137+
};
138+
139+
const wrapped = createConfiguredOllamaCompatStreamWrapper({
140+
provider: "ollama",
141+
modelId: "qwen3:32b",
142+
model,
143+
streamFn: baseStreamFn,
144+
} as never);
145+
146+
await wrapped?.(
147+
model as never,
148+
{ messages: [] } as never,
149+
{
150+
onPayload: (payload: unknown) => {
151+
patchedPayload = payload as Record<string, unknown>;
152+
},
153+
} as never,
154+
);
155+
156+
expect(patchedPayload).toMatchObject({
157+
options: { num_ctx: 131072 },
121158
});
122159
});
123160

@@ -878,6 +915,7 @@ function getGuardedFetchCall(fetchMock: typeof fetchWithSsrFGuardMock): GuardedF
878915
async function createOllamaTestStream(params: {
879916
baseUrl: string;
880917
defaultHeaders?: Record<string, string>;
918+
model?: Record<string, unknown>;
881919
options?: {
882920
apiKey?: string;
883921
maxTokens?: number;
@@ -892,6 +930,7 @@ async function createOllamaTestStream(params: {
892930
api: "ollama",
893931
provider: "custom-ollama",
894932
contextWindow: 131072,
933+
...params.model,
895934
} as unknown as Parameters<typeof streamFn>[0],
896935
{
897936
messages: [{ role: "user", content: "hello" }],
@@ -1157,6 +1196,33 @@ describe("createOllamaStreamFn", () => {
11571196
);
11581197
});
11591198

1199+
it("uses configured params.num_ctx for native Ollama chat options", async () => {
1200+
await withMockNdjsonFetch(
1201+
[
1202+
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
1203+
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
1204+
],
1205+
async (fetchMock) => {
1206+
const stream = await createOllamaTestStream({
1207+
baseUrl: "http://ollama-host:11434",
1208+
model: { params: { num_ctx: 32768 }, contextWindow: 131072 },
1209+
});
1210+
1211+
const events = await collectStreamEvents(stream);
1212+
expect(events.at(-1)?.type).toBe("done");
1213+
1214+
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
1215+
if (typeof requestInit.body !== "string") {
1216+
throw new Error("Expected string request body");
1217+
}
1218+
const requestBody = JSON.parse(requestInit.body) as {
1219+
options: { num_ctx?: number };
1220+
};
1221+
expect(requestBody.options.num_ctx).toBe(32768);
1222+
},
1223+
);
1224+
});
1225+
11601226
it("uses the default loopback policy when baseUrl is empty", async () => {
11611227
await withMockNdjsonFetch(
11621228
[

extensions/ollama/src/stream.ts

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,19 @@ function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | und
181181
return undefined;
182182
}
183183

184-
function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
185-
return Math.max(1, Math.floor(model.contextWindow ?? model.maxTokens ?? DEFAULT_CONTEXT_TOKENS));
184+
function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
185+
const raw = model.params?.num_ctx;
186+
if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
187+
return undefined;
188+
}
189+
return Math.floor(raw);
190+
}
191+
192+
function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
193+
return (
194+
resolveOllamaConfiguredNumCtx(model) ??
195+
Math.max(1, Math.floor(model.contextWindow ?? model.maxTokens ?? DEFAULT_CONTEXT_TOKENS))
196+
);
186197
}
187198

188199
function isOllamaCloudKimiModelRef(modelId: string): boolean {
@@ -215,7 +226,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
215226
}
216227

217228
if (injectNumCtx && model) {
218-
streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
229+
streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaNumCtx(model));
219230
}
220231

221232
const ollamaThinkValue = isNativeOllamaTransport
@@ -743,7 +754,7 @@ export function createOllamaStreamFn(
743754
);
744755
const ollamaTools = extractOllamaTools(context.tools);
745756

746-
const ollamaOptions: Record<string, unknown> = { num_ctx: model.contextWindow ?? 65536 };
757+
const ollamaOptions: Record<string, unknown> = { num_ctx: resolveOllamaNumCtx(model) };
747758
if (typeof options?.temperature === "number") {
748759
ollamaOptions.temperature = options.temperature;
749760
}

src/agents/pi-embedded-runner/model.test.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,80 @@ describe("resolveModel", () => {
369369
expect(result.model?.maxTokens).toBe(32768);
370370
});
371371

372+
it("merges configured model params with agent defaults for resolved models", () => {
373+
mockDiscoveredModel(discoverModels, {
374+
provider: "ollama",
375+
modelId: "qwen3:32b",
376+
templateModel: {
377+
...makeModel("qwen3:32b"),
378+
provider: "ollama",
379+
params: { num_ctx: 4096, keep_alive: "1m" },
380+
},
381+
});
382+
const cfg = {
383+
agents: {
384+
defaults: {
385+
models: {
386+
"OLLAMA/qwen3:32B": {
387+
params: { num_ctx: 8192, thinking: "low" },
388+
},
389+
},
390+
},
391+
},
392+
models: {
393+
providers: {
394+
ollama: {
395+
baseUrl: "http://localhost:11434",
396+
models: [
397+
{
398+
...makeModel("qwen3:32b"),
399+
params: { num_ctx: 16384 },
400+
},
401+
],
402+
},
403+
},
404+
},
405+
} as unknown as OpenClawConfig;
406+
407+
const result = resolveModelForTest("ollama", "qwen3:32b", "/tmp/agent", cfg);
408+
409+
expect(result.error).toBeUndefined();
410+
expect((result.model as { params?: Record<string, unknown> } | undefined)?.params).toEqual({
411+
num_ctx: 16384,
412+
keep_alive: "1m",
413+
thinking: "low",
414+
});
415+
});
416+
417+
it("applies agent default model params without explicit provider config", () => {
418+
mockDiscoveredModel(discoverModels, {
419+
provider: "ollama",
420+
modelId: "llama3.2",
421+
templateModel: {
422+
...makeModel("llama3.2"),
423+
provider: "ollama",
424+
},
425+
});
426+
const cfg = {
427+
agents: {
428+
defaults: {
429+
models: {
430+
"ollama/llama3.2": {
431+
params: { num_ctx: 32768 },
432+
},
433+
},
434+
},
435+
},
436+
} as unknown as OpenClawConfig;
437+
438+
const result = resolveModelForTest("ollama", "llama3.2", "/tmp/agent", cfg);
439+
440+
expect(result.error).toBeUndefined();
441+
expect((result.model as { params?: Record<string, unknown> } | undefined)?.params).toEqual({
442+
num_ctx: 32768,
443+
});
444+
});
445+
372446
it("propagates reasoning from matching configured fallback model", () => {
373447
const cfg = {
374448
models: {

0 commit comments

Comments
 (0)