fix(gateway): cap compaction reserve floor to context window for small models (#65671)

openperf · web-flow · commit 4bc46ccfedc4 · 2026-04-15T01:08:11.000+08:00
Fixes #65465. Caps the compaction reserveTokensFloor so that at least min(8 000 tokens, 50% of the context window) remains available for prompt content, preventing the default 20 000-token floor from exceeding the entire context window on small-context local models (e.g. Ollama 16K). The cap is only applied when contextTokenBudget is provided, preserving backward compatibility.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
 - Matrix/security: normalize sandboxed profile avatar params, preserve `mxc://` avatar URLs, and surface gmail watcher stop failures during reload. (#64701) Thanks @slepybear.
 - Telegram/documents: drop leaked binary caption bytes from inbound Telegram text handling so document uploads like `.mobi` or `.epub` no longer explode prompt token counts. (#66663) Thanks @joelnishanth.
 - Gateway/auth: resolve the active gateway bearer per-request on the HTTP server and the HTTP upgrade handler via `getResolvedAuth()`, mirroring the WebSocket path, so a secret rotated through `secrets.reload` or config hot-reload stops authenticating on `/v1/*`, `/tools/invoke`, plugin HTTP routes, and the canvas upgrade path immediately instead of remaining valid on HTTP until gateway restart. (#66651) Thanks @mmaps.
+- Agents/compaction: cap the compaction reserve-token floor to the model context window so small-context local models (e.g. Ollama with 16K tokens) no longer trigger context-overflow errors or infinite compaction loops on every prompt. (#65671) Thanks @openperf.
 
 ## 2026.4.14
 
diff --git a/src/agents/pi-compaction-constants.ts b/src/agents/pi-compaction-constants.ts
@@ -0,0 +1,12 @@
+/**
+ * Cap, in tokens, on the guaranteed prompt budget.  The reserve-floor cap
+ * uses min(this value, `contextTokenBudget * MIN_PROMPT_BUDGET_RATIO`), so
+ * this value only applies once the ratio-based share exceeds it.
+ */
+export const MIN_PROMPT_BUDGET_TOKENS = 8_000;
+
+/**
+ * Minimum share of the context window that must remain available for prompt
+ * content after reserve tokens are subtracted.
+ */
+export const MIN_PROMPT_BUDGET_RATIO = 0.5;
diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts
@@ -786,6 +786,7 @@ export async function compactEmbeddedPiSessionDirect(
         cwd: effectiveWorkspace,
         agentDir,
         cfg: params.config,
+        contextTokenBudget: ctxInfo.tokens,
       });
       // Sets compaction/pruning runtime state and returns extension factories
       // that must be passed to the resource loader for the safeguard to be active.
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -913,6 +913,7 @@ export async function runEmbeddedAttempt(
         cwd: effectiveWorkspace,
         agentDir,
         cfg: params.config,
+        contextTokenBudget: params.contextTokenBudget,
       });
       applyPiAutoCompactionGuard({
         settingsManager,
diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts
@@ -3,14 +3,16 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
 import { SAFETY_MARGIN, estimateMessagesTokens } from "../../compaction.js";
 import { estimateToolResultReductionPotential } from "../tool-result-truncation.js";
 import type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
+import {
+  MIN_PROMPT_BUDGET_RATIO,
+  MIN_PROMPT_BUDGET_TOKENS,
+} from "../../pi-compaction-constants.js";
 
 export const PREEMPTIVE_OVERFLOW_ERROR_TEXT =
   "Context overflow: prompt too large for the model (precheck).";
 
 const ESTIMATED_CHARS_PER_TOKEN = 4;
 const TRUNCATION_ROUTE_BUFFER_TOKENS = 512;
-const MIN_PROMPT_BUDGET_TOKENS = 8_000;
-const MIN_PROMPT_BUDGET_RATIO = 0.5;
 
 export type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
 
diff --git a/src/agents/pi-project-settings.ts b/src/agents/pi-project-settings.ts
@@ -187,11 +187,14 @@ export function createPreparedEmbeddedPiSettingsManager(params: {
   cwd: string;
   agentDir: string;
   cfg?: OpenClawConfig;
+  /** Resolved context window budget so reserve-token floor can be capped for small models. */
+  contextTokenBudget?: number;
 }): SettingsManager {
   const settingsManager = createEmbeddedPiSettingsManager(params);
   applyPiCompactionSettingsFromConfig({
     settingsManager,
     cfg: params.cfg,
+    contextTokenBudget: params.contextTokenBudget,
   });
   return settingsManager;
 }
diff --git a/src/agents/pi-settings.test.ts b/src/agents/pi-settings.test.ts
@@ -1,4 +1,8 @@
 import { describe, expect, it, vi } from "vitest";
+import {
+  MIN_PROMPT_BUDGET_RATIO,
+  MIN_PROMPT_BUDGET_TOKENS,
+} from "./pi-compaction-constants.js";
 import {
   applyPiCompactionSettingsFromConfig,
   DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
@@ -120,6 +124,173 @@ describe("applyPiCompactionSettingsFromConfig", () => {
     expect(result.compaction.keepRecentTokens).toBe(20_000);
     expect(settingsManager.applyOverrides).not.toHaveBeenCalled();
   });
+
+  it("caps floor to context window ratio for small-context models", () => {
+    // Pi SDK default reserveTokens is 16 384.  With a 16 384 context window
+    // the default floor (20 000) exceeds the window.  The aligned cap
+    // computes: minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000,
+    // maxReserve = 16_384 - 8_000 = 8_384.  Since current (16_384) > capped
+    // floor (8_384), no override is needed.
+    const settingsManager = {
+      getCompactionReserveTokens: () => 16_384,
+      getCompactionKeepRecentTokens: () => 20_000,
+      applyOverrides: vi.fn(),
+    };
+
+    const result = applyPiCompactionSettingsFromConfig({
+      settingsManager,
+      contextTokenBudget: 16_384,
+    });
+
+    // Without the cap, reserveTokens would be bumped to 20_000.
+    // With the cap, it stays at 16_384 (the current value).
+    expect(result.compaction.reserveTokens).toBe(16_384);
+    expect(result.compaction.reserveTokens).toBeLessThan(
+      DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
+    );
+    expect(result.didOverride).toBe(false);
+    expect(settingsManager.applyOverrides).not.toHaveBeenCalled();
+  });
+
+  it("applies capped floor over user-configured reserveTokens when default floor exceeds context window", () => {
+    const settingsManager = {
+      getCompactionReserveTokens: () => 16_384,
+      getCompactionKeepRecentTokens: () => 20_000,
+      applyOverrides: vi.fn(),
+    };
+
+    // User sets reserveTokens=2048 but NOT reserveTokensFloor (default 20_000 applies).
+    // Pre-fix: target = max(2048, 20_000) = 20_000 → exceeds 16_384 context → infinite loop.
+    // Post-fix: floor capped to 8_384 → target = max(2048, 8_384) = 8_384 → works.
+    const result = applyPiCompactionSettingsFromConfig({
+      settingsManager,
+      cfg: {
+        agents: {
+          defaults: {
+            compaction: { reserveTokens: 2_048 },
+          },
+        },
+      },
+      contextTokenBudget: 16_384,
+    });
+
+    expect(result.didOverride).toBe(true);
+    expect(result.compaction.reserveTokens).toBe(8_384); // capped floor wins over user's 2_048
+    expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
+      compaction: { reserveTokens: 8_384 },
+    });
+  });
+
+  it("applies capped floor when current reserve is below it on small-context models", () => {
+    // Simulate a Pi SDK default of 4 096 with a 16 384 context window.
+    // minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000.
+    // maxReserve = 16_384 - 8_000 = 8_384.
+    // Capped floor = min(20_000, 8_384) = 8_384.
+    // targetReserveTokens = max(4_096, 8_384) = 8_384 → override applied.
+    const settingsManager = {
+      getCompactionReserveTokens: () => 4_096,
+      getCompactionKeepRecentTokens: () => 20_000,
+      applyOverrides: vi.fn(),
+    };
+
+    const result = applyPiCompactionSettingsFromConfig({
+      settingsManager,
+      contextTokenBudget: 16_384,
+    });
+
+    const minPromptBudget = Math.min(
+      MIN_PROMPT_BUDGET_TOKENS,
+      Math.max(1, Math.floor(16_384 * MIN_PROMPT_BUDGET_RATIO)),
+    );
+    const expectedReserve = Math.max(0, 16_384 - minPromptBudget);
+    expect(result.didOverride).toBe(true);
+    expect(result.compaction.reserveTokens).toBe(expectedReserve);
+    expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
+      compaction: { reserveTokens: expectedReserve },
+    });
+  });
+
+  it("respects user-configured reserveTokens below capped floor for small models", () => {
+    const settingsManager = {
+      getCompactionReserveTokens: () => 16_384,
+      getCompactionKeepRecentTokens: () => 20_000,
+      applyOverrides: vi.fn(),
+    };
+
+    // User explicitly sets reserveTokens=2048 and reserveTokensFloor=0.
+    // With contextTokenBudget=16384, maxReserve = 16_384 - 8_000 = 8_384, so
+    // the capped floor = min(0, 8_384) = 0 and target = max(2048, 0) = 2048.
+    const result = applyPiCompactionSettingsFromConfig({
+      settingsManager,
+      cfg: {
+        agents: {
+          defaults: {
+            compaction: { reserveTokens: 2_048, reserveTokensFloor: 0 },
+          },
+        },
+      },
+      contextTokenBudget: 16_384,
+    });
+
+    expect(result.compaction.reserveTokens).toBe(2_048);
+    expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
+      compaction: { reserveTokens: 2_048 },
+    });
+  });
+
+  it("does not cap floor for mid-size models when maxReserve exceeds default floor", () => {
+    const settingsManager = {
+      getCompactionReserveTokens: () => 16_384,
+      getCompactionKeepRecentTokens: () => 20_000,
+      applyOverrides: vi.fn(),
+    };
+
+    // 32 768 context window → minPromptBudget = min(8_000, floor(32_768 * 0.5)) = 8_000.
+    // maxReserve = 32_768 - 8_000 = 24_768.
+    // Since 24_768 > 20_000 (DEFAULT_FLOOR), the floor is NOT capped and stays at 20_000.
+    const result = applyPiCompactionSettingsFromConfig({
+      settingsManager,
+      contextTokenBudget: 32_768,
+    });
+
+    expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
+    expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
+      compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR },
+    });
+  });
+
+  it("does not cap floor when context window is large enough", () => {
+    const settingsManager = {
+      getCompactionReserveTokens: () => 16_384,
+      getCompactionKeepRecentTokens: () => 20_000,
+      applyOverrides: vi.fn(),
+    };
+
+    // 200 000 context window → maxReserve = 200_000 - 8_000 = 192_000.
+    // floor (20 000) is well within that cap.
+    const result = applyPiCompactionSettingsFromConfig({
+      settingsManager,
+      contextTokenBudget: 200_000,
+    });
+
+    expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
+    expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
+      compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR },
+    });
+  });
+
+  it("falls back to uncapped floor when contextTokenBudget is not provided", () => {
+    const settingsManager = {
+      getCompactionReserveTokens: () => 16_384,
+      getCompactionKeepRecentTokens: () => 20_000,
+      applyOverrides: vi.fn(),
+    };
+
+    // No contextTokenBudget → backward-compatible behavior, floor = 20 000.
+    const result = applyPiCompactionSettingsFromConfig({ settingsManager });
+
+    expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
+  });
 });
 
 describe("resolveCompactionReserveTokensFloor", () => {
diff --git a/src/agents/pi-settings.ts b/src/agents/pi-settings.ts
@@ -1,5 +1,9 @@
 import type { OpenClawConfig } from "../config/types.openclaw.js";
 import type { ContextEngineInfo } from "../context-engine/types.js";
+import {
+  MIN_PROMPT_BUDGET_RATIO,
+  MIN_PROMPT_BUDGET_TOKENS,
+} from "./pi-compaction-constants.js";
 
 export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000;
 
@@ -15,6 +19,12 @@ type PiSettingsManagerLike = {
   setCompactionEnabled?: (enabled: boolean) => void;
 };
 
+/**
+ * Ensures the compaction reserve tokens are at least the specified minimum.
+ * Note: This function is not context-aware and uses an uncapped floor.
+ * If called for small-context models without threading `contextTokenBudget`,
+ * it may re-introduce context overflow issues.
+ */
 export function ensurePiCompactionReserveTokens(params: {
   settingsManager: PiSettingsManagerLike;
   minReserveTokens?: number;
@@ -58,6 +68,8 @@ function toPositiveInt(value: unknown): number | undefined {
 export function applyPiCompactionSettingsFromConfig(params: {
   settingsManager: PiSettingsManagerLike;
   cfg?: OpenClawConfig;
+  /** When known, the resolved context window budget for the current model. */
+  contextTokenBudget?: number;
 }): {
   didOverride: boolean;
   compaction: { reserveTokens: number; keepRecentTokens: number };
@@ -68,7 +80,22 @@ export function applyPiCompactionSettingsFromConfig(params: {
 
   const configuredReserveTokens = toNonNegativeInt(compactionCfg?.reserveTokens);
   const configuredKeepRecentTokens = toPositiveInt(compactionCfg?.keepRecentTokens);
-  const reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg);
+  let reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg);
+
+  // Cap the floor to a safe fraction of the context window so that
+  // small-context models (e.g. Ollama with 16 K tokens) are not starved of
+  // prompt budget.  Without this cap the default floor of 20 000 can exceed
+  // the entire context window, causing every prompt to be classified as an
+  // overflow and triggering an infinite compaction loop.
+  const ctxBudget = params.contextTokenBudget;
+  if (typeof ctxBudget === "number" && Number.isFinite(ctxBudget) && ctxBudget > 0) {
+    const minPromptBudget = Math.min(
+      MIN_PROMPT_BUDGET_TOKENS,
+      Math.max(1, Math.floor(ctxBudget * MIN_PROMPT_BUDGET_RATIO)),
+    );
+    const maxReserve = Math.max(0, ctxBudget - minPromptBudget);
+    reserveTokensFloor = Math.min(reserveTokensFloor, maxReserve);
+  }
 
   const targetReserveTokens = Math.max(
     configuredReserveTokens ?? currentReserveTokens,