Skip to content

Commit 75e7fc9

Browse files
authored
fix: preserve runtime token budget in deferred context-engine maintenance (#66820)
* fix(context-engine): pass deferred maintenance token budget

  Thread tokenBudget through the after-turn runtime context so background context-engine maintenance reuses the real model context window instead of falling back to 128k. Also pass through a best-effort currentTokenCount from the latest call total and make the runtime context type explicit about both fields.

  Regeneration-Prompt: |
    OpenClaw already passed the real context token budget into direct context-engine calls like afterTurn and assemble, but deferred maintain() reused only the runtimeContext object and that object did not carry tokenBudget. Lossless Claw therefore fell back to 128k during background maintenance, which made budget-trigger fire much more aggressively than the live model context warranted. Thread the real contextTokenBudget into buildAfterTurnRuntimeContext so deferred maintenance receives the same budget, and pass a straightforward best-effort currentTokenCount from the latest call total while the relevant data is already in scope. Keep the change additive, update the runtime-context type, and cover the background maintenance/runtime-context behavior with focused tests.

* fix(context-engine): use prompt usage for deferred maintenance
1 parent 58d0c17 commit 75e7fc9

7 files changed

Lines changed: 90 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai
3232
- Agents/fallback: preserve the original prompt body on model fallback retries with session history so the retrying model keeps the active task instead of only seeing a generic continue message. (#66029) Thanks @WuKongAI-CMU.
3333
- Reply/secrets: resolve active reply channel/account SecretRefs before reply-run message-action discovery so channel token SecretRefs (for example Discord) do not degrade into discovery-time unresolved-secret failures. (#66796) Thanks @joshavant.
3434
- Agents/Anthropic: ignore non-positive Anthropic Messages token overrides and fail locally when no positive token budget remains, so invalid `max_tokens` values no longer reach the provider API. (#66664) Thanks @jalehman.
35+
- Agents/context engines: preserve prompt-only token counts, not full request totals, when deferred maintenance reuses after-turn runtime context so background compaction bookkeeping matches the active prompt window. (#66820) Thanks @jalehman.
3536

3637
## 2026.4.14
3738

src/agents/pi-embedded-runner/context-engine-maintenance.test.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,11 @@ describe("runContextEngineMaintenance", () => {
424424
sessionKey,
425425
sessionFile: "/tmp/session.jsonl",
426426
reason: "turn",
427-
runtimeContext: { workspaceDir: "/tmp/workspace" },
427+
runtimeContext: {
428+
workspaceDir: "/tmp/workspace",
429+
tokenBudget: 2048,
430+
currentTokenCount: 1536,
431+
},
428432
});
429433

430434
expect(result).toBeUndefined();
@@ -453,6 +457,8 @@ describe("runContextEngineMaintenance", () => {
453457
runtimeContext: expect.objectContaining({
454458
workspaceDir: "/tmp/workspace",
455459
allowDeferredCompactionExecution: true,
460+
tokenBudget: 2048,
461+
currentTokenCount: 1536,
456462
}),
457463
});
458464

src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ export function buildAfterTurnRuntimeContext(params: {
226226
>;
227227
workspaceDir: string;
228228
agentDir: string;
229+
tokenBudget?: number;
230+
currentTokenCount?: number;
229231
promptCache?: ContextEnginePromptCacheInfo;
230232
}): ContextEngineRuntimeContext {
231233
return {
@@ -252,6 +254,16 @@ export function buildAfterTurnRuntimeContext(params: {
252254
extraSystemPrompt: params.attempt.extraSystemPrompt,
253255
ownerNumbers: params.attempt.ownerNumbers,
254256
}),
257+
...(typeof params.tokenBudget === "number" &&
258+
Number.isFinite(params.tokenBudget) &&
259+
params.tokenBudget > 0
260+
? { tokenBudget: Math.floor(params.tokenBudget) }
261+
: {}),
262+
...(typeof params.currentTokenCount === "number" &&
263+
Number.isFinite(params.currentTokenCount) &&
264+
params.currentTokenCount > 0
265+
? { currentTokenCount: Math.floor(params.currentTokenCount) }
266+
: {}),
255267
...(params.promptCache ? { promptCache: params.promptCache } : {}),
256268
};
257269
}

src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import {
1414
runAttemptContextEngineBootstrap,
1515
} from "./attempt.context-engine-helpers.js";
1616
import {
17+
cleanupTempPaths,
18+
createContextEngineAttemptRunner,
1719
createContextEngineBootstrapAndAssemble,
1820
expectCalledWithSessionKey,
1921
getHoisted,
@@ -109,13 +111,15 @@ async function finalizeTurn(
109111

110112
describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
111113
const sessionKey = "agent:main:discord:channel:test-ctx-engine";
114+
const tempPaths: string[] = [];
112115
beforeEach(() => {
113116
resetEmbeddedAttemptHarness();
114117
clearMemoryPluginState();
115118
hoisted.runContextEngineMaintenanceMock.mockReset().mockResolvedValue(undefined);
116119
});
117120

118121
afterEach(async () => {
122+
await cleanupTempPaths(tempPaths);
119123
clearMemoryPluginState();
120124
vi.restoreAllMocks();
121125
});
@@ -395,6 +399,59 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
395399
);
396400
});
397401

402+
it("derives deferred maintenance currentTokenCount from prompt-only usage", async () => {
403+
const afterTurn = vi.fn(
404+
async (_params: {
405+
runtimeContext?: {
406+
currentTokenCount?: number;
407+
promptCache?: { lastCallUsage?: { total?: number } };
408+
};
409+
}) => {},
410+
);
411+
412+
await createContextEngineAttemptRunner({
413+
sessionKey,
414+
tempPaths,
415+
contextEngine: {
416+
assemble: async ({ messages }) => ({
417+
messages,
418+
estimatedTokens: 1,
419+
}),
420+
afterTurn,
421+
},
422+
sessionPrompt: async (session) => {
423+
session.messages = [
424+
...session.messages,
425+
{
426+
role: "assistant",
427+
content: "done",
428+
timestamp: 2,
429+
usage: {
430+
input: 10,
431+
output: 5,
432+
cacheRead: 40,
433+
cacheWrite: 2,
434+
total: 57,
435+
},
436+
} as unknown as AgentMessage,
437+
];
438+
},
439+
});
440+
441+
expect(afterTurn).toHaveBeenCalledWith(
442+
expect.objectContaining({
443+
runtimeContext: expect.objectContaining({
444+
currentTokenCount: 52,
445+
promptCache: expect.objectContaining({
446+
lastCallUsage: expect.objectContaining({
447+
total: 57,
448+
}),
449+
}),
450+
}),
451+
}),
452+
);
453+
});
454+
398455
it("skips maintenance when ingestBatch fails", async () => {
399456
const { bootstrap, assemble } = createContextEngineBootstrapAndAssemble();
400457
const ingestBatch = vi.fn(async () => {

src/agents/pi-embedded-runner/run/attempt.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2836,6 +2836,8 @@ describe("buildAfterTurnRuntimeContext", () => {
28362836
},
28372837
workspaceDir: "/tmp/workspace",
28382838
agentDir: "/tmp/agent",
2839+
tokenBudget: 1050000,
2840+
currentTokenCount: 232393,
28392841
});
28402842

28412843
expect(legacy).toMatchObject({
@@ -2844,6 +2846,8 @@ describe("buildAfterTurnRuntimeContext", () => {
28442846
model: "gpt-5.4",
28452847
workspaceDir: "/tmp/workspace",
28462848
agentDir: "/tmp/agent",
2849+
tokenBudget: 1050000,
2850+
currentTokenCount: 232393,
28472851
});
28482852
});
28492853

src/agents/pi-embedded-runner/run/attempt.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ import {
116116
resolveTranscriptPolicy,
117117
shouldAllowProviderOwnedThinkingReplay,
118118
} from "../../transcript-policy.js";
119-
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
119+
import { derivePromptTokens, normalizeUsage, type NormalizedUsage } from "../../usage.js";
120120
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
121121
import { isRunnerAbortError } from "../abort.js";
122122
import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "../cache-ttl.js";
@@ -887,6 +887,7 @@ export async function runEmbeddedAttempt(
887887
attempt: params,
888888
workspaceDir: effectiveWorkspace,
889889
agentDir,
890+
tokenBudget: params.contextTokenBudget,
890891
}),
891892
runMaintenance: async (contextParams) =>
892893
await runContextEngineMaintenance({
@@ -2201,10 +2202,13 @@ export async function runEmbeddedAttempt(
22012202

22022203
// Let the active context engine run its post-turn lifecycle.
22032204
if (params.contextEngine) {
2205+
const runtimeCurrentTokenCount = derivePromptTokens(lastCallUsage);
22042206
const afterTurnRuntimeContext = buildAfterTurnRuntimeContext({
22052207
attempt: params,
22062208
workspaceDir: effectiveWorkspace,
22072209
agentDir,
2210+
tokenBudget: params.contextTokenBudget,
2211+
currentTokenCount: runtimeCurrentTokenCount,
22082212
promptCache,
22092213
});
22102214
await finalizeAttemptContextEngineTurn({

src/context-engine/types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ export type ContextEngineRuntimeContext = Record<string, unknown> & {
140140
* consuming deferred compaction debt.
141141
*/
142142
allowDeferredCompactionExecution?: boolean;
143+
/** Runtime-resolved context window budget for the active model call. */
144+
tokenBudget?: number;
145+
/** Best-effort current prompt/context token estimate for this turn. */
146+
currentTokenCount?: number;
143147
/** Optional prompt-cache telemetry for cache-aware engines. */
144148
promptCache?: ContextEnginePromptCacheInfo;
145149
/**

0 commit comments

Comments
 (0)