Skip to content

Commit 0701ef6

Browse files
fix(agents): derive overflow budgets from provider errors
1 parent cd1cae5 commit 0701ef6

5 files changed

Lines changed: 88 additions & 4 deletions

File tree

docs/reference/session-management-compaction.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,10 @@ In the embedded Pi agent, auto-compaction triggers in two cases:
274274
number of tokens`, `input token count exceeds the maximum number of input
275275
tokens`, `input is too long for the model`, `ollama error: context length
276276
exceeded`, and similar provider-shaped variants) → compact → retry.
277+
When the provider reports the attempted token count, OpenClaw forwards that
278+
observed count into overflow recovery compaction. If the provider confirms
279+
overflow but does not expose a parseable count, OpenClaw passes a minimally
280+
over-budget synthetic count (the token budget plus one) to compaction engines and diagnostics.
277281
If overflow recovery still fails, OpenClaw surfaces explicit guidance to the
278282
user and preserves the current session mapping instead of silently rotating
279283
the session key to a fresh session id. The next step is operator-controlled:

src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,10 +575,30 @@ describe("extractObservedOverflowTokenCount", () => {
575575
"This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens.",
576576
),
577577
).toBe(145000);
578+
expect(
579+
extractObservedOverflowTokenCount(
580+
"400 The prompt is too long: 203557, model maximum context length: 196607",
581+
),
582+
).toBe(203557);
583+
expect(
584+
extractObservedOverflowTokenCount(
585+
"Invalid request: Your request exceeded model token limit: 262144 (requested: 291351)",
586+
),
587+
).toBe(291351);
588+
expect(
589+
extractObservedOverflowTokenCount(
590+
"input length and max_tokens exceed context limit (i.e 156321 + 48384 > 200000)",
591+
),
592+
).toBe(204705);
578593
});
579594

580595
it("returns undefined when overflow counts are not present", () => {
581596
expect(extractObservedOverflowTokenCount("Prompt too large for this model")).toBeUndefined();
597+
expect(
598+
extractObservedOverflowTokenCount(
599+
"The prompt is too long: 203557 characters, model maximum context length: 196607",
600+
),
601+
).toBeUndefined();
582602
expect(extractObservedOverflowTokenCount("rate limit exceeded")).toBeUndefined();
583603
});
584604
});

src/agents/pi-embedded-helpers/errors.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,15 +208,35 @@ export function isCompactionFailureError(errorMessage?: string): boolean {
208208

209209
const OBSERVED_OVERFLOW_TOKEN_PATTERNS = [
210210
/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i,
211+
/prompt is too long:\s*([\d,]+)\s*,\s*model maximum context length\s*:\s*[\d,]+/i,
211212
/requested\s+([\d,]+)\s+tokens/i,
213+
/token limit\s*:\s*[\d,]+\s*\(requested\s*:\s*([\d,]+)\)/i,
212214
/resulted in\s+([\d,]+)\s+tokens/i,
213215
];
214216

217+
const OBSERVED_OVERFLOW_TOKEN_SUM_PATTERNS = [
218+
/input length(?:\s+and\s+max_tokens)?\s+exceed\s+context(?:\s+limit|\s+window)?\s*\(i\.e\s*([\d,]+)\s*\+\s*([\d,]+)\s*>\s*[\d,]+\)/i,
219+
];
220+
215221
export function extractObservedOverflowTokenCount(errorMessage?: string): number | undefined {
216222
if (!errorMessage) {
217223
return undefined;
218224
}
219225

226+
for (const pattern of OBSERVED_OVERFLOW_TOKEN_SUM_PATTERNS) {
227+
const match = errorMessage.match(pattern);
228+
const rawLeft = match?.[1]?.replaceAll(",", "");
229+
const rawRight = match?.[2]?.replaceAll(",", "");
230+
if (!rawLeft || !rawRight) {
231+
continue;
232+
}
233+
const left = Number(rawLeft);
234+
const right = Number(rawRight);
235+
if (Number.isFinite(left) && left > 0 && Number.isFinite(right) && right >= 0) {
236+
return Math.floor(left + right);
237+
}
238+
}
239+
220240
for (const pattern of OBSERVED_OVERFLOW_TOKEN_PATTERNS) {
221241
const match = errorMessage.match(pattern);
222242
const rawCount = match?.[1]?.replaceAll(",", "");

src/agents/pi-embedded-runner/run.overflow-compaction.test.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import {
2020
mockedEvaluateContextWindowGuard,
2121
mockedEnsureAuthProfileStore,
2222
mockedEnsureAuthProfileStoreWithoutExternalProfiles,
23+
mockedExtractObservedOverflowTokenCount,
2324
mockedGlobalHookRunner,
2425
mockedGetApiKeyForModel,
2526
mockedMarkAuthProfileSuccess,
@@ -1511,6 +1512,37 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
15111512
expect(result.meta.error).toBeUndefined();
15121513
});
15131514

1515+
it("passes minimally over-budget count when overflow text is confirmed but unparseable", async () => {
1516+
mockedExtractObservedOverflowTokenCount.mockReturnValueOnce(undefined);
1517+
mockedRunEmbeddedAttempt
1518+
.mockResolvedValueOnce(
1519+
makeAttemptResult({
1520+
lastAssistant: {
1521+
role: "assistant",
1522+
content: [],
1523+
stopReason: "error",
1524+
errorMessage: "Context window exceeded for this request.",
1525+
usage: { totalTokens: 0 },
1526+
} as never,
1527+
}),
1528+
)
1529+
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
1530+
mockedCompactDirect.mockResolvedValueOnce(
1531+
makeCompactionSuccess({
1532+
summary: "Compacted session",
1533+
firstKeptEntryId: "entry-9",
1534+
tokensBefore: 200001,
1535+
}),
1536+
);
1537+
1538+
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
1539+
1540+
expectMockCallFields(mockedCompactDirect, {
1541+
currentTokenCount: 200001,
1542+
});
1543+
expect(result.meta.error).toBeUndefined();
1544+
});
1545+
15141546
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
15151547
const overflowError = queueOverflowAttemptWithOversizedToolOutput(
15161548
mockedRunEmbeddedAttempt,

src/agents/pi-embedded-runner/run.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,12 +1857,20 @@ export async function runEmbeddedPiAgent(
18571857
const errorText = contextOverflowError.text;
18581858
const msgCount = attempt.messagesSnapshot?.length ?? 0;
18591859
const observedOverflowTokens = extractObservedOverflowTokenCount(errorText);
1860+
const overflowTokenCountForCompaction =
1861+
observedOverflowTokens ??
1862+
(ctxInfo.tokens > 0
1863+
? // Confirmed overflow with an unparseable provider message still carries a
1864+
// minimally over-budget count for compaction engines and diagnostics.
1865+
ctxInfo.tokens + 1
1866+
: undefined);
18601867
log.warn(
18611868
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
18621869
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
18631870
`messages=${msgCount} sessionFile=${activeSessionFile} ` +
18641871
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
18651872
`observedTokens=${observedOverflowTokens ?? "unknown"} ` +
1873+
`compactionTokens=${overflowTokenCountForCompaction ?? "unknown"} ` +
18661874
`error=${errorText.slice(0, 200)}`,
18671875
);
18681876
const isCompactionFailure = isCompactionFailureError(errorText);
@@ -1946,8 +1954,8 @@ export async function runEmbeddedPiAgent(
19461954
...(attempt.promptCache ? { promptCache: attempt.promptCache } : {}),
19471955
runId: params.runId,
19481956
trigger: "overflow",
1949-
...(observedOverflowTokens !== undefined
1950-
? { currentTokenCount: observedOverflowTokens }
1957+
...(overflowTokenCountForCompaction !== undefined
1958+
? { currentTokenCount: overflowTokenCountForCompaction }
19511959
: {}),
19521960
diagId: overflowDiagId,
19531961
attempt: overflowCompactionAttempts,
@@ -1965,8 +1973,8 @@ export async function runEmbeddedPiAgent(
19651973
sessionKey: params.sessionKey,
19661974
sessionFile: activeSessionFile,
19671975
tokenBudget: ctxInfo.tokens,
1968-
...(observedOverflowTokens !== undefined
1969-
? { currentTokenCount: observedOverflowTokens }
1976+
...(overflowTokenCountForCompaction !== undefined
1977+
? { currentTokenCount: overflowTokenCountForCompaction }
19701978
: {}),
19711979
force: true,
19721980
compactionTarget: "budget",

0 commit comments

Comments
 (0)