Skip to content

Commit cc8a8f1

Browse files
authored
fix(agents): keep state.messages intact across z.ai-style provider turns in embedded runs (#76056)
Merged via squash. Prepared head SHA: ef305bb Co-authored-by: openperf <80630709+openperf@users.noreply.github.com> Co-authored-by: openperf <80630709+openperf@users.noreply.github.com> Reviewed-by: @openperf
1 parent 3aaf30f commit cc8a8f1

7 files changed

Lines changed: 284 additions & 9 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
2323
- CLI/update: treat inherited Gateway service markers as origin hints and only block package replacement when the managed Gateway is still live, so self-updates can stop the service and continue safely. (#75729) Thanks @hxy91819.
2424
- Agents/failover: exempt run-level timeouts that fire during tool execution from model fallback, timeout-triggered compaction, and generic timeout payload synthesis. Long `process(poll)`, browser, or `exec` tool calls that exceed `agents.defaults.timeoutSeconds` previously rotated auth profiles, switched to a fallback model, and surfaced a misleading "LLM request timed out" error even though the primary model had already responded. Mirrors the existing `timedOutDuringCompaction` precedent (#46889). Fixes #52147. (#75873) Thanks @simonusa.
2525
- Docker: copy Bun 1.3.13 from a digest-pinned image and keep CI on the same version. Fixes #74356. Thanks @fede-kamel and @sallyom.
26+
- Agents/compaction: keep prior context on consecutive turns against z.ai-style providers (z.ai direct, OpenRouter `z-ai/*`, in-house GLM gateways); Pi's internal auto-compaction was misfiring after successful turns and clearing `state.messages` before the next provider request. (#76056) Thanks @openperf.
2627

2728
## 2026.5.2
2829

src/agents/pi-embedded-runner/compact.hooks.harness.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,10 @@ export async function loadCompactHooksHarness(): Promise<{
301301
}));
302302

303303
vi.doMock("../pi-settings.js", () => ({
304+
applyPiAutoCompactionGuard: vi.fn(() => ({ supported: true, disabled: false })),
304305
applyPiCompactionSettingsFromConfig: vi.fn(),
305306
ensurePiCompactionReserveTokens: vi.fn(),
307+
isSilentOverflowProneModel: vi.fn(() => false),
306308
resolveCompactionReserveTokensFloor: vi.fn(() => 0),
307309
}));
308310

src/agents/pi-embedded-runner/compact.ts

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,11 @@ import {
7575
setCompactionSafeguardCancelReason,
7676
} from "../pi-hooks/compaction-safeguard-runtime.js";
7777
import { createPreparedEmbeddedPiSettingsManager } from "../pi-project-settings.js";
78-
import { applyPiCompactionSettingsFromConfig } from "../pi-settings.js";
78+
import {
79+
applyPiAutoCompactionGuard,
80+
applyPiCompactionSettingsFromConfig,
81+
isSilentOverflowProneModel,
82+
} from "../pi-settings.js";
7983
import { createOpenClawCodingTools } from "../pi-tools.js";
8084
import { wrapStreamFnTextTransforms } from "../plugin-text-transforms.js";
8185
import { registerProviderStreamForModel } from "../provider-stream.js";
@@ -960,12 +964,26 @@ async function compactEmbeddedPiSessionDirectOnce(
960964
});
961965
await resourceLoader.reload();
962966
// DefaultResourceLoader.reload() rehydrates settings from disk and can drop OpenClaw
963-
// compaction overrides applied in createPreparedEmbeddedPiSettingsManager.
967+
// compaction overrides applied in createPreparedEmbeddedPiSettingsManager — same
968+
// rehydration also restores Pi's auto-compaction (openclaw#75799), so re-apply
969+
// both guards. Pass effectiveModel.baseUrl, as the surrounding scope does, so
970+
// auth-profile-injected baseUrls reach the endpoint-class detector.
964971
applyPiCompactionSettingsFromConfig({
965972
settingsManager,
966973
cfg: params.config,
967974
contextTokenBudget: ctxInfo.tokens,
968975
});
976+
// contextEngineInfo is intentionally omitted: this guard runs inside the
977+
// compaction LLM session, which is not the user-facing agent session and
978+
// has no associated context engine.
979+
applyPiAutoCompactionGuard({
980+
settingsManager,
981+
silentOverflowProneProvider: isSilentOverflowProneModel({
982+
provider,
983+
modelId,
984+
baseUrl: effectiveModel.baseUrl ?? undefined,
985+
}),
986+
});
969987

970988
const { customTools } = splitSdkTools({
971989
tools: effectiveTools,

src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ vi.mock("../../pi-settings.js", () => ({
321321
keepRecentTokens: 40_000,
322322
},
323323
}),
324+
isSilentOverflowProneModel: () => false,
324325
}));
325326

326327
vi.mock("../extensions.js", () => ({

src/agents/pi-embedded-runner/run/attempt.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ import { createPreparedEmbeddedPiSettingsManager } from "../../pi-project-settin
107107
import {
108108
applyPiAutoCompactionGuard,
109109
applyPiCompactionSettingsFromConfig,
110+
isSilentOverflowProneModel,
110111
} from "../../pi-settings.js";
111112
import {
112113
createClientToolNameConflictError,
@@ -1474,10 +1475,16 @@ export async function runEmbeddedAttempt(
14741475
cfg: params.config,
14751476
contextTokenBudget: params.contextTokenBudget,
14761477
});
1477-
applyPiAutoCompactionGuard({
1478+
const piAutoCompactionGuardArgs = {
14781479
settingsManager,
14791480
contextEngineInfo: activeContextEngine?.info,
1480-
});
1481+
silentOverflowProneProvider: isSilentOverflowProneModel({
1482+
provider: params.provider,
1483+
modelId: params.modelId,
1484+
baseUrl: params.model.baseUrl ?? undefined,
1485+
}),
1486+
};
1487+
applyPiAutoCompactionGuard(piAutoCompactionGuardArgs);
14811488

14821489
// Sets compaction/pruning runtime state and returns extension factories
14831490
// that must be passed to the resource loader for the safeguard to be active.
@@ -1496,12 +1503,15 @@ export async function runEmbeddedAttempt(
14961503
});
14971504
await resourceLoader.reload();
14981505
// DefaultResourceLoader.reload() rehydrates settings from disk and can drop OpenClaw
1499-
// compaction overrides applied in createPreparedEmbeddedPiSettingsManager.
1506+
// compaction overrides applied in createPreparedEmbeddedPiSettingsManager — same
1507+
// rehydration also restores Pi's auto-compaction (openclaw#75799), so re-apply
1508+
// both guards.
15001509
applyPiCompactionSettingsFromConfig({
15011510
settingsManager,
15021511
cfg: params.config,
15031512
contextTokenBudget: params.contextTokenBudget,
15041513
});
1514+
applyPiAutoCompactionGuard(piAutoCompactionGuardArgs);
15051515
prepStages.mark("session-resource-loader");
15061516

15071517
// Get hook runner early so it's available when creating tools

src/agents/pi-settings.test.ts

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import { describe, expect, it, vi } from "vitest";
22
import { MIN_PROMPT_BUDGET_RATIO, MIN_PROMPT_BUDGET_TOKENS } from "./pi-compaction-constants.js";
33
import {
4+
applyPiAutoCompactionGuard,
45
applyPiCompactionSettingsFromConfig,
56
DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
7+
isSilentOverflowProneModel,
68
resolveCompactionReserveTokensFloor,
79
} from "./pi-settings.js";
810

@@ -345,3 +347,179 @@ describe("resolveCompactionReserveTokensFloor", () => {
345347
).toBe(0);
346348
});
347349
});
350+
351+
describe("isSilentOverflowProneModel", () => {
352+
// Reporter's repro shape: openrouter routing to z-ai/glm. Both the bare
353+
// `z-ai/...` form and the `openrouter/z-ai/...` qualified form must hit.
354+
it("flags z-ai-prefixed model ids regardless of qualifier", () => {
355+
expect(isSilentOverflowProneModel({ provider: "openrouter", modelId: "z-ai/glm-5.1" })).toBe(
356+
true,
357+
);
358+
expect(
359+
isSilentOverflowProneModel({ provider: "openrouter", modelId: "openrouter/z-ai/glm-5" }),
360+
).toBe(true);
361+
});
362+
363+
it("flags a config-set z.ai provider regardless of model id", () => {
364+
expect(isSilentOverflowProneModel({ provider: "z.ai", modelId: "glm-5.1" })).toBe(true);
365+
expect(isSilentOverflowProneModel({ provider: "z-ai", modelId: "glm-5.1" })).toBe(true);
366+
});
367+
368+
it("flags a direct api.z.ai baseUrl via endpointClass", () => {
369+
expect(
370+
isSilentOverflowProneModel({
371+
provider: "openai",
372+
modelId: "glm-5.1",
373+
baseUrl: "https://api.z.ai/api/coding/paas/v4",
374+
}),
375+
).toBe(true);
376+
});
377+
378+
// openclaw#75799 reporter's setup: an OpenAI-compatible in-house gateway
379+
// exposing Zhipu's GLM family directly (model id `glm-5.1`, no `z-ai/`
380+
// qualifier, custom baseUrl that is not api.z.ai). Catch the bare GLM
381+
// family name so direct gateway deployments hit the guard regardless of
382+
// what `provider` field the user picked — gateways relabel the upstream
383+
// identity, so `provider` here can be anything from `openai` to a custom
384+
// string. False positives only disable Pi's secondary compaction path;
385+
// OpenClaw's preemptive compaction continues to handle real overflow.
386+
it("flags bare glm- model ids without a namespace prefix, regardless of provider", () => {
387+
expect(isSilentOverflowProneModel({ provider: "custom", modelId: "glm-5.1" })).toBe(true);
388+
expect(isSilentOverflowProneModel({ provider: "custom", modelId: "glm-4.7" })).toBe(true);
389+
expect(isSilentOverflowProneModel({ provider: "openai", modelId: "glm-5.1" })).toBe(true);
390+
expect(isSilentOverflowProneModel({ provider: "openrouter", modelId: "glm-5.1" })).toBe(true);
391+
});
392+
393+
// Detection is intentionally narrow to z.ai-style accounting. Namespaced GLM
394+
// ids that route through providers with their own overflow accounting must
395+
// NOT be flagged — those hosts may not exhibit the z.ai silent-overflow
396+
// shape, and disabling Pi auto-compaction for them would over-broaden the
397+
// kill surface beyond the confirmed repro.
398+
it("does not flag namespaced GLM ids routed through non-z.ai hosts", () => {
399+
expect(
400+
isSilentOverflowProneModel({ provider: "ollama", modelId: "ollama/glm-5.1:cloud" }),
401+
).toBe(false);
402+
expect(
403+
isSilentOverflowProneModel({ provider: "opencode-go", modelId: "opencode-go/glm-5.1" }),
404+
).toBe(false);
405+
});
406+
407+
// pi-ai's overflow.ts only documents z.ai as the silent-overflow style. We
408+
// intentionally do NOT extend the guard to anthropic/openai/google/openrouter-
409+
// anthropic routes — adding them without a confirmed repro would broaden
410+
// the kill surface and regress baseline behavior for those providers.
411+
it("does not flag anthropic, openai, google or other routes", () => {
412+
expect(
413+
isSilentOverflowProneModel({ provider: "anthropic", modelId: "claude-sonnet-4.6" }),
414+
).toBe(false);
415+
expect(isSilentOverflowProneModel({ provider: "openai", modelId: "gpt-5.5" })).toBe(false);
416+
expect(
417+
isSilentOverflowProneModel({
418+
provider: "openrouter",
419+
modelId: "anthropic/claude-sonnet-4.6",
420+
}),
421+
).toBe(false);
422+
expect(isSilentOverflowProneModel({ provider: "google", modelId: "gemini-2.5-pro" })).toBe(
423+
false,
424+
);
425+
});
426+
427+
it("treats missing fields as not silent-overflow-prone", () => {
428+
expect(isSilentOverflowProneModel({})).toBe(false);
429+
expect(
430+
isSilentOverflowProneModel({ provider: undefined, modelId: undefined, baseUrl: null }),
431+
).toBe(false);
432+
});
433+
});
434+
435+
describe("applyPiAutoCompactionGuard", () => {
436+
// Direct repro of openclaw#75799: pi-ai's silent-overflow detection misfires
437+
// on a successful turn against z.ai-style providers, triggering Pi's
438+
// _runAutoCompaction from inside Session.prompt() and reassigning
439+
// agent.state.messages between the runner's prompt.submitted trajectory
440+
// event and the provider request. Disabling Pi auto-compaction here keeps
441+
// state.messages intact; OpenClaw's preemptive compaction continues to
442+
// handle real overflow on its own path.
443+
it("disables Pi auto-compaction for silent-overflow-prone providers", () => {
444+
const setCompactionEnabled = vi.fn();
445+
const settingsManager = {
446+
getCompactionReserveTokens: () => 20_000,
447+
getCompactionKeepRecentTokens: () => 4_000,
448+
applyOverrides: () => {},
449+
setCompactionEnabled,
450+
};
451+
452+
const result = applyPiAutoCompactionGuard({
453+
settingsManager,
454+
silentOverflowProneProvider: true,
455+
});
456+
457+
expect(result).toEqual({ supported: true, disabled: true });
458+
expect(setCompactionEnabled).toHaveBeenCalledWith(false);
459+
});
460+
461+
it("disables Pi auto-compaction when a context engine plugin owns compaction", () => {
462+
const setCompactionEnabled = vi.fn();
463+
const settingsManager = {
464+
getCompactionReserveTokens: () => 20_000,
465+
getCompactionKeepRecentTokens: () => 4_000,
466+
applyOverrides: () => {},
467+
setCompactionEnabled,
468+
};
469+
470+
const result = applyPiAutoCompactionGuard({
471+
settingsManager,
472+
contextEngineInfo: {
473+
id: "third-party",
474+
name: "Third-party Context Engine",
475+
version: "0.1.0",
476+
ownsCompaction: true,
477+
},
478+
});
479+
480+
expect(result).toEqual({ supported: true, disabled: true });
481+
expect(setCompactionEnabled).toHaveBeenCalledWith(false);
482+
});
483+
484+
// Default-mode runs against ordinary providers must keep Pi's auto-compaction
485+
// enabled. Disabling it across the board would silently remove Pi's
486+
// overflow-recovery path inside Session.prompt() for users who are not
487+
// affected by z.ai's silent-overflow accounting.
488+
it("leaves Pi auto-compaction alone for non-z.ai providers without engine ownership", () => {
489+
const setCompactionEnabled = vi.fn();
490+
const settingsManager = {
491+
getCompactionReserveTokens: () => 20_000,
492+
getCompactionKeepRecentTokens: () => 4_000,
493+
applyOverrides: () => {},
494+
setCompactionEnabled,
495+
};
496+
497+
const result = applyPiAutoCompactionGuard({
498+
settingsManager,
499+
contextEngineInfo: {
500+
id: "legacy",
501+
name: "Legacy Context Engine",
502+
version: "1.0.0",
503+
},
504+
silentOverflowProneProvider: false,
505+
});
506+
507+
expect(result).toEqual({ supported: true, disabled: false });
508+
expect(setCompactionEnabled).not.toHaveBeenCalled();
509+
});
510+
511+
it("reports unsupported when the settings manager has no setCompactionEnabled hook", () => {
512+
const settingsManager = {
513+
getCompactionReserveTokens: () => 20_000,
514+
getCompactionKeepRecentTokens: () => 4_000,
515+
applyOverrides: () => {},
516+
};
517+
518+
const result = applyPiAutoCompactionGuard({
519+
settingsManager,
520+
silentOverflowProneProvider: true,
521+
});
522+
523+
expect(result).toEqual({ supported: false, disabled: false });
524+
});
525+
});

src/agents/pi-settings.ts

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import type { OpenClawConfig } from "../config/types.openclaw.js";
22
import type { ContextEngineInfo } from "../context-engine/types.js";
33
import { MIN_PROMPT_BUDGET_RATIO, MIN_PROMPT_BUDGET_TOKENS } from "./pi-compaction-constants.js";
4+
import { resolveProviderEndpoint } from "./provider-attribution.js";
5+
import { normalizeProviderId } from "./provider-id.js";
46

57
export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000;
68

@@ -122,18 +124,81 @@ export function applyPiCompactionSettingsFromConfig(params: {
122124
};
123125
}
124126

125-
/** Decide whether Pi's internal auto-compaction should be disabled for this run. */
126-
function shouldDisablePiAutoCompaction(params: { contextEngineInfo?: ContextEngineInfo }): boolean {
127-
return params.contextEngineInfo?.ownsCompaction === true;
127+
/**
128+
* Detect providers whose pi-ai `isContextOverflow` Case 2 (silent overflow)
129+
* fires on a successful turn and triggers Pi's `_runAutoCompaction` from
130+
* inside `Session.prompt()`, collapsing `agent.state.messages` before the
131+
* provider call (openclaw#75799).
132+
*
133+
* True on any of: `zai-native` endpoint class, normalized provider id `zai`,
134+
* a `z-ai/` / `openrouter/z-ai/` model-id namespace prefix, or a bare `glm-`
135+
* model id (no namespace prefix) — the latter covers in-house gateways that
136+
* expose Zhipu's GLM family directly without a `z-ai/` qualifier. Intentionally
137+
* narrow: namespaced GLM ids that route through other providers (e.g.
138+
* `ollama/glm-*`, `opencode-go/glm-*`) are NOT included because their hosts
139+
* have their own overflow accounting and may not exhibit the z.ai silent-
140+
* overflow shape. Other providers documented as silently truncating are not
141+
* added without a confirmed repro.
142+
*/
143+
export function isSilentOverflowProneModel(model: {
144+
provider?: string | null;
145+
modelId?: string | null;
146+
baseUrl?: string | null;
147+
}): boolean {
148+
const provider = normalizeProviderId(typeof model.provider === "string" ? model.provider : "");
149+
if (provider === "zai") {
150+
return true;
151+
}
152+
if (typeof model.baseUrl === "string" && model.baseUrl.length > 0) {
153+
if (resolveProviderEndpoint(model.baseUrl).endpointClass === "zai-native") {
154+
return true;
155+
}
156+
}
157+
if (typeof model.modelId === "string" && model.modelId.length > 0) {
158+
const normalized = model.modelId.toLowerCase();
159+
if (
160+
normalized.startsWith("z-ai/") ||
161+
normalized.startsWith("openrouter/z-ai/") ||
162+
normalized.startsWith("glm-")
163+
) {
164+
return true;
165+
}
166+
}
167+
return false;
128168
}
129169

130-
/** Disable Pi auto-compaction via settings when a context engine owns compaction. */
170+
/**
171+
* Decide whether to disable Pi's `_checkCompaction → _runAutoCompaction` (which would otherwise
171+
* fire from inside `Session.prompt()` and reassign `agent.state.messages`
172+
* before the provider call). Returns true when OpenClaw or a plugin owns compaction:
174+
* `contextEngineInfo.ownsCompaction === true`, or the active model is
175+
* silent-overflow-prone (openclaw#75799). Default-mode runs against ordinary
176+
* providers keep Pi's auto-compaction as the existing baseline.
177+
*/
178+
function shouldDisablePiAutoCompaction(params: {
179+
contextEngineInfo?: ContextEngineInfo;
180+
silentOverflowProneProvider?: boolean;
181+
}): boolean {
182+
return (
183+
params.contextEngineInfo?.ownsCompaction === true || params.silentOverflowProneProvider === true
184+
);
185+
}
186+
187+
/**
188+
* Apply the auto-compaction guard. Callers that reload a `DefaultResourceLoader`
189+
* MUST call this AGAIN after each `reload()` — `settingsManager.reload()`
190+
* rehydrates `compaction.enabled` from disk and silently restores Pi's
191+
* default-on behavior, undoing the guard. Mirrors the existing
192+
* `applyPiCompactionSettingsFromConfig` re-call pattern at the same sites.
193+
*/
131194
export function applyPiAutoCompactionGuard(params: {
132195
settingsManager: PiSettingsManagerLike;
133196
contextEngineInfo?: ContextEngineInfo;
197+
silentOverflowProneProvider?: boolean;
134198
}): { supported: boolean; disabled: boolean } {
135199
const disable = shouldDisablePiAutoCompaction({
136200
contextEngineInfo: params.contextEngineInfo,
201+
silentOverflowProneProvider: params.silentOverflowProneProvider,
137202
});
138203
const hasMethod = typeof params.settingsManager.setCompactionEnabled === "function";
139204
if (!disable || !hasMethod) {

0 commit comments

Comments
 (0)