Skip to content

Commit a327b67

Browse files
authored
fix: stabilize context engine prompt cache touches (#67767)
* fix: stabilize context engine prompt cache touches * fix(changelog): document context-engine prompt cache touch stabilization
1 parent ac717a9 commit a327b67

6 files changed

Lines changed: 273 additions & 39 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
5151
- BlueBubbles/inbound: restore inbound image attachment downloads on Node 22+ by stripping incompatible bundled-undici dispatchers from the non-SSRF fetch path, accept `updated-message` webhooks carrying attachments, use event-type-aware dedup keys so attachment follow-ups are not rejected as duplicates, and retry attachment fetch from the BB API when the initial webhook arrives with an empty array. (#64105, #61861, #65430, #67510) Thanks @omarshahine.
5252
- Agents/skills: sort prompt-facing `available_skills` entries by skill name after merging sources so `skills.load.extraDirs` order no longer changes prompt-cache prefixes. (#64198) Thanks @Bartok9.
5353
- Agents/OpenAI Responses: add `models.providers.*.models.*.compat.supportsPromptCacheKey` so OpenAI-compatible proxies that forward `prompt_cache_key` can keep prompt caching enabled while incompatible endpoints can still force stripping. (#67427) Thanks @damselem.
54+
- Agents/context engines: keep loop-hook and final `afterTurn` prompt-cache touch metadata aligned with the current assistant turn so cache-aware context engines retain accurate cache TTL state during tool loops. (#67767) Thanks @jalehman.
5455
- Memory/dreaming: strip AI-facing inbound metadata envelopes from session-corpus user turns before normalization so REM topic extraction sees the user's actual message text, including array-shaped split envelopes. (#66548) Thanks @zqchris.
5556

5657
## 2026.4.15-beta.1

src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts

Lines changed: 89 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
22
import type { AssistantMessage } from "@mariozechner/pi-ai";
33
import type { MemoryCitationsMode } from "../../../config/types.memory.js";
44
import type { ContextEngine, ContextEngineRuntimeContext } from "../../../context-engine/types.js";
5-
import type { NormalizedUsage } from "../../usage.js";
5+
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
66
import type { PromptCacheChange } from "../prompt-cache-observability.js";
77
import type { EmbeddedRunAttemptResult } from "./types.js";
88

@@ -103,6 +103,61 @@ export function findCurrentAttemptAssistantMessage(params: {
103103
.find((message): message is AssistantMessage => message.role === "assistant");
104104
}
105105

106+
/**
 * Coerce a raw timestamp value into a finite number, or `null` when that is not possible.
 *
 * Supported inputs:
 * - a finite `number` is returned unchanged (assumed to already be epoch milliseconds);
 * - a `Date` instance yields its `getTime()` value;
 * - a `string` is run through `Date.parse`.
 *
 * `NaN`, infinities, invalid `Date` objects, unparseable strings and every other
 * type produce `null`.
 *
 * @param value - Untrusted timestamp candidate, typically read off an assistant message.
 * @returns The timestamp as a finite number, or `null` if it cannot be derived.
 */
function parsePromptCacheTouchTimestamp(value: unknown): number | null {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : null;
  }
  if (value instanceof Date) {
    // An invalid Date reports NaN from getTime(); treat it like any other unusable input.
    const epochMs = value.getTime();
    return Number.isNaN(epochMs) ? null : epochMs;
  }
  if (typeof value === "string") {
    // Date.parse returns NaN (never +/-Infinity) for strings it cannot interpret.
    const parsed = Date.parse(value);
    return Number.isNaN(parsed) ? null : parsed;
  }
  return null;
}
118+
119+
/**
 * Resolve the effective prompt-cache touch timestamp for the current assistant turn.
 *
 * If `lastCallUsage` carries neither a numeric `cacheRead` nor a numeric
 * `cacheWrite`, the persisted `fallbackLastCacheTouchAt` is returned untouched.
 * Otherwise the assistant timestamp is preferred, and the fallback is used only
 * when that timestamp cannot be parsed.
 *
 * @returns The resolved timestamp, or `null` when neither source provides one.
 */
export function resolvePromptCacheTouchTimestamp(params: {
  lastCallUsage?: NormalizedUsage;
  assistantTimestamp?: unknown;
  fallbackLastCacheTouchAt?: number | null;
}): number | null {
  const { lastCallUsage, assistantTimestamp, fallbackLastCacheTouchAt } = params;
  const persistedTouchAt = fallbackLastCacheTouchAt ?? null;

  // Presence of either metric (even a zero value) counts as cache usage for this turn.
  const reportsCacheMetrics = [lastCallUsage?.cacheRead, lastCallUsage?.cacheWrite].some(
    (metric) => typeof metric === "number",
  );
  if (!reportsCacheMetrics) {
    return persistedTouchAt;
  }

  const liveTouchAt = parsePromptCacheTouchTimestamp(assistantTimestamp);
  return liveTouchAt ?? persistedTouchAt;
}
137+
138+
/**
 * Build prompt-cache info for a loop-hook runtime context from the current
 * attempt's assistant message.
 *
 * The assistant message is looked up with `findCurrentAttemptAssistantMessage`;
 * its usage is normalized and, together with its timestamp and the supplied
 * fallback, handed to `resolvePromptCacheTouchTimestamp` to decide the cache
 * touch time. The result is assembled by `buildContextEnginePromptCacheInfo`.
 */
export function buildLoopPromptCacheInfo(params: {
  messagesSnapshot: AgentMessage[];
  prePromptMessageCount: number;
  retention?: "none" | "short" | "long";
  fallbackLastCacheTouchAt?: number | null;
}): EmbeddedRunAttemptResult["promptCache"] {
  const { messagesSnapshot, prePromptMessageCount, retention, fallbackLastCacheTouchAt } = params;

  const assistant = findCurrentAttemptAssistantMessage({
    messagesSnapshot,
    prePromptMessageCount,
  });
  const usage = normalizeUsage(assistant?.usage);
  const touchedAt = resolvePromptCacheTouchTimestamp({
    lastCallUsage: usage,
    assistantTimestamp: assistant?.timestamp,
    fallbackLastCacheTouchAt,
  });

  return buildContextEnginePromptCacheInfo({
    retention,
    lastCallUsage: usage,
    lastCacheTouchAt: touchedAt,
  });
}
160+
106161
export async function runAttemptContextEngineBootstrap(params: {
107162
hadSessionFile: boolean;
108163
contextEngine?: AttemptContextEngine;
@@ -207,51 +262,50 @@ export async function finalizeAttemptContextEngineTurn(params: {
207262
let postTurnFinalizationSucceeded = true;
208263

209264
if (typeof params.contextEngine.afterTurn === "function") {
210-
try {
211-
await params.contextEngine.afterTurn({
212-
sessionId: params.sessionIdUsed,
213-
sessionKey: params.sessionKey,
214-
sessionFile: params.sessionFile,
215-
messages: params.messagesSnapshot,
216-
prePromptMessageCount: params.prePromptMessageCount,
217-
tokenBudget: params.tokenBudget,
218-
runtimeContext: params.runtimeContext,
219-
});
220-
} catch (afterTurnErr) {
221-
postTurnFinalizationSucceeded = false;
222-
params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`);
223-
}
224-
} else {
225-
const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount);
226-
if (newMessages.length > 0) {
227-
if (typeof params.contextEngine.ingestBatch === "function") {
265+
try {
266+
await params.contextEngine.afterTurn({
267+
sessionId: params.sessionIdUsed,
268+
sessionKey: params.sessionKey,
269+
sessionFile: params.sessionFile,
270+
messages: params.messagesSnapshot,
271+
prePromptMessageCount: params.prePromptMessageCount,
272+
tokenBudget: params.tokenBudget,
273+
runtimeContext: params.runtimeContext,
274+
});
275+
} catch (afterTurnErr) {
276+
postTurnFinalizationSucceeded = false;
277+
params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`);
278+
}
279+
} else {
280+
const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount);
281+
if (newMessages.length > 0) {
282+
if (typeof params.contextEngine.ingestBatch === "function") {
283+
try {
284+
await params.contextEngine.ingestBatch({
285+
sessionId: params.sessionIdUsed,
286+
sessionKey: params.sessionKey,
287+
messages: newMessages,
288+
});
289+
} catch (ingestErr) {
290+
postTurnFinalizationSucceeded = false;
291+
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
292+
}
293+
} else {
294+
for (const msg of newMessages) {
228295
try {
229-
await params.contextEngine.ingestBatch({
296+
await params.contextEngine.ingest?.({
230297
sessionId: params.sessionIdUsed,
231298
sessionKey: params.sessionKey,
232-
messages: newMessages,
299+
message: msg,
233300
});
234301
} catch (ingestErr) {
235302
postTurnFinalizationSucceeded = false;
236303
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
237304
}
238-
} else {
239-
for (const msg of newMessages) {
240-
try {
241-
await params.contextEngine.ingest?.({
242-
sessionId: params.sessionIdUsed,
243-
sessionKey: params.sessionKey,
244-
message: msg,
245-
});
246-
} catch (ingestErr) {
247-
postTurnFinalizationSucceeded = false;
248-
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
249-
}
250-
}
251305
}
252306
}
253307
}
254-
308+
}
255309

256310
if (
257311
!params.promptError &&

src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ import {
77
} from "../../../plugins/memory-state.js";
88
import {
99
type AttemptContextEngine,
10+
buildLoopPromptCacheInfo,
1011
assembleAttemptContextEngine,
1112
buildContextEnginePromptCacheInfo,
1213
findCurrentAttemptAssistantMessage,
1314
finalizeAttemptContextEngineTurn,
15+
resolvePromptCacheTouchTimestamp,
1416
runAttemptContextEngineBootstrap,
1517
} from "./attempt.context-engine-helpers.js";
1618
import {
@@ -367,6 +369,88 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
367369
expect(promptCache).toEqual({ retention: "short" });
368370
});
369371

372+
it("derives live loop prompt-cache info from the current attempt assistant", () => {
  // Assistant turn that reports cache metrics, so its own timestamp should win.
  const assistantTimestamp = "2026-04-16T16:49:59.536Z";
  const cacheUsage = {
    cacheRead: 39036,
    cacheWrite: 59934,
    total: 98973,
  };
  const assistantWithCacheUsage = {
    role: "assistant",
    content: "tool use",
    timestamp: assistantTimestamp,
    usage: {
      input: 1,
      output: 2,
      ...cacheUsage,
    },
  } as unknown as AgentMessage;

  const promptCacheInfo = buildLoopPromptCacheInfo({
    messagesSnapshot: [seedMessage, assistantWithCacheUsage],
    prePromptMessageCount: 1,
    retention: "short",
    fallbackLastCacheTouchAt: 123,
  });

  expect(promptCacheInfo).toEqual(
    expect.objectContaining({
      retention: "short",
      lastCallUsage: expect.objectContaining(cacheUsage),
      lastCacheTouchAt: Date.parse(assistantTimestamp),
    }),
  );
});
405+
406+
it("falls back to the persisted cache touch when loop usage has no cache metrics", () => {
  // Usage without cacheRead/cacheWrite: the persisted fallback must be kept.
  const persistedTouchAt = 123;
  const assistantWithoutCacheUsage = {
    role: "assistant",
    content: "tool use",
    timestamp: "2026-04-16T16:49:59.536Z",
    usage: {
      input: 1,
      output: 2,
      total: 3,
    },
  } as unknown as AgentMessage;

  const promptCacheInfo = buildLoopPromptCacheInfo({
    messagesSnapshot: [seedMessage, assistantWithoutCacheUsage],
    prePromptMessageCount: 1,
    retention: "short",
    fallbackLastCacheTouchAt: persistedTouchAt,
  });

  expect(promptCacheInfo).toEqual(
    expect.objectContaining({
      retention: "short",
      lastCallUsage: expect.objectContaining({
        total: 3,
      }),
      lastCacheTouchAt: persistedTouchAt,
    }),
  );
});
435+
436+
it("derives a live cache touch timestamp for final afterTurn usage snapshots", () => {
  // cacheWrite is zero but still numeric, so the assistant timestamp is expected to win.
  const assistantTimestamp = "2026-04-16T17:04:46.974Z";
  const usageSnapshot = {
    input: 1,
    output: 2,
    cacheRead: 39036,
    cacheWrite: 0,
    total: 39039,
  };

  const resolvedTouchAt = resolvePromptCacheTouchTimestamp({
    lastCallUsage: usageSnapshot,
    assistantTimestamp,
    fallbackLastCacheTouchAt: 123,
  });

  expect(resolvedTouchAt).toBe(Date.parse(assistantTimestamp));
});
453+
370454
it("threads prompt-cache break observations into afterTurn", async () => {
371455
const afterTurn = vi.fn(async (_params: AfterTurnPromptCacheCall) => {});
372456

src/agents/pi-embedded-runner/run/attempt.ts

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,11 @@ import { mapThinkingLevel } from "../utils.js";
182182
import { flushPendingToolResultsAfterIdle } from "../wait-for-idle-before-flush.js";
183183
import {
184184
assembleAttemptContextEngine,
185+
buildLoopPromptCacheInfo,
185186
buildContextEnginePromptCacheInfo,
186187
findCurrentAttemptAssistantMessage,
187188
finalizeAttemptContextEngineTurn,
189+
resolvePromptCacheTouchTimestamp,
188190
resolveAttemptBootstrapContext,
189191
runAttemptContextEngineBootstrap,
190192
} from "./attempt.context-engine-helpers.js";
@@ -1071,6 +1073,24 @@ export async function runEmbeddedAttempt(
10711073
tokenBudget: params.contextTokenBudget,
10721074
modelId: params.modelId,
10731075
getPrePromptMessageCount: () => prePromptMessageCount,
1076+
getRuntimeContext: ({ messages, prePromptMessageCount: loopPrePromptMessageCount }) =>
1077+
buildAfterTurnRuntimeContext({
1078+
attempt: params,
1079+
workspaceDir: effectiveWorkspace,
1080+
agentDir,
1081+
tokenBudget: params.contextTokenBudget,
1082+
promptCache:
1083+
promptCache ??
1084+
buildLoopPromptCacheInfo({
1085+
messagesSnapshot: messages,
1086+
prePromptMessageCount: loopPrePromptMessageCount,
1087+
retention: effectivePromptCacheRetention,
1088+
fallbackLastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, {
1089+
provider: params.provider,
1090+
modelId: params.modelId,
1091+
}),
1092+
}),
1093+
}),
10741094
});
10751095
}
10761096
const cacheTrace = createCacheTrace({
@@ -2235,13 +2255,18 @@ export async function runEmbeddedAttempt(
22352255
changes: cacheBreak?.changes ?? promptCacheChangesForTurn,
22362256
}
22372257
: undefined;
2258+
const fallbackLastCacheTouchAt = readLastCacheTtlTimestamp(sessionManager, {
2259+
provider: params.provider,
2260+
modelId: params.modelId,
2261+
});
22382262
promptCache = buildContextEnginePromptCacheInfo({
22392263
retention: effectivePromptCacheRetention,
22402264
lastCallUsage,
22412265
observation: promptCacheObservation,
2242-
lastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, {
2243-
provider: params.provider,
2244-
modelId: params.modelId,
2266+
lastCacheTouchAt: resolvePromptCacheTouchTimestamp({
2267+
lastCallUsage,
2268+
assistantTimestamp: currentAttemptAssistant?.timestamp,
2269+
fallbackLastCacheTouchAt,
22452270
}),
22462271
});
22472272

0 commit comments

Comments
 (0)