Skip to content

Commit 0071f6c

Browse files
Betalpha
authored and committed
fix(active-memory): bound recall latency and jitter qmd startup
1 parent 230f712 commit 0071f6c

4 files changed

Lines changed: 58 additions & 29 deletions

File tree

docs/concepts/active-memory.md

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ when available:
3737
modelFallback: "google/gemini-3-flash",
3838
queryMode: "recent",
3939
promptStyle: "balanced",
40-
timeoutMs: 15000,
40+
timeoutMs: 3000,
4141
maxSummaryChars: 220,
4242
persistTranscripts: false,
4343
logging: true,
@@ -382,7 +382,7 @@ timeout budgets should grow with context size (`message` < `recent` < `full`).
382382
- you want a better balance of speed and conversational grounding
383383
- follow-up questions often depend on the last few turns
384384

385-
Start around `15000` ms for `config.timeoutMs`.
385+
Start around `3000` to `5000` ms for `config.timeoutMs`.
386386

387387
</Tab>
388388

@@ -402,7 +402,7 @@ timeout budgets should grow with context size (`message` < `recent` < `full`).
402402
- the strongest recall quality matters more than latency
403403
- the conversation contains important setup far back in the thread
404404

405-
Start around `15000` ms or higher depending on thread size.
405+
Start around `5000` ms or higher depending on thread size, but avoid making Active Memory part of the critical path for normal replies.
406406

407407
</Tab>
408408
</Tabs>
@@ -558,24 +558,24 @@ plugins.entries.active-memory
558558

559559
The most important fields are:
560560

561-
| Key | Type | Meaning |
562-
| --------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ |
563-
| `enabled` | `boolean` | Enables the plugin itself |
564-
| `config.agents` | `string[]` | Agent ids that may use active memory |
565-
| `config.model` | `string` | Optional blocking memory sub-agent model ref; when unset, active memory uses the current session model |
566-
| `config.allowedChatTypes` | `("direct" \| "group" \| "channel")[]` | Session types that may run Active Memory; defaults to direct-message style sessions |
567-
| `config.allowedChatIds` | `string[]` | Optional per-conversation allowlist applied after `allowedChatTypes`; non-empty lists fail closed |
568-
| `config.deniedChatIds` | `string[]` | Optional per-conversation denylist that overrides allowed session types and allowed ids |
569-
| `config.queryMode` | `"message" \| "recent" \| "full"` | Controls how much conversation the blocking memory sub-agent sees |
570-
| `config.promptStyle` | `"balanced" \| "strict" \| "contextual" \| "recall-heavy" \| "precision-heavy" \| "preference-only"` | Controls how eager or strict the blocking memory sub-agent is when deciding whether to return memory |
571-
| `config.thinking` | `"off" \| "minimal" \| "low" \| "medium" \| "high" \| "xhigh" \| "adaptive" \| "max"` | Advanced thinking override for the blocking memory sub-agent; default `off` for speed |
572-
| `config.promptOverride` | `string` | Advanced full prompt replacement; not recommended for normal use |
573-
| `config.promptAppend` | `string` | Advanced extra instructions appended to the default or overridden prompt |
574-
| `config.timeoutMs` | `number` | Hard timeout for the blocking memory sub-agent, capped at 120000 ms |
575-
| `config.maxSummaryChars` | `number` | Maximum total characters allowed in the active-memory summary |
576-
| `config.logging` | `boolean` | Emits active memory logs while tuning |
577-
| `config.persistTranscripts` | `boolean` | Keeps blocking memory sub-agent transcripts on disk instead of deleting temp files |
578-
| `config.transcriptDir` | `string` | Relative blocking memory sub-agent transcript directory under the agent sessions folder |
561+
| Key | Type | Meaning |
562+
| --------------------------- | ---------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
563+
| `enabled` | `boolean` | Enables the plugin itself |
564+
| `config.agents` | `string[]` | Agent ids that may use active memory |
565+
| `config.model` | `string` | Optional blocking memory sub-agent model ref; when unset, active memory uses the current session model |
566+
| `config.allowedChatTypes` | `("direct" \| "group" \| "channel")[]` | Session types that may run Active Memory; defaults to direct-message style sessions |
567+
| `config.allowedChatIds` | `string[]` | Optional per-conversation allowlist applied after `allowedChatTypes`; non-empty lists fail closed |
568+
| `config.deniedChatIds` | `string[]` | Optional per-conversation denylist that overrides allowed session types and allowed ids |
569+
| `config.queryMode` | `"message" \| "recent" \| "full"` | Controls how much conversation the blocking memory sub-agent sees |
570+
| `config.promptStyle` | `"balanced" \| "strict" \| "contextual" \| "recall-heavy" \| "precision-heavy" \| "preference-only"` | Controls how eager or strict the blocking memory sub-agent is when deciding whether to return memory |
571+
| `config.thinking` | `"off" \| "minimal" \| "low" \| "medium" \| "high" \| "xhigh" \| "adaptive" \| "max"` | Advanced thinking override for the blocking memory sub-agent; default `off` for speed |
572+
| `config.promptOverride` | `string` | Advanced full prompt replacement; not recommended for normal use |
573+
| `config.promptAppend` | `string` | Advanced extra instructions appended to the default or overridden prompt |
574+
| `config.timeoutMs` | `number` | Hard timeout for the deadline-bounded memory sub-agent; defaults to 3000 ms and falls back silently on timeout |
575+
| `config.maxSummaryChars` | `number` | Maximum total characters allowed in the active-memory summary |
576+
| `config.logging` | `boolean` | Emits active memory logs while tuning |
577+
| `config.persistTranscripts` | `boolean` | Keeps blocking memory sub-agent transcripts on disk instead of deleting temp files |
578+
| `config.transcriptDir` | `string` | Relative blocking memory sub-agent transcript directory under the agent sessions folder |
579579

580580
Useful tuning fields:
581581

@@ -602,7 +602,7 @@ Start with `recent`.
602602
agents: ["main"],
603603
queryMode: "recent",
604604
promptStyle: "balanced",
605-
timeoutMs: 15000,
605+
timeoutMs: 3000,
606606
maxSummaryChars: 220,
607607
logging: true,
608608
},

extensions/active-memory/index.test.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -185,9 +185,9 @@ describe("active-memory plugin", () => {
185185

186186
it("registers a before_prompt_build hook", () => {
187187
expect(api.on).toHaveBeenCalledWith("before_prompt_build", expect.any(Function), {
188-
timeoutMs: 150_000,
188+
timeoutMs: 4_000,
189189
});
190-
expect(hookOptions.before_prompt_build?.timeoutMs).toBe(150_000);
190+
expect(hookOptions.before_prompt_build?.timeoutMs).toBe(4_000);
191191
});
192192

193193
it("runs recall without recording shared auth-profile failures", async () => {

extensions/active-memory/index.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ import {
2424
} from "openclaw/plugin-sdk/session-store-runtime";
2525
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
2626

27-
const DEFAULT_TIMEOUT_MS = 15_000;
27+
const DEFAULT_TIMEOUT_MS = 3_000;
2828
const DEFAULT_AGENT_ID = "main";
2929
const DEFAULT_MAX_SUMMARY_CHARS = 220;
3030
const DEFAULT_RECENT_USER_TURNS = 2;
@@ -35,7 +35,7 @@ const DEFAULT_CACHE_TTL_MS = 15_000;
3535
const DEFAULT_MAX_CACHE_ENTRIES = 1000;
3636
const CACHE_SWEEP_INTERVAL_MS = 1000;
3737
const DEFAULT_MIN_TIMEOUT_MS = 250;
38-
const DEFAULT_SETUP_GRACE_TIMEOUT_MS = 30_000;
38+
const DEFAULT_SETUP_GRACE_TIMEOUT_MS = 0;
3939
const DEFAULT_QUERY_MODE = "recent" as const;
4040
const DEFAULT_QMD_SEARCH_MODE = "search" as const;
4141
const DEFAULT_TRANSCRIPT_DIR = "active-memory";
@@ -2431,7 +2431,7 @@ export default definePluginEntry({
24312431
},
24322432
});
24332433

2434-
const beforePromptBuildTimeoutMs = 120_000 + setupGraceTimeoutMs;
2434+
const beforePromptBuildTimeoutMs = DEFAULT_TIMEOUT_MS + setupGraceTimeoutMs + 1_000;
24352435
api.on(
24362436
"before_prompt_build",
24372437
async (event, ctx) => {

src/gateway/server-startup-memory.ts

Lines changed: 31 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ import {
88
import { getActiveMemorySearchManager } from "../plugins/memory-runtime.js";
99
import { normalizeAgentId } from "../routing/session-key.js";
1010

11+
const DEFAULT_QMD_STARTUP_JITTER_MS = 100;
12+
1113
function shouldStartQmdBackgroundWork(qmd: ResolvedQmdConfig): boolean {
1214
return qmd.update.onBoot || qmd.update.intervalMs > 0 || qmd.update.embedIntervalMs > 0;
1315
}
@@ -35,12 +37,29 @@ function shouldEagerlyStartAgentMemory(params: {
3537
return hasExplicitAgentMemorySearchConfig(params.cfg, params.agentId);
3638
}
3739

40+
function delay(ms: number): Promise<void> {
41+
if (ms <= 0) {
42+
return Promise.resolve();
43+
}
44+
return new Promise((resolve) => {
45+
const timeout = setTimeout(resolve, ms);
46+
timeout.unref?.();
47+
});
48+
}
49+
50+
function resolveStartupJitterMs(params: { index: number; total: number }): number {
51+
if (params.index <= 0 || params.total <= 1) {
52+
return 0;
53+
}
54+
return Math.floor(Math.random() * DEFAULT_QMD_STARTUP_JITTER_MS);
55+
}
56+
3857
export async function startGatewayMemoryBackend(params: {
3958
cfg: OpenClawConfig;
4059
log: { info?: (msg: string) => void; warn: (msg: string) => void };
4160
}): Promise<void> {
4261
const agentIds = listAgentIds(params.cfg);
43-
const armedAgentIds: string[] = [];
62+
const eagerAgentIds: string[] = [];
4463
const deferredAgentIds: string[] = [];
4564
for (const agentId of agentIds) {
4665
if (!resolveMemorySearchConfig(params.cfg, agentId)) {
@@ -67,7 +86,17 @@ export async function startGatewayMemoryBackend(params: {
6786
continue;
6887
}
6988

70-
const { manager, error } = await getActiveMemorySearchManager({ cfg: params.cfg, agentId });
89+
eagerAgentIds.push(agentId);
90+
}
91+
const startupResults = await Promise.all(
92+
eagerAgentIds.map(async (agentId, index) => {
93+
await delay(resolveStartupJitterMs({ index, total: eagerAgentIds.length }));
94+
const { manager, error } = await getActiveMemorySearchManager({ cfg: params.cfg, agentId });
95+
return { agentId, manager, error };
96+
}),
97+
);
98+
const armedAgentIds: string[] = [];
99+
for (const { agentId, manager, error } of startupResults) {
71100
if (!manager) {
72101
params.log.warn(
73102
`qmd memory startup initialization failed for agent "${agentId}": ${error ?? "unknown error"}`,

0 commit comments

Comments
 (0)