Skip to content

Commit 98846dc

Browse files
committed
fix(heartbeat): remap cron-run exec events to session keys
Exec completion events enqueued from cron-run or channel-specific session keys can be missed by the heartbeat runner when it drains the agent's durable session queue. Propagate session config through exec, ACP, gateway, and CLI watchdog paths, then remap cron-run event keys to the correct main/global queue before enqueueing. Preserve global-scope heartbeat behavior while still waking the originating agent, tighten exec-completion event matching, and cover the routing paths with focused regression tests. Closes #52305. Related: #18237.
1 parent c90fd7e commit 98846dc

20 files changed

Lines changed: 451 additions & 42 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ Docs: https://docs.openclaw.ai
426426
- OpenRouter: keep the default `openrouter/auto` model ref canonical while preventing TUI and Control UI catalog pickers from displaying or submitting `openrouter/openrouter/auto`. Fixes #62655.
427427
- Status/Claude CLI: show `oauth (claude-cli)` for working Claude CLI OAuth runtime sessions instead of `unknown` when no local auth profile exists. Fixes #78632. Thanks @gorkem2020.
428428
- Memory search: preserve keyword-only hybrid FTS matches when vector scoring is unavailable or below the configured minimum score, so exact lexical hits are not dropped by weighted min-score filtering.
429+
- Heartbeat/async exec: remap cron-run session keys to agent-main (or `"global"` under `session.scope=global`) at the bash exec, ACP, gateway node-event, and CLI watchdog enqueue sites, and treat cron-run descendants as ephemeral for retention pruning, so async exec completion events land in the same queue the heartbeat drains instead of being stranded under the ephemeral cron-run key. Refs #52305. Thanks @Kaspre.
429430
- Exec approvals/node: let trusted backend node invokes complete no-device Control UI approvals after the original request connection changes, while keeping node, command, cwd, env, and allow-once replay bindings enforced. Fixes #78569. Thanks @naturedogdog.
430431
- Agents/subagents: keep background completion delivery on the requester-agent handoff/queue-retry path instead of raw-sending child results directly, and strip child-result wrapper or OpenClaw runtime-context scaffolding from queued outbound retries. Fixes #78531. Thanks @EthanSK.
431432
- Sandbox: recreate cached browser bridges when JavaScript-evaluation permission changes, keep failed prune removals tracked for retry, and make cross-device directory moves copy-then-commit without partially emptying the source on failure.

src/agents/acp-spawn-parent-stream.test.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,45 @@ describe("startAcpSpawnParentStreamRelay", () => {
152152
relay.dispose();
153153
});
154154

155+
it("remaps cron-run parent session keys while relaying stream events", () => {
156+
const relay = startAcpSpawnParentStreamRelay({
157+
runId: "run-cron",
158+
parentSessionKey: "agent:ops:cron:nightly:run:run-1:subagent:worker",
159+
childSessionKey: "agent:codex:acp:child-cron",
160+
agentId: "codex",
161+
mainKey: "primary",
162+
sessionScope: "global",
163+
streamFlushMs: 10,
164+
noOutputNoticeMs: 120_000,
165+
});
166+
167+
emitAgentEvent({
168+
runId: "run-cron",
169+
stream: "assistant",
170+
data: {
171+
delta: "hello from child",
172+
},
173+
});
174+
vi.advanceTimersByTime(15);
175+
176+
expect(enqueueSystemEventMock).toHaveBeenCalledWith(
177+
expect.stringContaining("codex: hello from child"),
178+
expect.objectContaining({
179+
contextKey: "acp-spawn:run-cron:progress",
180+
sessionKey: "global",
181+
trusted: false,
182+
}),
183+
);
184+
expect(requestHeartbeatMock).toHaveBeenCalledWith(
185+
expect.objectContaining({
186+
agentId: "ops",
187+
reason: "acp:spawn:stream",
188+
}),
189+
);
190+
expect(requestHeartbeatMock.mock.calls[0]?.[0]).not.toHaveProperty("sessionKey");
191+
relay.dispose();
192+
});
193+
155194
it("emits a no-output notice and a resumed notice when output returns", () => {
156195
const relay = startAcpSpawnParentStreamRelay({
157196
runId: "run-2",

src/agents/acp-spawn-parent-stream.ts

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { onAgentEvent } from "../infra/agent-events.js";
66
import { requestHeartbeat } from "../infra/heartbeat-wake.js";
77
import { appendRegularFile } from "../infra/regular-file.js";
88
import { enqueueSystemEvent } from "../infra/system-events.js";
9-
import { scopedHeartbeatWakeOptions } from "../routing/session-key.js";
9+
import { resolveEventSessionKey, scopedHeartbeatWakeOptions } from "../routing/session-key.js";
1010
import { normalizeAssistantPhase } from "../shared/chat-message-content.js";
1111
import { normalizeOptionalString } from "../shared/string-coerce.js";
1212
import { recordTaskRunProgressByRunId } from "../tasks/detached-task-runtime.js";
@@ -75,6 +75,21 @@ export function startAcpSpawnParentStreamRelay(params: {
7575
parentSessionKey: string;
7676
childSessionKey: string;
7777
agentId: string;
78+
/**
79+
* Optional `session.mainKey` from the runtime config. Used to remap
80+
* cron-run parent session keys to the agent's main queue when relaying
81+
* events. Callers pass the spawn-time `cfg.session?.mainKey`; when it is
82+
* `undefined`, the remap falls back to the literal "main" default. Long-running
83+
* relays keep using that start-time value if config changes while the child
84+
* session is still streaming.
85+
*/
86+
mainKey?: string;
87+
/**
88+
* Optional `session.scope` from the runtime config. Needed so global-scope
89+
* agents route cron-run events to the "global" queue instead of agent-main.
90+
* Snapshotted with `mainKey` for the same start-time routing reason.
91+
*/
92+
sessionScope?: "per-sender" | "global";
7893
logPath?: string;
7994
deliveryContext?: DeliveryContext;
8095
surfaceUpdates?: boolean;
@@ -180,11 +195,16 @@ export function startAcpSpawnParentStreamRelay(params: {
180195
return;
181196
}
182197
requestHeartbeat(
183-
scopedHeartbeatWakeOptions(parentSessionKey, {
184-
source: "acp-spawn",
185-
intent: "event",
186-
reason: "acp:spawn:stream",
187-
}),
198+
scopedHeartbeatWakeOptions(
199+
parentSessionKey,
200+
{
201+
source: "acp-spawn",
202+
intent: "event",
203+
reason: "acp:spawn:stream",
204+
},
205+
params.mainKey,
206+
params.sessionScope,
207+
),
188208
);
189209
};
190210
const emit = (text: string, contextKey: string) => {
@@ -197,7 +217,7 @@ export function startAcpSpawnParentStreamRelay(params: {
197217
return;
198218
}
199219
enqueueSystemEvent(cleaned, {
200-
sessionKey: parentSessionKey,
220+
sessionKey: resolveEventSessionKey(parentSessionKey, params.mainKey, params.sessionScope),
201221
contextKey,
202222
deliveryContext: params.deliveryContext,
203223
trusted: false,

src/agents/acp-spawn.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,6 +1371,8 @@ export async function spawnAcpDirect(
13711371
parentSessionKey,
13721372
childSessionKey: sessionKey,
13731373
agentId: targetAgentId,
1374+
mainKey: cfg.session?.mainKey,
1375+
sessionScope: cfg.session?.scope,
13741376
logPath: streamLogPath,
13751377
deliveryContext: parentDeliveryCtx,
13761378
emitStartNotice: false,
@@ -1424,6 +1426,8 @@ export async function spawnAcpDirect(
14241426
parentSessionKey,
14251427
childSessionKey: sessionKey,
14261428
agentId: targetAgentId,
1429+
mainKey: cfg.session?.mainKey,
1430+
sessionScope: cfg.session?.scope,
14271431
logPath: streamLogPath,
14281432
deliveryContext: parentDeliveryCtx,
14291433
emitStartNotice: false,

src/agents/bash-process-registry.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,17 @@ export interface ProcessSession {
3535
command: string;
3636
scopeKey?: string;
3737
sessionKey?: string;
38+
/** `session.mainKey` from the runtime config, snapshotted at exec start.
39+
* Used by background-exit notifications to remap cron-run keys to the
40+
* agent's main queue without an ambient config load. If config changes
41+
* while the process runs, the exit notification follows the start-time
42+
* session contract. */
43+
mainKey?: string;
44+
/** `session.scope` from the runtime config; needed so the cron-run remap
45+
* can route global-scope agents to the literal "global" queue instead
46+
* of an agent-main queue the heartbeat never drains. Snapshotted with
47+
* `mainKey` for the same start-time routing reason. */
48+
sessionScope?: "per-sender" | "global";
3849
notifyDeliveryContext?: DeliveryContext;
3950
notifyOnExit?: boolean;
4051
notifyOnExitEmptySuccess?: boolean;

src/agents/bash-tools.exec-runtime.test.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,49 @@ describe("emitExecSystemEvent", () => {
452452
);
453453
});
454454

455+
it("remaps cron-run event enqueue and wake targets to the drained agent main session", () => {
456+
emitExecSystemEvent("Exec finished", {
457+
sessionKey: "agent:ops:cron:nightly:run:run-1",
458+
contextKey: "exec:run-cron",
459+
mainKey: "primary",
460+
});
461+
462+
expect(enqueueSystemEventMock).toHaveBeenCalledWith("Exec finished", {
463+
sessionKey: "agent:ops:primary",
464+
contextKey: "exec:run-cron",
465+
trusted: false,
466+
});
467+
expect(requestHeartbeatMock).toHaveBeenCalledWith(
468+
expect.objectContaining({
469+
coalesceMs: 0,
470+
reason: "exec-event",
471+
sessionKey: "agent:ops:primary",
472+
}),
473+
);
474+
});
475+
476+
it("routes global-scope cron-run events to the global queue and preserves the agent wake target", () => {
477+
emitExecSystemEvent("Exec finished", {
478+
sessionKey: "agent:ops:cron:nightly:run:run-1:subagent:worker",
479+
contextKey: "exec:run-global",
480+
sessionScope: "global",
481+
});
482+
483+
expect(enqueueSystemEventMock).toHaveBeenCalledWith("Exec finished", {
484+
sessionKey: "global",
485+
contextKey: "exec:run-global",
486+
trusted: false,
487+
});
488+
expect(requestHeartbeatMock).toHaveBeenCalledWith(
489+
expect.objectContaining({
490+
agentId: "ops",
491+
coalesceMs: 0,
492+
reason: "exec-event",
493+
}),
494+
);
495+
expect(requestHeartbeatMock.mock.calls[0]?.[0]).not.toHaveProperty("sessionKey");
496+
});
497+
455498
it("keeps wake unscoped for non-agent session keys", () => {
456499
emitExecSystemEvent("Exec finished", {
457500
sessionKey: "global",

src/agents/bash-tools.exec-runtime.ts

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import { requestHeartbeat } from "../infra/heartbeat-wake.js";
1212
import { isDangerousHostInheritedEnvVarName } from "../infra/host-env-security.js";
1313
import { findPathKey, mergePathPrepend } from "../infra/path-prepend.js";
1414
import { enqueueSystemEvent } from "../infra/system-events.js";
15-
import { scopedHeartbeatWakeOptions } from "../routing/session-key.js";
15+
import { resolveEventSessionKey, scopedHeartbeatWakeOptions } from "../routing/session-key.js";
1616
import type { ProcessSession } from "./bash-process-registry.js";
1717
import type { ExecToolDetails } from "./bash-tools.exec-types.js";
1818
import type { BashSandboxConfig } from "./bash-tools.shared.js";
@@ -340,17 +340,22 @@ function maybeNotifyOnExit(session: ProcessSession, status: "completed" | "faile
340340
? `Exec ${status} (${session.id.slice(0, 8)}, ${exitLabel}) :: ${output}`
341341
: `Exec ${status} (${session.id.slice(0, 8)}, ${exitLabel})`;
342342
enqueueSystemEvent(summary, {
343-
sessionKey,
343+
sessionKey: resolveEventSessionKey(sessionKey, session.mainKey, session.sessionScope),
344344
deliveryContext: session.notifyDeliveryContext,
345345
trusted: false,
346346
});
347347
requestHeartbeat(
348-
scopedHeartbeatWakeOptions(sessionKey, {
349-
source: "exec-event",
350-
intent: "event",
351-
reason: "exec-event",
352-
coalesceMs: 0,
353-
}),
348+
scopedHeartbeatWakeOptions(
349+
sessionKey,
350+
{
351+
source: "exec-event",
352+
intent: "event",
353+
reason: "exec-event",
354+
coalesceMs: 0,
355+
},
356+
session.mainKey,
357+
session.sessionScope,
358+
),
354359
);
355360
}
356361

@@ -416,25 +421,40 @@ export function resolveApprovalRunningNoticeMs(value?: number) {
416421

417422
export function emitExecSystemEvent(
418423
text: string,
419-
opts: { sessionKey?: string; contextKey?: string; deliveryContext?: DeliveryContext },
424+
opts: {
425+
sessionKey?: string;
426+
contextKey?: string;
427+
deliveryContext?: DeliveryContext;
428+
/** `session.mainKey` from the runtime config; when `undefined`, the remap
429+
* falls back to the literal "main" default in `resolveEventSessionKey`. */
430+
mainKey?: string;
431+
/** `session.scope` from the runtime config; needed so global-scope
432+
* agents route cron-run events to the "global" queue. */
433+
sessionScope?: "per-sender" | "global";
434+
},
420435
) {
421436
const sessionKey = opts.sessionKey?.trim();
422437
if (!sessionKey) {
423438
return;
424439
}
425440
enqueueSystemEvent(text, {
426-
sessionKey,
441+
sessionKey: resolveEventSessionKey(sessionKey, opts.mainKey, opts.sessionScope),
427442
contextKey: opts.contextKey,
428443
deliveryContext: opts.deliveryContext,
429444
trusted: false,
430445
});
431446
requestHeartbeat(
432-
scopedHeartbeatWakeOptions(sessionKey, {
433-
source: "exec-event",
434-
intent: "event",
435-
reason: "exec-event",
436-
coalesceMs: 0,
437-
}),
447+
scopedHeartbeatWakeOptions(
448+
sessionKey,
449+
{
450+
source: "exec-event",
451+
intent: "event",
452+
reason: "exec-event",
453+
coalesceMs: 0,
454+
},
455+
opts.mainKey,
456+
opts.sessionScope,
457+
),
438458
);
439459
}
440460

@@ -568,6 +588,15 @@ export async function runExecProcess(opts: {
568588
notifyOnExitEmptySuccess?: boolean;
569589
scopeKey?: string;
570590
sessionKey?: string;
591+
/** `session.mainKey` from the runtime config; snapshotted onto the
592+
* ProcessSession so background-exit notifications can remap cron-run
593+
* keys without an ambient config load. Long-running background exits use
594+
* this start-time value even if config changes while the process runs. */
595+
mainKey?: string;
596+
/** `session.scope` from the runtime config; snapshotted alongside
597+
* `mainKey` so the cron-run remap can route global-scope agents to
598+
* the "global" queue instead of agent-main. */
599+
sessionScope?: "per-sender" | "global";
571600
notifyDeliveryContext?: DeliveryContext;
572601
timeoutSec: number | null;
573602
onUpdate?: (partialResult: AgentToolResult<ExecToolDetails>) => void;
@@ -587,6 +616,8 @@ export async function runExecProcess(opts: {
587616
command: opts.command,
588617
scopeKey: opts.scopeKey,
589618
sessionKey: opts.sessionKey,
619+
mainKey: opts.mainKey,
620+
sessionScope: opts.sessionScope,
590621
notifyDeliveryContext: normalizeDeliveryContext(opts.notifyDeliveryContext),
591622
notifyOnExit: opts.notifyOnExit,
592623
notifyOnExitEmptySuccess: opts.notifyOnExitEmptySuccess === true,

src/agents/bash-tools.exec-types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ export type ExecToolDefaults = {
2929
allowBackground?: boolean;
3030
scopeKey?: string;
3131
sessionKey?: string;
32+
/** `session.mainKey` from the runtime config; passed through into
33+
* runExecProcess so background-exit notifications can remap cron-run
34+
* session keys to the agent's main queue without an ambient config load. */
35+
mainKey?: string;
36+
/** `session.scope` from the runtime config; passed alongside `mainKey`
37+
* so the cron-run remap can route global-scope agents to the "global"
38+
* queue instead of agent-main. */
39+
sessionScope?: "per-sender" | "global";
3240
messageProvider?: string;
3341
currentChannelId?: string;
3442
currentThreadTs?: string;

src/agents/bash-tools.exec.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,8 @@ export function createExecTool(
15721572
notifyOnExitEmptySuccess,
15731573
scopeKey: defaults?.scopeKey,
15741574
sessionKey: notifySessionKey,
1575+
mainKey: defaults?.mainKey,
1576+
sessionScope: defaults?.sessionScope,
15751577
notifyDeliveryContext,
15761578
timeoutSec: effectiveTimeout,
15771579
onUpdate,

src/agents/cli-runner/execute.ts

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { requestHeartbeat as requestHeartbeatImpl } from "../../infra/heartbeat-
66
import { sanitizeHostExecEnv } from "../../infra/host-env-security.js";
77
import { enqueueSystemEvent as enqueueSystemEventImpl } from "../../infra/system-events.js";
88
import { getProcessSupervisor as getProcessSupervisorImpl } from "../../process/supervisor/index.js";
9-
import { scopedHeartbeatWakeOptions } from "../../routing/session-key.js";
9+
import { resolveEventSessionKey, scopedHeartbeatWakeOptions } from "../../routing/session-key.js";
1010
import { appendBootstrapPromptWarning } from "../bootstrap-budget.js";
1111
import {
1212
createCliJsonlStreamingParser,
@@ -640,13 +640,26 @@ export async function executePreparedCliRun(
640640
"It may have been waiting for interactive input or an approval prompt.",
641641
"For Claude Code, prefer --permission-mode bypassPermissions --print.",
642642
].join(" ");
643-
executeDeps.enqueueSystemEvent(stallNotice, { sessionKey: params.sessionKey });
643+
const watchdogMainKey = params.config?.session?.mainKey;
644+
const watchdogScope = params.config?.session?.scope;
645+
executeDeps.enqueueSystemEvent(stallNotice, {
646+
sessionKey: resolveEventSessionKey(
647+
params.sessionKey,
648+
watchdogMainKey,
649+
watchdogScope,
650+
),
651+
});
644652
executeDeps.requestHeartbeat(
645-
scopedHeartbeatWakeOptions(params.sessionKey, {
646-
source: "cli-watchdog",
647-
intent: "event",
648-
reason: "cli:watchdog:stall",
649-
}),
653+
scopedHeartbeatWakeOptions(
654+
params.sessionKey,
655+
{
656+
source: "cli-watchdog",
657+
intent: "event",
658+
reason: "cli:watchdog:stall",
659+
},
660+
watchdogMainKey,
661+
watchdogScope,
662+
),
650663
);
651664
}
652665
throw new FailoverError(timeoutReason, {

0 commit comments

Comments
 (0)