Skip to content

Commit bbc4bee

Browse files
authored
fix(heartbeat): advance stale scheduler deferrals
Fix stale heartbeat scheduler deferrals so that disabled/non-retry skips and flood deferrals advance the due slot instead of rearming a 0 ms timer in a loop.

Fixes #79380. Supersedes #79418.

Proof:
- pnpm test src/infra/heartbeat-runner.scheduler.test.ts -- --reporter=verbose
- pnpm check:changed via Testbox tbx_01ksxfavykc7qyve4ysnxg3smh
- autoreview clean
- GitHub CI green for 213003a, including Real behavior proof
1 parent ef9e9bf commit bbc4bee

2 files changed

Lines changed: 79 additions & 7 deletions

File tree

src/infra/heartbeat-runner.scheduler.test.ts

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,65 @@ describe("startHeartbeatRunner", () => {
363363
runner.stop();
364364
});
365365

366+
// Regression test: when runOnce resolves to { status: "skipped", reason: "disabled" },
// the most recently armed timer must use a delay above 5 s, and no second run may
// happen within the following 2 s.
it("advances cadence after non-retryable disabled skips", async () => {
367+
useFakeHeartbeatTime();
368+
// Spy on setTimeout so the delays requested during the test can be inspected.
const timeoutSpy = vi.spyOn(globalThis, "setTimeout");
369+
// Every run reports the non-retryable "disabled" skip.
const runSpy = vi.fn().mockResolvedValue({ status: "skipped", reason: "disabled" } as const);
370+
371+
// 10 minutes in milliseconds, matching the agent's `every: "10m"` setting below.
const intervalMs = 10 * 60_000;
372+
const runner = startHeartbeatRunner({
373+
cfg: heartbeatConfig([{ id: "main", heartbeat: { every: "10m" } }]),
374+
runOnce: runSpy,
375+
stableSchedulerSeed: TEST_SCHEDULER_SEED,
376+
});
377+
const firstDueMs = resolveDueFromNow(0, intervalMs, "main");
378+
379+
// Step just past the first due time; exactly one run is expected.
await vi.advanceTimersByTimeAsync(firstDueMs + 1);
380+
expect(runSpy).toHaveBeenCalledTimes(1);
381+
382+
// Collect the second argument (the delay) of every observed setTimeout call,
// keeping only numeric values.
const delays = timeoutSpy.mock.calls
383+
.map((call) => call[1])
384+
.filter((delay): delay is number => typeof delay === "number");
385+
// The last timer armed must not be a near-zero rearm.
expect(delays[delays.length - 1]).toBeGreaterThan(5_000);
386+
387+
// Two more seconds of fake time must not trigger another run.
await vi.advanceTimersByTimeAsync(2_000);
388+
expect(runSpy).toHaveBeenCalledTimes(1);
389+
390+
timeoutSpy.mockRestore();
391+
runner.stop();
392+
});
393+
394+
// Regression test: with a 1 s cadence and runOnce always resolving to "ran",
// five runs are observed, a further interval produces no sixth run, and the
// most recently armed timer uses a delay greater than 0 ms.
it("advances cadence after flood deferrals without wake-layer retry", async () => {
395+
useFakeHeartbeatTime();
396+
// Spy on setTimeout so the delays requested during the test can be inspected.
const timeoutSpy = vi.spyOn(globalThis, "setTimeout");
397+
const runSpy = vi.fn().mockResolvedValue({ status: "ran", durationMs: 1 } as const);
398+
399+
// 1 second in milliseconds, matching the agent's `every: "1s"` setting below.
const intervalMs = 1_000;
400+
const runner = startHeartbeatRunner({
401+
cfg: heartbeatConfig([{ id: "main", heartbeat: { every: "1s" } }]),
402+
runOnce: runSpy,
403+
stableSchedulerSeed: TEST_SCHEDULER_SEED,
404+
});
405+
const firstDueMs = resolveDueFromNow(0, intervalMs, "main");
406+
407+
// First due tick plus four more intervals: five runs in total.
await vi.advanceTimersByTimeAsync(firstDueMs + 1);
408+
for (let i = 0; i < 4; i++) {
409+
await vi.advanceTimersByTimeAsync(intervalMs);
410+
}
411+
expect(runSpy).toHaveBeenCalledTimes(5);
412+
413+
// One more interval must not produce a sixth run.
// NOTE(review): presumably the flood gate defers this wake; the threshold is
// defined outside this test — confirm against the runner.
await vi.advanceTimersByTimeAsync(intervalMs);
414+
expect(runSpy).toHaveBeenCalledTimes(5);
415+
416+
// Collect the second argument (the delay) of every observed setTimeout call,
// keeping only numeric values.
const delays = timeoutSpy.mock.calls
417+
.map((call) => call[1])
418+
.filter((delay): delay is number => typeof delay === "number");
419+
// The last timer armed must not be a 0 ms rearm.
expect(delays[delays.length - 1]).toBeGreaterThan(0);
420+
421+
timeoutSpy.mockRestore();
422+
runner.stop();
423+
});
424+
366425
it("does not push nextDueMs forward on repeated requests-in-flight skips", async () => {
367426
useFakeHeartbeatTime();
368427

src/infra/heartbeat-runner.ts

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2171,6 +2171,20 @@ export function startHeartbeatRunner(opts: {
21712171
agent.nextDueMs = seekActiveSlotForAgent(agent, rawDueMs);
21722172
};
21732173

2174+
/**
 * Advances an agent's schedule after a wake was deferred while the agent's
 * due time had already been reached.
 *
 * This is a no-op when `decision` is missing or is not a deferral, when the
 * deferral reason is "not-due", or when `agent.nextDueMs` is still later
 * than `now`. In every other case it delegates to
 * `advanceAgentSchedule(agent, now, reason)`.
 *
 * @param agent - Agent state whose `nextDueMs` is inspected and possibly advanced.
 * @param now - Current timestamp compared against `agent.nextDueMs`.
 * @param reason - Optional wake reason, forwarded unchanged to `advanceAgentSchedule`.
 * @param decision - Optional deferral decision for this wake.
 */
const advanceStaleScheduleAfterDeferral = (
2175+
agent: HeartbeatAgentState,
2176+
now: number,
2177+
reason?: string,
2178+
decision?: DeferDecision,
2179+
) => {
2180+
// Skip unless this is a real deferral of an agent that is at or past due.
if (!decision?.defer || decision.reason === "not-due" || agent.nextDueMs > now) {
2181+
return;
2182+
}
2183+
// Deferrals that do not have wake-layer retry ownership still need to move
2184+
// the due slot forward; otherwise scheduleNext() will keep rearming at 0ms.
2185+
advanceAgentSchedule(agent, now, reason);
2186+
};
2187+
21742188
// Centralized cooldown gate. Both targeted and broadcast dispatch branches
21752189
// call this before invoking `runOnce`. Manual wakes are never deferred.
21762190
// Everything else respects `nextDueMs`, the min-spacing floor, and the flood
@@ -2366,6 +2380,7 @@ export function startHeartbeatRunner(opts: {
23662380
}
23672381
const deferral = evaluateWakeDeferral(targetAgent, now, reason, intent);
23682382
if (deferral.defer) {
2383+
advanceStaleScheduleAfterDeferral(targetAgent, now, reason, deferral);
23692384
return { status: "skipped", reason: deferral.reason };
23702385
}
23712386
try {
@@ -2395,11 +2410,10 @@ export function startHeartbeatRunner(opts: {
23952410
return res;
23962411
}
23972412
// Non-retryable outcome (ran, disabled, failed-but-not-busy). Record
2398-
// bookkeeping so subsequent wakes within the cooldown window defer.
2413+
// bookkeeping and move the due slot so scheduleNext() cannot hot-loop
2414+
// on a stale past-due agent.
23992415
recordRunBookkeeping(targetAgent, now);
2400-
if (res.status !== "skipped" || res.reason !== "disabled") {
2401-
advanceAgentSchedule(targetAgent, now, reason);
2402-
}
2416+
advanceAgentSchedule(targetAgent, now, reason);
24032417
return res.status === "ran" ? { status: "ran", durationMs: Date.now() - startedAt } : res;
24042418
} catch (err) {
24052419
const errMsg = formatErrorMessage(err);
@@ -2428,6 +2442,7 @@ export function startHeartbeatRunner(opts: {
24282442
const runOneAgent = async (agent: HeartbeatAgentState): Promise<AgentWakeOutcome> => {
24292443
const deferral = evaluateWakeDeferral(agent, now, reason, intent);
24302444
if (deferral.defer) {
2445+
advanceStaleScheduleAfterDeferral(agent, now, reason, deferral);
24312446
return { ran: false };
24322447
}
24332448

@@ -2462,9 +2477,7 @@ export function startHeartbeatRunner(opts: {
24622477
}
24632478
// Non-retryable outcome — record bookkeeping for cooldown gates.
24642479
recordRunBookkeeping(agent, now);
2465-
if (res.status !== "skipped" || res.reason !== "disabled") {
2466-
advanceAgentSchedule(agent, now, reason);
2467-
}
2480+
advanceAgentSchedule(agent, now, reason);
24682481
let agentRan = res.status === "ran";
24692482

24702483
const defaultSessionKey = resolveHeartbeatSession(

0 commit comments

Comments
 (0)