@@ -63,6 +63,8 @@ vi.mock("../packages/cli/src/logger.js", () => ({
6363// ---- Imports after mocks are set up -----------------------------------------
6464
6565import { DaemonLoop } from "../packages/cli/src/daemon/loop.js" ;
66+ import { _setSessionManagerForTest , SessionManager } from "../packages/cli/src/session/manager.js" ;
67+ import { clearAllSessions , writeSession } from "../packages/cli/src/session/store.js" ;
6668import type { SessionFile } from "../packages/cli/src/session/types.js" ;
6769
6870// ---- Minimal fakes ----------------------------------------------------------
@@ -230,6 +232,111 @@ describe("DaemonLoop — RATE_LIMIT_RESUME_PROMPT is non-empty", () => {
230232 } ) ;
231233 } ) ;
232234
describe("post-crash restart: resumeAfter on disk → fresh DaemonLoop resumes (full chain)", () => {
  /**
   * Integration test for the daemon-restart scenario:
   *
   *   1. A session file with status="rate_limited" and a `resumeAfter`
   *      timestamp is written to disk via `writeSession`. This reproduces the
   *      on-disk state left behind when an agent hits the rate limit and the
   *      daemon process is then killed (the test writes the file directly; it
   *      does not run the rate-limit routing code itself).
   *   2. A fresh DaemonLoop is constructed against a brand-new SessionManager
   *      (no in-memory rate-limit state), simulating a daemon restart.
   *   3. resumeBackoffSessions() is invoked and the test checks whether
   *      resumeOneSession was called for the on-disk session.
   *
   * Ordering matters: the singleton is seeded with _setSessionManagerForTest
   * BEFORE the DaemonLoop is constructed, so that the loop binds to the fresh
   * SessionManager rather than to a previously installed one.
   */

  /** Fields that differ between the sessions used by the tests below. */
  interface RateLimitedSessionSpec {
    sessionId: string;
    agentId: string;
    taskId: string;
    /** Epoch ms at which the session started. */
    startedAt: number;
    /** Epoch ms after which the session may be resumed. */
    resumeAfter: number;
  }

  /**
   * Builds a worker session in the "rate_limited" state. Everything except
   * the fields in `spec` is fixed filler required by the SessionFile shape.
   */
  const makeRateLimitedSession = (spec: RateLimitedSessionSpec): SessionFile => {
    const session: SessionFile = {
      type: "worker",
      agentId: spec.agentId,
      sessionId: spec.sessionId,
      // The runtime and key are irrelevant to these tests; casts keep the fixture minimal.
      runtime: "claude" as any,
      startedAt: spec.startedAt,
      apiUrl: "http://localhost",
      privateKeyJwk: {} as any,
      taskId: spec.taskId,
      status: "rate_limited",
      resumeAfter: spec.resumeAfter,
    };
    return session;
  };

  /**
   * Simulates a daemon restart: installs a fresh SessionManager as the
   * singleton, then constructs a DaemonLoop with an idle pool fake and marks
   * it as running so that its private methods can be driven directly.
   */
  const startFreshLoop = (): DaemonLoop => {
    // Seed the singleton first so the loop constructed below binds to it.
    _setSessionManagerForTest(new SessionManager());

    // Pool fake reporting no active work.
    const idlePool = {
      activeCount: 0,
      hasTask: (_id: string) => false,
      getActiveTaskIds: () => [],
    } as any;

    const loop = new DaemonLoop(makeClient(), idlePool, makeRateLimiter(), makePrMonitor(), {
      maxConcurrent: 4,
      pollInterval: 1000,
    });
    (loop as any).running = true;
    return loop;
  };

  afterEach(() => {
    clearAllSessions();
    _setSessionManagerForTest(null);
    // Drop recorded calls so the "not called" assertion in the next test does
    // not depend on a reset performed elsewhere in the file.
    resumeOneSessionMock.mockClear();
  });

  it("resumes a rate_limited session whose resumeAfter was written to disk before daemon restart", async () => {
    // Step 1: write a rate_limited session to disk (simulating post-crash state).
    const sessionId = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee";
    const now = Date.now();
    writeSession(
      makeRateLimitedSession({
        sessionId,
        agentId: "agent-restart-test",
        taskId: "task-restart-test",
        startedAt: now - 120_000,
        resumeAfter: now - 5_000, // expired 5 seconds ago
      }),
    );

    // Step 2: restart — fresh SessionManager, fresh DaemonLoop.
    const freshLoop = startFreshLoop();

    // Step 3: call resumeBackoffSessions — must find the on-disk session.
    await (freshLoop as any).resumeBackoffSessions();

    // Assert: resumeOneSession was called exactly once, for the on-disk session.
    expect(resumeOneSessionMock).toHaveBeenCalledTimes(1);
    const sessionArg: SessionFile = resumeOneSessionMock.mock.calls[0][0];
    expect(sessionArg.sessionId).toBe(sessionId);
    expect(sessionArg.taskId).toBe("task-restart-test");
    expect(sessionArg.status).toBe("rate_limited");
  });

  it("does NOT resume a rate_limited session whose resumeAfter is still in the future after restart", async () => {
    // Write a session whose backoff has NOT expired yet.
    const now = Date.now();
    writeSession(
      makeRateLimitedSession({
        sessionId: "ffffffff-aaaa-bbbb-cccc-dddddddddddd",
        agentId: "agent-future-test",
        taskId: "task-future-test",
        startedAt: now - 60_000,
        resumeAfter: now + 600_000, // still 10 minutes away
      }),
    );

    const freshLoop = startFreshLoop();

    await (freshLoop as any).resumeBackoffSessions();

    expect(resumeOneSessionMock).not.toHaveBeenCalled();
  });
});
339+
233340 describe ( "both callers pass the same non-empty prompt string" , ( ) => {
234341 it ( "resumeRateLimitedSessions and resumeBackoffSessions pass identical message text" , async ( ) => {
235342 // Call each function in isolation so call indices are unambiguous.
0 commit comments