Skip to content

Commit ccb2708

Browse files
jayzalowitzclaude
andcommitted
fix(#187 AC#2): cancel through verify/install + stale partial + dead code
Four findings from Copilot's re-review of the security/correctness fixes: - **Cancel couldn't stop verify/install**: the runner removed the inFlight handle BEFORE verification, so cancelDownload() couldn't abort SHA-256 or rename. After verify completed, the runner would overwrite the user's cancelled status with complete/failed. Now the handle stays in inFlight through verify+install, and the runner checks handle.cancelled at every phase boundary (pre-verify, post-hash, pre-install, post-rename) and bails without writing terminal status if cancel fired. inFlight.delete moved to the final boundary. - **Stale .partial corrupting fresh resume**: a previous attempt that failed to delete .partial (e.g., Windows file lock during cancel) could cause a brand-new download row to "resume" from stale bytes. Now we only resume when bytes_downloaded > 0 on the row; otherwise we delete any pre-existing partial before starting fresh. - **UI showed Pause for verifying/installing but backend rejected it**: pauseDownload() returns ok:false for those statuses. Added PAUSABLE_STATUSES set (pending + downloading only); the action bar shows Cancel-only during verify/install. Cancel now actually works through those phases (per the runner change above). - **ensureDirectory() dead code**: exported but unused. Removed, along with the now-unused `dirname` import. Build clean. 25 route tests passing. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 255d446 commit ccb2708

2 files changed

Lines changed: 44 additions & 13 deletions

File tree

apps/api/src/embedded-llm/downloader.ts

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import {
88
statSync,
99
unlinkSync,
1010
} from 'node:fs';
11-
import { dirname, join } from 'node:path';
11+
import { join } from 'node:path';
1212
import { homedir } from 'node:os';
1313
import { Readable } from 'node:stream';
1414
import { pipeline } from 'node:stream/promises';
@@ -218,13 +218,20 @@ async function runDownload(download: ModelDownloadRow): Promise<void> {
218218
}
219219

220220
const partialPath = partialPathFor(download.target_path);
221+
// Only resume from the partial if THIS row had progress recorded.
222+
// A fresh row (bytes_downloaded === 0) finding a partial on disk
223+
// means the previous attempt didn't clean up — e.g., a cancel that
224+
// failed to unlink on Windows. Resuming from those stale bytes
225+
// would corrupt the download.
221226
let resumeFrom = 0;
222-
if (existsSync(partialPath)) {
227+
if (download.bytes_downloaded > 0 && existsSync(partialPath)) {
223228
try {
224229
resumeFrom = statSync(partialPath).size;
225230
} catch {
226231
resumeFrom = 0;
227232
}
233+
} else if (existsSync(partialPath)) {
234+
try { unlinkSync(partialPath); } catch { /* best effort */ }
228235
}
229236

230237
const controller = new AbortController();
@@ -336,7 +343,13 @@ async function runDownload(download: ModelDownloadRow): Promise<void> {
336343
return;
337344
}
338345

339-
inFlight.delete(download.id);
346+
// Bail if the user clicked Cancel during the byte transfer. The
347+
// network stream finishes after a controller.abort(), so we may
348+
// arrive here with handle.cancelled already true.
349+
if (handle.cancelled) {
350+
inFlight.delete(download.id);
351+
return;
352+
}
340353
await modelDownloadRepository.setStatus(download.id, 'verifying', {
341354
bytesDownloaded: totalBytes,
342355
});
@@ -352,10 +365,18 @@ async function runDownload(download: ModelDownloadRow): Promise<void> {
352365
// Stream the file through the hash — multi-GB GGUFs would OOM the
353366
// API process if we readFile()'d the whole thing into memory.
354367
const actual = await computeSha256(partialPath);
368+
// Cancel can fire mid-hash. If it did, cancelDownload() already
369+
// marked the row 'cancelled' and removed the partial — don't
370+
// overwrite that with verify's outcome.
371+
if (handle.cancelled) {
372+
inFlight.delete(download.id);
373+
return;
374+
}
355375
if (actual !== expectedHash) {
356376
// Corrupt download. Delete the partial — user should retry,
357377
// and we don't want a half-bad GGUF lingering on disk.
358378
try { unlinkSync(partialPath); } catch { /* best effort */ }
379+
inFlight.delete(download.id);
359380
await modelDownloadRepository.setStatus(download.id, 'failed', {
360381
error: `sha256 mismatch: expected ${expectedHash}, got ${actual}`,
361382
bytesDownloaded: 0,
@@ -364,18 +385,26 @@ async function runDownload(download: ModelDownloadRow): Promise<void> {
364385
}
365386
}
366387

388+
if (handle.cancelled) {
389+
inFlight.delete(download.id);
390+
return;
391+
}
367392
await modelDownloadRepository.setStatus(download.id, 'installing');
368393
// Atomic rename — partial → final. Same filesystem so this is O(1).
369394
try {
370395
if (existsSync(download.target_path)) unlinkSync(download.target_path);
371396
renameSync(partialPath, download.target_path);
372397
} catch (err) {
398+
inFlight.delete(download.id);
399+
if (handle.cancelled) return;
373400
await modelDownloadRepository.setStatus(download.id, 'failed', {
374401
error: `install (rename) failed: ${err instanceof Error ? err.message : String(err)}`,
375402
});
376403
return;
377404
}
378405

406+
inFlight.delete(download.id);
407+
if (handle.cancelled) return;
379408
await modelDownloadRepository.setStatus(download.id, 'complete');
380409
log.info('Model download complete', {
381410
downloadId: download.id,
@@ -403,15 +432,6 @@ export async function recoverOnBoot(): Promise<void> {
403432
}
404433
}
405434

406-
/**
407-
* Helper used by the dirname-only check in tests. Exported so the
408-
* route handler doesn't need to import `node:path` separately.
409-
*/
410-
export function ensureDirectory(filePath: string): void {
411-
const dir = dirname(filePath);
412-
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
413-
}
414-
415435
/**
416436
* Stream the file through a SHA-256 hash. Multi-GB GGUFs would OOM
417437
* the API process if we loaded them into memory whole.

apps/web/public/js/components/embedded-llm-card.js

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,14 @@ function getCurrentUserId() {
3535
}
3636
}
3737

38+
// Statuses that mean "something is in flight" — drives the polling
39+
// loop and keeps the card showing a progress UI rather than the picker.
3840
const ACTIVE_STATUSES = new Set(['pending', 'downloading', 'verifying', 'installing']);
41+
// Subset where the backend can actually pause. pauseDownload() returns
42+
// ok:false for verifying/installing, so we hide the Pause button there.
43+
// Cancel still works through verify/install — the runner checks the
44+
// cancelled flag at phase boundaries.
45+
const PAUSABLE_STATUSES = new Set(['pending', 'downloading']);
3946

4047
const POLL_INTERVAL_MS = 1000;
4148

@@ -94,12 +101,16 @@ function downloadCardHtml(download, modelName) {
94101
const bytesSoFar = formatBytes(download.bytesDownloaded);
95102
const totalSize = formatBytes(download.totalBytes);
96103
const isActive = ACTIVE_STATUSES.has(status);
104+
const isPausable = PAUSABLE_STATUSES.has(status);
97105
const isPaused = status === 'paused';
98106
const isError = status === 'failed';
99107

100108
const actionButtons = (() => {
101109
if (isActive) {
102-
return `<button class="btn btn-outline btn-sm" data-action="embedded-pause-download" data-download-id="${escapeHtml(download.id)}">Pause</button>
110+
const pauseBtn = isPausable
111+
? `<button class="btn btn-outline btn-sm" data-action="embedded-pause-download" data-download-id="${escapeHtml(download.id)}">Pause</button>`
112+
: '';
113+
return `${pauseBtn}
103114
<button class="btn btn-outline btn-sm" data-action="embedded-cancel-download" data-download-id="${escapeHtml(download.id)}">Cancel</button>`;
104115
}
105116
if (isPaused) {

0 commit comments

Comments
 (0)