11import { createHash } from "node:crypto" ;
2+ import { constants as FS_CONSTANTS , createReadStream } from "node:fs" ;
23import fs from "node:fs/promises" ;
34import path from "node:path" ;
45import {
@@ -52,6 +53,14 @@ const AUTH_PROFILE_VOLATILE_FIELDS: ReadonlySet<string> = new Set([
5253 */
5354const MAX_AUTH_PROFILES_BYTES = 8 * 1024 * 1024 ;
5455
56+ /**
57+ * Hard cap on the bytes we will read + hash from models.json (Aisle
58+ * medium #2 on PR #73260). Realistic models.json sizes are dominated
59+ * by listed models per provider; ~1 MiB is plenty of headroom while
60+ * bounding the worst-case allocation.
61+ */
62+ const MAX_MODELS_JSON_BYTES = 1 * 1024 * 1024 ;
63+
5564/**
5665 * Maximum recursion depth when stripping volatile fields. Bounds the
5766 * recursive walk so deeply-nested JSON cannot stack-overflow the gateway
@@ -72,41 +81,105 @@ const DANGEROUS_PROTO_KEYS: ReadonlySet<string> = new Set([
7281] ) ;
7382
7483/**
75- * Compute a content-based fingerprint for auth-profiles.json that is
76- * stable across OAuth token rotations. Returns null if the file does
77- * not exist; falls back to hashing raw bytes if JSON parsing fails (so
78- * structural changes still register, just without canonicalization).
84+ * Stream-hash a regular file with bounded memory. Closes a family of
85+ * issues raised on PR #73260:
86+ * - Codex P1 "Enforce size limit when hashing oversized auth-profiles":
87+ * the previous oversize-branch did fs.readFile(path) which pulled the
88+ * entire file into memory regardless of MAX_AUTH_PROFILES_BYTES.
89+ * - Aisle medium #2 (CWE-400 unbounded read): same problem on
90+ * models.json hashing.
91+ * - Aisle medium #3 (CWE-59 symlink-following reads): rejects symlinks
92+ * and non-regular files via lstat before opening. Uses O_NOFOLLOW
93+ * where supported so a symlink swap-in between lstat and open also
94+ * fails closed.
95+ * - Returns null on any abnormality so callers force a re-plan.
96+ *
97+ * The streaming reader is destroyed if accumulated bytes exceed maxBytes,
98+ * so an attacker cannot grow the file between lstat and read past the
99+ * cap.
79100 */
80- async function readAuthProfilesStableHash ( pathname : string ) : Promise < string | null > {
81- // Bound the read by file size before pulling it into memory + parsing.
82- // Above the cap we hash raw bytes (already-streamed by readFile) instead
83- // of running JSON.parse + the recursive transform.
84- const stat = await fs . stat ( pathname ) . catch ( ( ) => null ) ;
85- if ( ! stat ) {
101+ async function safeHashRegularFile (
102+ pathname : string ,
103+ maxBytes : number ,
104+ ) : Promise < { hash : string ; raw : Buffer | null } | null > {
105+ // lstat + isFile() + isSymbolicLink() rejects symlinks and any
106+ // non-regular file (directory, socket, FIFO, device).
107+ const lst = await fs . lstat ( pathname ) . catch ( ( ) => null ) ;
108+ if ( ! lst || lst . isSymbolicLink ( ) || ! lst . isFile ( ) ) {
86109 return null ;
87110 }
88- if ( stat . size > MAX_AUTH_PROFILES_BYTES ) {
89- let raw : Buffer ;
90- try {
91- raw = await fs . readFile ( pathname ) ;
92- } catch {
93- return null ;
94- }
95- return createHash ( "sha256" ) . update ( raw ) . digest ( "hex" ) ;
111+ if ( lst . size > maxBytes ) {
112+ // File too large at lstat time — don't even open it. Caller forces
113+ // re-plan. We return a deterministic sentinel hash for size-
114+ // exceeded so size changes still invalidate the cache.
115+ return { hash : `oversize:${ lst . size } ` , raw : null } ;
96116 }
97- let raw : string ;
117+ // Open with O_NOFOLLOW (where the platform supports it) to close a
118+ // narrow TOCTOU window between lstat and open: if a symlink is
119+ // swapped in after lstat succeeds, the open will fail (ELOOP) instead
120+ // of following the link.
121+ const flags =
122+ typeof FS_CONSTANTS . O_NOFOLLOW === "number"
123+ ? FS_CONSTANTS . O_RDONLY | FS_CONSTANTS . O_NOFOLLOW
124+ : FS_CONSTANTS . O_RDONLY ;
125+ let fh : Awaited < ReturnType < typeof fs . open > > | null = null ;
98126 try {
99- raw = await fs . readFile ( pathname , "utf8" ) ;
127+ fh = await fs . open ( pathname , flags ) ;
128+ // fstat after open: if the open raced past a symlink swap (only
129+ // possible on platforms without O_NOFOLLOW), the fd should still
130+ // refer to a regular file.
131+ const fst = await fh . stat ( ) ;
132+ if ( ! fst . isFile ( ) || fst . size > maxBytes ) {
133+ return null ;
134+ }
135+ const stream = createReadStream ( "" , { fd : fh . fd , autoClose : false , highWaterMark : 64 * 1024 } ) ;
136+ const hash = createHash ( "sha256" ) ;
137+ let seen = 0 ;
138+ const chunks : Buffer [ ] = [ ] ;
139+ await new Promise < void > ( ( resolve , reject ) => {
140+ stream . on ( "data" , ( chunk : Buffer ) => {
141+ seen += chunk . length ;
142+ if ( seen > maxBytes ) {
143+ stream . destroy ( new Error ( "file grew past cap during read" ) ) ;
144+ return ;
145+ }
146+ hash . update ( chunk ) ;
147+ chunks . push ( chunk ) ;
148+ } ) ;
149+ stream . on ( "error" , reject ) ;
150+ stream . on ( "end" , ( ) => resolve ( ) ) ;
151+ } ) ;
152+ return { hash : hash . digest ( "hex" ) , raw : Buffer . concat ( chunks ) } ;
100153 } catch {
101154 return null ;
155+ } finally {
156+ await fh ?. close ( ) . catch ( ( ) => undefined ) ;
157+ }
158+ }
159+
160+ /**
161+ * Compute a content-based fingerprint for auth-profiles.json that is
162+ * stable across OAuth token rotations. Returns null if the file does
163+ * not exist or fails the safe-read checks (symlink, non-regular,
164+ * oversize). Falls back to a raw-content hash if JSON parsing fails
165+ * (so structural changes still register, just without canonicalization).
166+ */
167+ async function readAuthProfilesStableHash ( pathname : string ) : Promise < string | null > {
168+ const safe = await safeHashRegularFile ( pathname , MAX_AUTH_PROFILES_BYTES ) ;
169+ if ( ! safe ) {
170+ return null ;
171+ }
172+ if ( safe . raw === null ) {
173+ // Oversize sentinel — caller invalidates cache by mismatch.
174+ return safe . hash ;
102175 }
103176 let parsed : unknown ;
104177 try {
105- parsed = JSON . parse ( raw ) ;
178+ parsed = JSON . parse ( safe . raw . toString ( "utf8" ) ) ;
106179 } catch {
107- // File exists but is unparseable; hash the raw bytes so we still
108- // detect changes.
109- return createHash ( "sha256" ) . update ( raw ) . digest ( "hex" ) ;
180+ // File exists but is unparseable; the raw-content hash already
181+ // reflects this. Return it as-is so structural changes register .
182+ return safe . hash ;
110183 }
111184 const stable = stripAuthProfilesVolatileFields ( parsed , 0 ) ;
112185 return createHash ( "sha256" ) . update ( stableStringify ( stable ) ) . digest ( "hex" ) ;
@@ -143,22 +216,27 @@ function stripAuthProfilesVolatileFields(value: unknown, depth: number): unknown
143216}
144217
145218/**
146- * Hash the contents of models.json so external edits / partial corruption /
147- * manual tampering invalidate the readyCache. The fingerprint alone
148- * cannot catch external edits because it does not include models.json
149- * state (its contents are the OUTPUT, not an input). Instead we capture
150- * a content hash AT WRITE TIME and verify it on every cache hit.
219+ * Hash the contents of models.json so external edits / partial
220+ * corruption / manual tampering invalidate the readyCache. The
221+ * fingerprint alone cannot catch external edits because it does not
222+ * include models.json state (its contents are the OUTPUT, not an
223+ * input). Instead we capture a content hash AT WRITE TIME and verify
224+ * it on every cache hit.
151225 *
152- * Returns null when the file does not exist — the caller treats this as
153- * "no captured state" and forces a re-plan.
226+ * Returns null when the file is absent OR fails the safe-read checks
227+ * (symlink, non-regular, oversize, or any I/O error). Two consecutive
228+ * absent reads (write-time and check-time) compare equal as `null ===
229+ * null`, which is a valid steady-state cache hit (file legitimately
230+ * does not exist). Any disagreement — including a captured non-null
231+ * hash followed by a null read, or a string hash followed by a different
232+ * string — forces re-plan. This is the intended skip-and-noop
233+ * semantics: stable absence means stable result, drift means re-plan.
234+ * (Greptile P2 on PR #73260 noted the previous JSDoc was the opposite
235+ * of this behaviour.)
154236 */
155237async function readModelsJsonContentHash ( pathname : string ) : Promise < string | null > {
156- try {
157- const raw = await fs . readFile ( pathname ) ;
158- return createHash ( "sha256" ) . update ( raw ) . digest ( "hex" ) ;
159- } catch {
160- return null ;
161- }
238+ const safe = await safeHashRegularFile ( pathname , MAX_MODELS_JSON_BYTES ) ;
239+ return safe ? safe . hash : null ;
162240}
163241
164242function stableStringify ( value : unknown ) : string {
@@ -240,24 +318,36 @@ async function readExistingModelsFile(pathname: string): Promise<{
240318}
241319
242320export async function ensureModelsFileModeForModelsJson ( pathname : string ) : Promise < void > {
243- // CWE-59 (symlink-following chmod) hardening: refuse to chmod a
244- // symlink. fs.chmod follows links, so if an attacker can replace
245- // ${agentDir}/models.json with a symlink pointing at a sensitive file
246- // owned by the gateway user, this best-effort chmod would change
247- // permissions on the link target instead. lstat first; if the path
248- // is a symlink (or anything other than a regular file), bail.
249- let stat : Awaited < ReturnType < typeof fs . lstat > > ;
321+ // CWE-59 + CWE-367 hardening (Aisle high #1 on #72869 + Aisle medium
322+ // #1 on #73260): the previous lstat-then-chmod sequence was racy —
323+ // an attacker who could rename/replace ${agentDir}/models.json
324+ // between lstat() and chmod() could win the race and have chmod()
325+ // follow a swapped-in symlink to an arbitrary file owned by the
326+ // gateway user.
327+ //
328+ // Instead, open the file with O_NOFOLLOW (where supported) so the
329+ // open itself refuses symlinks atomically, then fchmod() through the
330+ // resulting file descriptor. This collapses the check-and-act into a
331+ // single kernel-mediated operation, eliminating the race.
332+ const flags =
333+ typeof FS_CONSTANTS . O_NOFOLLOW === "number"
334+ ? FS_CONSTANTS . O_RDONLY | FS_CONSTANTS . O_NOFOLLOW
335+ : FS_CONSTANTS . O_RDONLY ;
336+ let fh : Awaited < ReturnType < typeof fs . open > > | null = null ;
250337 try {
251- stat = await fs . lstat ( pathname ) ;
338+ fh = await fs . open ( pathname , flags ) ;
339+ const fst = await fh . stat ( ) ;
340+ if ( ! fst . isFile ( ) ) {
341+ return ;
342+ }
343+ await fh . chmod ( 0o600 ) ;
252344 } catch {
253- return ; // best-effort — file may not exist yet
254- }
255- if ( stat . isSymbolicLink ( ) || ! stat . isFile ( ) ) {
256- return ;
345+ // best-effort — file may not exist yet, may be a symlink (open
346+ // fails with ELOOP under O_NOFOLLOW), or may have been deleted
347+ // between checks. Any of these are acceptable no-ops.
348+ } finally {
349+ await fh ?. close ( ) . catch ( ( ) => undefined ) ;
257350 }
258- await fs . chmod ( pathname , 0o600 ) . catch ( ( ) => {
259- // best-effort
260- } ) ;
261351}
262352
263353export async function writeModelsFileAtomicForModelsJson (
0 commit comments