@@ -148,12 +148,15 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([
148148 "application/x-tar" ,
149149 "application/zip" ,
150150 "text/csv" ,
151- "text/html" ,
152151 "text/markdown" ,
153152] ) ;
154- // file-type returns undefined (no magic bytes) for plain-text formats like CSV,
155- // HTML, and Markdown, so host-read needs an explicit text validation fallback.
156- const HOST_READ_TEXT_PLAIN_ALIASES = new Set ( [ "text/csv" , "text/html" , "text/markdown" ] ) ;
153+ // file-type returns undefined (no magic bytes) for plain-text formats like CSV
154+ // and Markdown, so host-read needs an explicit text validation fallback.
155+ const HOST_READ_TEXT_PLAIN_ALIASES = new Set ( [ "text/csv" , "text/markdown" ] ) ;
156+ // HTML stays outside the host-read allowlist pending a separate security-boundary
157+ // review. This superset only routes a declared text/html MIME into the declared-text
158+ // check, where it always throws (fails closed) instead of reaching media sniffing.
159+ const HOST_READ_DECLARED_TEXT_MIMES = new Set ( [ ...HOST_READ_TEXT_PLAIN_ALIASES , "text/html" ] ) ;
157160const MB = 1024 * 1024 ;
158161
159162function getTextStats ( text : string ) : { printableRatio : number } {
@@ -272,13 +275,18 @@ function assertHostReadMediaAllowed(params: {
272275 // text validator path. Some opaque blobs can still produce bogus binary MIME
273276 // hits (for example BOM-prefixed 0xFF data sniffing as audio/mpeg), and
274277 // host-read should reject those instead of returning early on the sniff.
275- if ( declaredMime && HOST_READ_TEXT_PLAIN_ALIASES . has ( declaredMime ) ) {
276- if ( ! params . sniffedContentType && params . buffer && isValidatedHostReadText ( params . buffer ) ) {
278+ if ( declaredMime && HOST_READ_DECLARED_TEXT_MIMES . has ( declaredMime ) ) {
279+ if (
280+ HOST_READ_TEXT_PLAIN_ALIASES . has ( declaredMime ) &&
281+ ! params . sniffedContentType &&
282+ params . buffer &&
283+ isValidatedHostReadText ( params . buffer )
284+ ) {
277285 return ;
278286 }
279287 throw new LocalMediaAccessError (
280288 "path-not-allowed" ,
281- "hostReadCapability permits only validated plain-text CSV/HTML/ Markdown documents for local reads" ,
289+ "hostReadCapability permits only validated plain-text CSV/Markdown documents for local reads" ,
282290 ) ;
283291 }
284292 const sniffedKind = kindFromMime ( params . sniffedContentType ) ;
@@ -299,10 +307,10 @@ function assertHostReadMediaAllowed(params: {
299307 ) {
300308 return ;
301309 }
302- // CSV / HTML / Markdown exception: file-type v22 returns undefined (not "text/plain") for
310+ // CSV / Markdown exception: file-type v22 returns undefined (not "text/plain") for
303311 // plain-text buffers that have no binary magic bytes. Allow these formats when:
304312 // - sniffedMime is undefined (no binary signature detected by file-type)
305- // - The extension-derived MIME is text/csv, text/html, or text/markdown (operator intent)
313+ // - The extension-derived MIME is text/csv or text/markdown (operator intent)
306314 // - The buffer decodes as actual text instead of opaque binary bytes
307315 if (
308316 ! sniffedMime &&
@@ -325,7 +333,7 @@ function assertHostReadMediaAllowed(params: {
325333 }
326334 throw new LocalMediaAccessError (
327335 "path-not-allowed" ,
328- `Host-local media sends only allow buffer-verified images, audio, video, PDF, Office documents, archives, CSV, HTML, and Markdown (got ${ sniffedMime ?? normalizedMime ?? "unknown" } ).` ,
336+ `Host-local media sends only allow buffer-verified images, audio, video, PDF, Office documents, archives, CSV, and Markdown (got ${ sniffedMime ?? normalizedMime ?? "unknown" } ).` ,
329337 ) ;
330338}
331339
0 commit comments