1+ import { lstat , realpath } from "node:fs/promises" ;
12import path from "node:path" ;
23import { logVerbose , shouldLogVerbose } from "../globals.js" ;
34import { formatErrorMessage } from "../infra/errors.js" ;
45import { FsSafeError , readLocalFileSafely } from "../infra/fs-safe.js" ;
56import { assertNoWindowsNetworkPath , safeFileURLToPath } from "../infra/local-file-access.js" ;
67import type { PinnedDispatcherPolicy , SsrFPolicy } from "../infra/net/ssrf.js" ;
8+ import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js" ;
79import { getActivePluginRegistry } from "../plugins/runtime.js" ;
810import { uniqueValues } from "../shared/string-normalization.js" ;
911import { resolveUserPath } from "../utils.js" ;
@@ -157,6 +159,9 @@ const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]);
157159// security-boundary review, but extension-declared .html files still need to
158160// fail closed instead of falling through to binary/media sniffing.
const HOST_READ_DECLARED_TEXT_MIMES = new Set<string>(HOST_READ_TEXT_PLAIN_ALIASES).add(
  "text/html",
);
// Shared rejection message for extension-declared text files (CSV/Markdown/HTML)
// that do not pass host-read validation.
const HOST_READ_DECLARED_TEXT_ERROR = [
  "hostReadCapability permits only validated plain-text CSV/Markdown documents",
  "and trusted generated HTML reports for local reads",
].join(" ");
// Number of bytes in one mebibyte.
const MB = 1024 ** 2;
161166
162167function getTextStats ( text : string ) : { printableRatio : number } {
@@ -226,18 +231,71 @@ function decodeHostReadText(buffer: Buffer): string | undefined {
226231}
227232
/**
 * Reports whether `buffer` passes host-read text validation.
 *
 * @param buffer - Raw file bytes; may be omitted.
 * @returns `true` exactly when `getValidatedHostReadText` yields a string
 *   (including the empty string for an empty buffer), otherwise `false`.
 */
function isValidatedHostReadText(buffer?: Buffer): boolean {
  const validatedText = getValidatedHostReadText(buffer);
  return typeof validatedText === "string";
}
236+
/**
 * Decodes `buffer` as host-read text and returns it only when it looks printable.
 *
 * @param buffer - Raw file bytes; may be omitted.
 * @returns The decoded text when more than 95% of it is printable, `""` for an
 *   empty buffer, or `undefined` when the buffer is missing, cannot be decoded,
 *   or falls at or below the printable threshold.
 */
function getValidatedHostReadText(buffer?: Buffer): string | undefined {
  if (!buffer) {
    return undefined;
  }
  if (buffer.length > 0) {
    const decoded = decodeHostReadText(buffer);
    if (decoded === undefined) {
      return undefined;
    }
    const stats = getTextStats(decoded);
    if (stats.printableRatio > 0.95) {
      return decoded;
    }
    return undefined;
  }
  // An empty file is trivially valid text.
  return "";
}
251+
/**
 * Reports whether `filePath` is `root` itself or lies somewhere beneath it.
 *
 * Both arguments are resolved lexically with `path.resolve`; symlinks are not
 * followed, so callers that care about link targets must pass real paths.
 *
 * @param filePath - Candidate path; `undefined` or `""` is never inside.
 * @param root - Directory that must contain `filePath`.
 * @returns `true` when the resolved `filePath` equals or is nested under the
 *   resolved `root`, otherwise `false`.
 */
function isPathInsideRoot(filePath: string | undefined, root: string): boolean {
  if (!filePath) {
    return false;
  }
  const relative = path.relative(path.resolve(root), path.resolve(filePath));
  if (relative === "") {
    // Same location as the root.
    return true;
  }
  // Only a leading ".." *segment* climbs out of the root. A plain prefix test
  // would also reject in-root entries whose name merely begins with two dots
  // (for example "..cache/report.html").
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  // An absolute result means there is no relative route between the two
  // locations (for example different Windows drives).
  return !escapesRoot && !path.isAbsolute(relative);
}
261+
/**
 * Heuristically checks whether `text` looks like an HTML document.
 *
 * Only the first 8192 characters after leading whitespace are inspected.
 *
 * @param text - Decoded file contents.
 * @returns `true` when the inspected prefix starts with `<!doctype html` or
 *   `<html` (case-insensitive), or contains a closing `</html>` or `</body>`
 *   tag; otherwise `false`.
 */
function hasHtmlDocumentShape(text: string): boolean {
  const head = text.trimStart().slice(0, 8192);
  if (/^(?:<!doctype\s+html\b|<html\b)/iu.test(head)) {
    return true;
  }
  return /<\/(?:html|body)>/iu.test(head);
}
266+
/**
 * Decides whether `filePath` qualifies as a trusted location for a generated
 * HTML report.
 *
 * The path qualifies only when `lstat` reports a regular file that is not a
 * symbolic link and has exactly one hard link, and when its real path lies
 * inside the real path of the directory returned by
 * `resolvePreferredOpenClawTmpDir()`. Failures from `lstat` or `realpath` are
 * treated as "not trusted" rather than propagated.
 *
 * @param filePath - Local path to inspect; may be omitted.
 * @returns A promise resolving to `true` when every check passes, else `false`.
 */
async function isTrustedGeneratedHostReadHtmlPath(filePath: string | undefined): Promise<boolean> {
  if (!filePath) {
    return false;
  }
  const stats = await lstat(filePath).catch(() => undefined);
  if (stats === undefined) {
    return false;
  }
  const isPlainSingleLinkFile = stats.isFile() && !stats.isSymbolicLink() && stats.nlink === 1;
  if (!isPlainSingleLinkFile) {
    return false;
  }
  // A path that cannot be resolved is reported as undefined instead of throwing.
  const resolveOrUndefined = (target: string): Promise<string | undefined> =>
    realpath(target).catch(() => undefined);
  const [realFilePath, realTmpRoot] = await Promise.all([
    resolveOrUndefined(filePath),
    resolveOrUndefined(resolvePreferredOpenClawTmpDir()),
  ]);
  if (!realFilePath || !realTmpRoot) {
    return false;
  }
  return isPathInsideRoot(realFilePath, realTmpRoot);
}
283+
/**
 * Decides whether an extension-declared HTML file may be served as a trusted
 * generated report.
 *
 * @param params.filePath - Path of the file; accepted but not consulted here.
 * @param params.sniffedContentType - Content type detected from the bytes; any
 *   normalized value other than `text/html` rejects the file.
 * @param params.buffer - Raw file bytes to validate as text.
 * @param params.trustedGeneratedHtmlPath - Caller's verdict on whether the path
 *   is a trusted generated-HTML location; a falsy value rejects the file.
 * @returns `true` only when the sniff does not contradict HTML, the path is
 *   trusted, and the bytes validate as text with an HTML document shape.
 */
function isTrustedGeneratedHostReadHtml(params: {
  filePath?: string;
  sniffedContentType?: string;
  buffer?: Buffer;
  trustedGeneratedHtmlPath?: boolean;
}): boolean {
  const { sniffedContentType, buffer, trustedGeneratedHtmlPath } = params;
  const sniffedMime = normalizeMimeType(sniffedContentType);
  // A missing sniff result is tolerated; a non-HTML one is not.
  if (sniffedMime && sniffedMime !== "text/html") {
    return false;
  }
  if (!trustedGeneratedHtmlPath) {
    return false;
  }
  const validatedText = getValidatedHostReadText(buffer);
  if (validatedText === undefined) {
    return false;
  }
  return hasHtmlDocumentShape(validatedText);
}
242300
243301function formatMb ( bytes : number , digits = 2 ) : string {
@@ -268,6 +326,7 @@ function assertHostReadMediaAllowed(params: {
268326 filePath ?: string ;
269327 kind : MediaKind | undefined ;
270328 buffer ?: Buffer ;
329+ trustedGeneratedHtmlPath ?: boolean ;
271330} ) : void {
272331 const declaredMime = normalizeMimeType ( mimeTypeFromFilePath ( params . filePath ) ) ;
273332 const normalizedMime = normalizeMimeType ( params . contentType ) ;
@@ -276,6 +335,17 @@ function assertHostReadMediaAllowed(params: {
276335 // hits (for example BOM-prefixed 0xFF data sniffing as audio/mpeg), and
277336 // host-read should reject those instead of returning early on the sniff.
278337 if ( declaredMime && HOST_READ_DECLARED_TEXT_MIMES . has ( declaredMime ) ) {
338+ if (
339+ declaredMime === "text/html" &&
340+ isTrustedGeneratedHostReadHtml ( {
341+ filePath : params . filePath ,
342+ sniffedContentType : params . sniffedContentType ,
343+ buffer : params . buffer ,
344+ trustedGeneratedHtmlPath : params . trustedGeneratedHtmlPath ,
345+ } )
346+ ) {
347+ return ;
348+ }
279349 if (
280350 HOST_READ_TEXT_PLAIN_ALIASES . has ( declaredMime ) &&
281351 ! params . sniffedContentType &&
@@ -284,10 +354,7 @@ function assertHostReadMediaAllowed(params: {
284354 ) {
285355 return ;
286356 }
287- throw new LocalMediaAccessError (
288- "path-not-allowed" ,
289- "hostReadCapability permits only validated plain-text CSV/Markdown documents for local reads" ,
290- ) ;
357+ throw new LocalMediaAccessError ( "path-not-allowed" , HOST_READ_DECLARED_TEXT_ERROR ) ;
291358 }
292359 const sniffedKind = kindFromMime ( params . sniffedContentType ) ;
293360 if ( sniffedKind === "image" || sniffedKind === "audio" || sniffedKind === "video" ) {
@@ -915,6 +982,17 @@ async function loadWebMediaInternal(
915982 await assertLocalMediaAllowed ( mediaUrl , localRoots , { inboundRoots } ) ;
916983 }
917984
985+ const hostReadDeclaredMime = hostReadCapability
986+ ? normalizeMimeType ( mimeTypeFromFilePath ( mediaUrl ) )
987+ : undefined ;
988+ const trustedGeneratedHtmlPath =
989+ hostReadDeclaredMime === "text/html"
990+ ? await isTrustedGeneratedHostReadHtmlPath ( mediaUrl )
991+ : false ;
992+ if ( hostReadDeclaredMime === "text/html" && ! trustedGeneratedHtmlPath ) {
993+ throw new LocalMediaAccessError ( "path-not-allowed" , HOST_READ_DECLARED_TEXT_ERROR ) ;
994+ }
995+
918996 // Local path
919997 let data : Buffer ;
920998 if ( readFileOverride ) {
@@ -955,6 +1033,7 @@ async function loadWebMediaInternal(
9551033 filePath : mediaUrl ,
9561034 kind,
9571035 buffer : data ,
1036+ trustedGeneratedHtmlPath,
9581037 } ) ;
9591038 }
9601039 let fileName = basenameFromAnyPath ( mediaUrl ) || undefined ;
0 commit comments