Skip to content

Commit aef7903

Browse files
committed
fix(media): allow trusted generated html attachments
1 parent 182d605 commit aef7903

2 files changed

Lines changed: 191 additions & 9 deletions

File tree

src/media/web-media.test.ts

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import fs from "node:fs/promises";
2+
import os from "node:os";
23
import path from "node:path";
34
import { pathToFileURL } from "node:url";
45
import JSZip from "jszip";
@@ -650,9 +651,111 @@ describe("loadWebMedia", () => {
650651
expect(result.contentType).toBe("text/markdown");
651652
});
652653

653-
it("rejects host-read HTML files without a separate security-boundary approval", async () => {
654+
it("allows trusted generated host-read HTML reports under OpenClaw temp root", async () => {
654655
const htmlFile = path.join(fixtureRoot, "report.html");
655656
await fs.writeFile(htmlFile, "<!doctype html><title>Report</title><h1>Report</h1>\n", "utf8");
657+
const result = await loadWebMedia(htmlFile, {
658+
maxBytes: 1024 * 1024,
659+
localRoots: "any",
660+
readFile: async (filePath) => await fs.readFile(filePath),
661+
hostReadCapability: true,
662+
});
663+
expect(result.kind).toBe("document");
664+
expect(result.contentType).toBe("text/html");
665+
});
666+
667+
it("rejects host-read HTML files outside the trusted OpenClaw temp root", async () => {
668+
const outsideRoot = await fs.mkdtemp(path.join(os.tmpdir(), "web-media-host-html-"));
669+
const htmlFile = path.join(outsideRoot, "report.html");
670+
await fs.writeFile(htmlFile, "<!doctype html><title>Report</title><h1>Report</h1>\n", "utf8");
671+
try {
672+
await expectLoadWebMediaErrorCode(
673+
loadWebMedia(htmlFile, {
674+
maxBytes: 1024 * 1024,
675+
localRoots: "any",
676+
readFile: async (filePath) => await fs.readFile(filePath),
677+
hostReadCapability: true,
678+
}),
679+
"path-not-allowed",
680+
);
681+
} finally {
682+
await fs.rm(outsideRoot, { recursive: true, force: true });
683+
}
684+
});
685+
686+
it("rejects trusted host-read HTML symlinks that resolve outside OpenClaw temp root", async () => {
687+
const outsideRoot = await fs.mkdtemp(path.join(os.tmpdir(), "web-media-host-html-"));
688+
const outsideHtml = path.join(outsideRoot, "report.html");
689+
const htmlLink = path.join(fixtureRoot, "linked-report.html");
690+
await fs.writeFile(
691+
outsideHtml,
692+
"<!doctype html><title>Outside</title><body>secret</body>\n",
693+
"utf8",
694+
);
695+
try {
696+
await fs.symlink(outsideHtml, htmlLink);
697+
} catch (error) {
698+
await fs.rm(outsideRoot, { recursive: true, force: true });
699+
if ((error as NodeJS.ErrnoException).code === "EPERM") {
700+
return;
701+
}
702+
throw error;
703+
}
704+
try {
705+
await expectLoadWebMediaErrorCode(
706+
loadWebMedia(htmlLink, {
707+
maxBytes: 1024 * 1024,
708+
localRoots: "any",
709+
readFile: async (filePath) => await fs.readFile(filePath),
710+
hostReadCapability: true,
711+
}),
712+
"path-not-allowed",
713+
);
714+
} finally {
715+
await fs.rm(htmlLink, { force: true });
716+
await fs.rm(outsideRoot, { recursive: true, force: true });
717+
}
718+
});
719+
720+
it("rejects trusted host-read HTML hardlinks to files outside OpenClaw temp root", async () => {
721+
const outsideRoot = await fs.mkdtemp(
722+
path.join(path.dirname(resolvePreferredOpenClawTmpDir()), "web-media-host-html-"),
723+
);
724+
const outsideHtml = path.join(outsideRoot, "report.html");
725+
const htmlLink = path.join(fixtureRoot, "hardlinked-report.html");
726+
await fs.writeFile(
727+
outsideHtml,
728+
"<!doctype html><title>Outside</title><body>secret</body>\n",
729+
"utf8",
730+
);
731+
try {
732+
await fs.link(outsideHtml, htmlLink);
733+
} catch (error) {
734+
await fs.rm(outsideRoot, { recursive: true, force: true });
735+
if ((error as NodeJS.ErrnoException).code === "EXDEV") {
736+
return;
737+
}
738+
throw error;
739+
}
740+
try {
741+
await expectLoadWebMediaErrorCode(
742+
loadWebMedia(htmlLink, {
743+
maxBytes: 1024 * 1024,
744+
localRoots: "any",
745+
readFile: async (filePath) => await fs.readFile(filePath),
746+
hostReadCapability: true,
747+
}),
748+
"path-not-allowed",
749+
);
750+
} finally {
751+
await fs.rm(htmlLink, { force: true });
752+
await fs.rm(outsideRoot, { recursive: true, force: true });
753+
}
754+
});
755+
756+
it("rejects trusted host-read HTML paths without HTML document shape", async () => {
757+
const htmlFile = path.join(fixtureRoot, "report.html");
758+
await fs.writeFile(htmlFile, "status,value\nok,1\n", "utf8");
656759
await expectLoadWebMediaErrorCode(
657760
loadWebMedia(htmlFile, {
658761
maxBytes: 1024 * 1024,

src/media/web-media.ts

Lines changed: 87 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import { lstat, realpath } from "node:fs/promises";
12
import path from "node:path";
23
import { logVerbose, shouldLogVerbose } from "../globals.js";
34
import { formatErrorMessage } from "../infra/errors.js";
45
import { FsSafeError, readLocalFileSafely } from "../infra/fs-safe.js";
56
import { assertNoWindowsNetworkPath, safeFileURLToPath } from "../infra/local-file-access.js";
67
import type { PinnedDispatcherPolicy, SsrFPolicy } from "../infra/net/ssrf.js";
8+
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
79
import { getActivePluginRegistry } from "../plugins/runtime.js";
810
import { uniqueValues } from "../shared/string-normalization.js";
911
import { resolveUserPath } from "../utils.js";
@@ -157,6 +159,9 @@ const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]);
157159
// security-boundary review, but extension-declared .html files still need to
158160
// fail closed instead of falling through to binary/media sniffing.
159161
const HOST_READ_DECLARED_TEXT_MIMES = new Set([...HOST_READ_TEXT_PLAIN_ALIASES, "text/html"]);
162+
const HOST_READ_DECLARED_TEXT_ERROR =
163+
"hostReadCapability permits only validated plain-text CSV/Markdown documents " +
164+
"and trusted generated HTML reports for local reads";
160165
const MB = 1024 * 1024;
161166

162167
function getTextStats(text: string): { printableRatio: number } {
@@ -226,18 +231,71 @@ function decodeHostReadText(buffer: Buffer): string | undefined {
226231
}
227232

228233
/**
 * Boolean convenience wrapper around `getValidatedHostReadText`.
 *
 * @param buffer Raw file bytes, if any were read.
 * @returns `true` whenever `getValidatedHostReadText` yields a string
 *   (the empty string included), `false` when it yields `undefined`.
 */
function isValidatedHostReadText(buffer?: Buffer): boolean {
234+
return getValidatedHostReadText(buffer) !== undefined;
235+
}
236+
237+
/**
 * Decodes a host-read buffer and returns its text only when it looks like
 * printable text.
 *
 * Post-commit behaviour (the `+` side of this diff; statements that follow an
 * `NNN-` marker are the removed boolean returns of the previous version):
 * - no buffer -> `undefined`
 * - zero-length buffer -> `""`
 * - `decodeHostReadText` returns `undefined` -> `undefined`
 * - otherwise the decoded text when `getTextStats(text).printableRatio` is
 *   strictly greater than 0.95, else `undefined`
 *
 * @param buffer Raw file bytes, if any were read.
 * @returns The decoded text, or `undefined` when validation fails.
 */
function getValidatedHostReadText(buffer?: Buffer): string | undefined {
229238
if (!buffer) {
230-
return false;
239+
return undefined;
231240
}
232241
if (buffer.length === 0) {
233-
return true;
242+
return "";
234243
}
235244
const text = decodeHostReadText(buffer);
236245
if (text === undefined) {
237-
return false;
246+
return undefined;
238247
}
239248
const { printableRatio } = getTextStats(text);
240-
return printableRatio > 0.95;
249+
return printableRatio > 0.95 ? text : undefined;
250+
}
251+
252+
/**
 * Reports whether `filePath` is `root` itself or located somewhere beneath it.
 *
 * Both arguments are passed through `path.resolve` before comparison, so
 * relative inputs are interpreted against the current working directory.
 * This is a purely lexical check: it does not touch the file system and does
 * not resolve symlinks, so callers that care about links must pass already
 * resolved paths.
 *
 * @param filePath Candidate path; `undefined` or an empty string yields `false`.
 * @param root Directory that is expected to contain `filePath`.
 * @returns `true` when the resolved `filePath` equals or descends from the
 *   resolved `root`, otherwise `false`.
 */
function isPathInsideRoot(filePath: string | undefined, root: string): boolean {
  if (!filePath) {
    return false;
  }
  const relative = path.relative(path.resolve(root), path.resolve(filePath));
  if (relative === "") {
    // Same location as the root itself.
    return true;
  }
  if (path.isAbsolute(relative)) {
    // path.relative returns an absolute path when no relative route exists
    // (for example a different drive on Windows).
    return false;
  }
  // Only a leading ".." *segment* means the path climbs out of the root.
  // A plain startsWith("..") would also reject legitimate names that merely
  // begin with two dots, such as "..cache/report.html".
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
261+
262+
/**
 * Heuristic check that decoded text looks like an HTML document.
 *
 * Only the first 8192 characters after leading whitespace is trimmed are
 * inspected. The text passes when that sample either starts with
 * `<!doctype html` or `<html` (case-insensitive), or contains a closing
 * `</html>` or `</body>` tag anywhere within the sample.
 *
 * @param text Decoded file contents.
 * @returns `true` when the sample matches either pattern.
 */
function hasHtmlDocumentShape(text: string): boolean {
263+
const sample = text.trimStart().slice(0, 8192);
264+
return /^(?:<!doctype\s+html\b|<html\b)/iu.test(sample) || /<\/(?:html|body)>/iu.test(sample);
265+
}
266+
267+
/**
 * Decides whether a local path qualifies as a trusted generated HTML file.
 *
 * The path is accepted only when all of the following hold:
 * - `lstat` succeeds and reports a regular file that is not a symbolic link
 *   and has a link count of exactly 1 (so additional hard links are refused);
 * - `realpath` succeeds for both the file and the directory returned by
 *   `resolvePreferredOpenClawTmpDir()`;
 * - the resolved file path is inside the resolved temp root according to
 *   `isPathInsideRoot`.
 *
 * Any `lstat`/`realpath` failure is swallowed and treated as "not trusted",
 * so this function resolves to `false` rather than rejecting.
 *
 * NOTE(review): the checks here are made against the path, not an open file
 * handle; whether the later read is protected against the file being swapped
 * in between depends on the caller — confirm.
 *
 * @param filePath Local path to inspect; `undefined`/empty yields `false`.
 * @returns Promise resolving to `true` only when every check above passes.
 */
async function isTrustedGeneratedHostReadHtmlPath(filePath: string | undefined): Promise<boolean> {
268+
if (!filePath) {
269+
return false;
270+
}
271+
const info = await lstat(filePath).catch(() => undefined);
272+
if (!info?.isFile() || info.isSymbolicLink() || info.nlink !== 1) {
273+
return false;
274+
}
275+
const [resolvedFilePath, resolvedTmpRoot] = await Promise.all([
276+
realpath(filePath).catch(() => undefined),
277+
realpath(resolvePreferredOpenClawTmpDir()).catch(() => undefined),
278+
]);
279+
return Boolean(
280+
resolvedFilePath && resolvedTmpRoot && isPathInsideRoot(resolvedFilePath, resolvedTmpRoot),
281+
);
282+
}
283+
284+
/**
 * Content-level gate for host-read HTML: combines the caller-supplied path
 * trust decision with a check of the bytes that were actually read.
 *
 * Returns `false` when:
 * - the normalized sniffed content type is non-empty and is not `text/html`;
 * - `trustedGeneratedHtmlPath` is falsy;
 * - `getValidatedHostReadText` rejects the buffer; or
 * - the decoded text fails `hasHtmlDocumentShape`.
 *
 * @param params.filePath Accepted for call-site symmetry; not read here.
 * @param params.sniffedContentType Content type detected from the bytes, if any.
 * @param params.buffer Bytes that were read from the file.
 * @param params.trustedGeneratedHtmlPath Result of the path trust check made
 *   by the caller.
 * @returns `true` only when every check passes.
 */
function isTrustedGeneratedHostReadHtml(params: {
285+
filePath?: string;
286+
sniffedContentType?: string;
287+
buffer?: Buffer;
288+
trustedGeneratedHtmlPath?: boolean;
289+
}): boolean {
290+
const sniffedMime = normalizeMimeType(params.sniffedContentType);
291+
if (sniffedMime && sniffedMime !== "text/html") {
292+
return false;
293+
}
294+
if (!params.trustedGeneratedHtmlPath) {
295+
return false;
296+
}
297+
const text = getValidatedHostReadText(params.buffer);
298+
return text !== undefined && hasHtmlDocumentShape(text);
241299
}
242300

243301
function formatMb(bytes: number, digits = 2): string {
@@ -268,6 +326,7 @@ function assertHostReadMediaAllowed(params: {
268326
filePath?: string;
269327
kind: MediaKind | undefined;
270328
buffer?: Buffer;
329+
trustedGeneratedHtmlPath?: boolean;
271330
}): void {
272331
const declaredMime = normalizeMimeType(mimeTypeFromFilePath(params.filePath));
273332
const normalizedMime = normalizeMimeType(params.contentType);
@@ -276,6 +335,17 @@ function assertHostReadMediaAllowed(params: {
276335
// hits (for example BOM-prefixed 0xFF data sniffing as audio/mpeg), and
277336
// host-read should reject those instead of returning early on the sniff.
278337
if (declaredMime && HOST_READ_DECLARED_TEXT_MIMES.has(declaredMime)) {
338+
if (
339+
declaredMime === "text/html" &&
340+
isTrustedGeneratedHostReadHtml({
341+
filePath: params.filePath,
342+
sniffedContentType: params.sniffedContentType,
343+
buffer: params.buffer,
344+
trustedGeneratedHtmlPath: params.trustedGeneratedHtmlPath,
345+
})
346+
) {
347+
return;
348+
}
279349
if (
280350
HOST_READ_TEXT_PLAIN_ALIASES.has(declaredMime) &&
281351
!params.sniffedContentType &&
@@ -284,10 +354,7 @@ function assertHostReadMediaAllowed(params: {
284354
) {
285355
return;
286356
}
287-
throw new LocalMediaAccessError(
288-
"path-not-allowed",
289-
"hostReadCapability permits only validated plain-text CSV/Markdown documents for local reads",
290-
);
357+
throw new LocalMediaAccessError("path-not-allowed", HOST_READ_DECLARED_TEXT_ERROR);
291358
}
292359
const sniffedKind = kindFromMime(params.sniffedContentType);
293360
if (sniffedKind === "image" || sniffedKind === "audio" || sniffedKind === "video") {
@@ -915,6 +982,17 @@ async function loadWebMediaInternal(
915982
await assertLocalMediaAllowed(mediaUrl, localRoots, { inboundRoots });
916983
}
917984

985+
const hostReadDeclaredMime = hostReadCapability
986+
? normalizeMimeType(mimeTypeFromFilePath(mediaUrl))
987+
: undefined;
988+
const trustedGeneratedHtmlPath =
989+
hostReadDeclaredMime === "text/html"
990+
? await isTrustedGeneratedHostReadHtmlPath(mediaUrl)
991+
: false;
992+
if (hostReadDeclaredMime === "text/html" && !trustedGeneratedHtmlPath) {
993+
throw new LocalMediaAccessError("path-not-allowed", HOST_READ_DECLARED_TEXT_ERROR);
994+
}
995+
918996
// Local path
919997
let data: Buffer;
920998
if (readFileOverride) {
@@ -955,6 +1033,7 @@ async function loadWebMediaInternal(
9551033
filePath: mediaUrl,
9561034
kind,
9571035
buffer: data,
1036+
trustedGeneratedHtmlPath,
9581037
});
9591038
}
9601039
let fileName = basenameFromAnyPath(mediaUrl) || undefined;

0 commit comments

Comments
 (0)