Skip to content

Commit 6b1fe51

Browse files
committed
fix(auto-reply): split html host-read boundary
1 parent f688749 commit 6b1fe51

3 files changed

Lines changed: 32 additions & 33 deletions

File tree

src/auto-reply/reply/reply-media-paths.test.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ describe("createReplyMediaPathNormalizer", () => {
257257
workspaceDir: "/tmp/sandboxes/session-1",
258258
containerWorkdir: "/workspace",
259259
});
260-
const absolutePath = "/Users/peter/.openclaw/workspace/reports/report.html";
260+
const absolutePath = "/Users/peter/.openclaw/workspace/reports/screenshot.png";
261261
const normalize = createReplyMediaPathNormalizer({
262262
cfg: {},
263263
sessionKey: "session-key",
@@ -268,7 +268,9 @@ describe("createReplyMediaPathNormalizer", () => {
268268
mediaUrls: [absolutePath],
269269
});
270270

271-
expectMedia(result, "/tmp/outbound-media/report.html", ["/tmp/outbound-media/report.html"]);
271+
expectMedia(result, "/tmp/outbound-media/screenshot.png", [
272+
"/tmp/outbound-media/screenshot.png",
273+
]);
272274
expectOutboundAttachmentCall(0, absolutePath, 5 * 1024 * 1024);
273275
});
274276

src/media/web-media.test.ts

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -759,17 +759,18 @@ describe("loadWebMedia", () => {
759759
expect(result.contentType).toBe("text/markdown");
760760
});
761761

762-
it("allows host-read HTML files when the buffer validates as text", async () => {
762+
it("rejects host-read HTML files without a separate security-boundary approval", async () => {
763763
const htmlFile = path.join(fixtureRoot, "report.html");
764764
await fs.writeFile(htmlFile, "<!doctype html><title>Report</title><h1>Report</h1>\n", "utf8");
765-
const result = await loadWebMedia(htmlFile, {
766-
maxBytes: 1024 * 1024,
767-
localRoots: "any",
768-
readFile: async (filePath) => await fs.readFile(filePath),
769-
hostReadCapability: true,
770-
});
771-
expect(result.kind).toBe("document");
772-
expect(result.contentType).toBe("text/html");
765+
await expectLoadWebMediaErrorCode(
766+
loadWebMedia(htmlFile, {
767+
maxBytes: 1024 * 1024,
768+
localRoots: "any",
769+
readFile: async (filePath) => await fs.readFile(filePath),
770+
hostReadCapability: true,
771+
}),
772+
"path-not-allowed",
773+
);
773774
});
774775

775776
it.each([
@@ -883,12 +884,6 @@ describe("loadWebMedia", () => {
883884
contentType: "text/csv",
884885
body: ",,,,,,,,,,\n",
885886
},
886-
{
887-
label: "HTML",
888-
fileName: "punctuation.html",
889-
contentType: "text/html",
890-
body: "<!doctype html><hr><br>\n",
891-
},
892887
{
893888
label: "Markdown",
894889
fileName: "punctuation.md",
@@ -911,12 +906,6 @@ describe("loadWebMedia", () => {
911906
contentType: "text/csv",
912907
body: Buffer.from("caf\xe9,ni\xf1o\n", "latin1"),
913908
},
914-
{
915-
label: "HTML",
916-
fileName: "legacy.html",
917-
contentType: "text/html",
918-
body: Buffer.from("<p>caf\xe9</p>\n", "latin1"),
919-
},
920909
{
921910
label: "Markdown",
922911
fileName: "legacy.md",

src/media/web-media.ts

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,15 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([
148148
"application/x-tar",
149149
"application/zip",
150150
"text/csv",
151-
"text/html",
152151
"text/markdown",
153152
]);
154-
// file-type returns undefined (no magic bytes) for plain-text formats like CSV,
155-
// HTML, and Markdown, so host-read needs an explicit text validation fallback.
156-
const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/html", "text/markdown"]);
153+
// file-type returns undefined (no magic bytes) for plain-text formats like CSV
154+
// and Markdown, so host-read needs an explicit text validation fallback.
155+
const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]);
156+
// HTML remains deliberately outside the host-read allowlist pending a separate
157+
// security-boundary review, but extension-declared .html files still need to
158+
// fail closed instead of falling through to binary/media sniffing.
159+
const HOST_READ_DECLARED_TEXT_MIMES = new Set([...HOST_READ_TEXT_PLAIN_ALIASES, "text/html"]);
157160
const MB = 1024 * 1024;
158161

159162
function getTextStats(text: string): { printableRatio: number } {
@@ -272,13 +275,18 @@ function assertHostReadMediaAllowed(params: {
272275
// text validator path. Some opaque blobs can still produce bogus binary MIME
273276
// hits (for example BOM-prefixed 0xFF data sniffing as audio/mpeg), and
274277
// host-read should reject those instead of returning early on the sniff.
275-
if (declaredMime && HOST_READ_TEXT_PLAIN_ALIASES.has(declaredMime)) {
276-
if (!params.sniffedContentType && params.buffer && isValidatedHostReadText(params.buffer)) {
278+
if (declaredMime && HOST_READ_DECLARED_TEXT_MIMES.has(declaredMime)) {
279+
if (
280+
HOST_READ_TEXT_PLAIN_ALIASES.has(declaredMime) &&
281+
!params.sniffedContentType &&
282+
params.buffer &&
283+
isValidatedHostReadText(params.buffer)
284+
) {
277285
return;
278286
}
279287
throw new LocalMediaAccessError(
280288
"path-not-allowed",
281-
"hostReadCapability permits only validated plain-text CSV/HTML/Markdown documents for local reads",
289+
"hostReadCapability permits only validated plain-text CSV/Markdown documents for local reads",
282290
);
283291
}
284292
const sniffedKind = kindFromMime(params.sniffedContentType);
@@ -299,10 +307,10 @@ function assertHostReadMediaAllowed(params: {
299307
) {
300308
return;
301309
}
302-
// CSV / HTML / Markdown exception: file-type v22 returns undefined (not "text/plain") for
310+
// CSV / Markdown exception: file-type v22 returns undefined (not "text/plain") for
303311
// plain-text buffers that have no binary magic bytes. Allow these formats when:
304312
// - sniffedMime is undefined (no binary signature detected by file-type)
305-
// - The extension-derived MIME is text/csv, text/html, or text/markdown (operator intent)
313+
// - The extension-derived MIME is text/csv or text/markdown (operator intent)
306314
// - The buffer decodes as actual text instead of opaque binary bytes
307315
if (
308316
!sniffedMime &&
@@ -325,7 +333,7 @@ function assertHostReadMediaAllowed(params: {
325333
}
326334
throw new LocalMediaAccessError(
327335
"path-not-allowed",
328-
`Host-local media sends only allow buffer-verified images, audio, video, PDF, Office documents, archives, CSV, HTML, and Markdown (got ${sniffedMime ?? normalizedMime ?? "unknown"}).`,
336+
`Host-local media sends only allow buffer-verified images, audio, video, PDF, Office documents, archives, CSV, and Markdown (got ${sniffedMime ?? normalizedMime ?? "unknown"}).`,
329337
);
330338
}
331339

0 commit comments

Comments
 (0)