Skip to content

Commit 8734194

Browse files
committed
tighten transcript image redaction boundary
1 parent 6bb4f02 commit 8734194

7 files changed

Lines changed: 380 additions & 85 deletions

packages/media-core/src/inline-image-data-url.test.ts

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
11
// Media Core tests cover inline image data url behavior.
22
import { describe, expect, it } from "vitest";
3-
import { sanitizeInlineImageDataUrl, sniffInlineImageMime } from "./inline-image-data-url.js";
3+
import {
4+
sanitizeInlineImageBase64,
5+
sanitizeInlineImageDataUrl,
6+
sanitizeInlineImageDataUrlForStorage,
7+
sniffInlineImageMime,
8+
} from "./inline-image-data-url.js";
49

510
const PNG_1X1 =
611
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII=";
12+
const BMP_HEADER = Buffer.from("BMfixture", "ascii").toString("base64");
13+
// Minimal ISO BMFF `ftyp` box: size 0x14 (20 bytes), major brand "heic", zero minor
// version, and a single compatible brand "mif1". The size field matches the byte count so
// the fixture stays a well-formed box.
const HEIC_HEADER = Buffer.from([
  0x00, 0x00, 0x00, 0x14, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63, 0x00, 0x00, 0x00, 0x00,
  0x6d, 0x69, 0x66, 0x31,
]).toString("base64");
// Minimal ISO BMFF `ftyp` box: size 0x10 (16 bytes), major brand "mif1", zero minor
// version, and no compatible brands.
const HEIF_HEADER = Buffer.from([
  0x00, 0x00, 0x00, 0x10, 0x66, 0x74, 0x79, 0x70, 0x6d, 0x69, 0x66, 0x31, 0x00, 0x00, 0x00, 0x00,
]).toString("base64");
720

821
describe("inline image data URL sanitizer", () => {
922
it("keeps non-data image references unchanged", () => {
@@ -26,6 +39,46 @@ describe("inline image data URL sanitizer", () => {
2639
);
2740
});
2841

42+
it("rejects image data URLs for formats that require conversion before provider transport", () => {
43+
expect(sanitizeInlineImageDataUrl(`data:image/bmp;base64,${BMP_HEADER}`)).toBeUndefined();
44+
expect(sanitizeInlineImageDataUrl(`data:image/heic;base64,${HEIC_HEADER}`)).toBeUndefined();
45+
expect(sanitizeInlineImageDataUrl(`data:image/heif;base64,${HEIF_HEADER}`)).toBeUndefined();
46+
});
47+
48+
it("canonicalizes valid image data URLs for storage without transport allowlist filtering", () => {
49+
expect(sanitizeInlineImageDataUrlForStorage(`data:image/bmp;base64,${BMP_HEADER}`)).toBe(
50+
`data:image/bmp;base64,${BMP_HEADER}`,
51+
);
52+
expect(sanitizeInlineImageDataUrlForStorage(`data:image/heic;base64,${HEIC_HEADER}`)).toBe(
53+
`data:image/heic;base64,${HEIC_HEADER}`,
54+
);
55+
});
56+
57+
it("canonicalizes valid image base64 with sniffed MIME type", () => {
58+
expect(sanitizeInlineImageBase64({ mimeType: "image/jpeg", base64: `\n${PNG_1X1}` })).toEqual({
59+
mimeType: "image/png",
60+
base64: PNG_1X1,
61+
});
62+
expect(
63+
sanitizeInlineImageBase64({ mimeType: "image/png", base64: "SGVsbG8=" }),
64+
).toBeUndefined();
65+
});
66+
67+
it("accepts supported non-browser image signatures", () => {
68+
expect(sanitizeInlineImageBase64({ mimeType: "image/bmp", base64: BMP_HEADER })).toEqual({
69+
mimeType: "image/bmp",
70+
base64: BMP_HEADER,
71+
});
72+
expect(sanitizeInlineImageBase64({ mimeType: "image/heic", base64: HEIC_HEADER })).toEqual({
73+
mimeType: "image/heic",
74+
base64: HEIC_HEADER,
75+
});
76+
expect(sanitizeInlineImageBase64({ mimeType: "image/heif", base64: HEIF_HEADER })).toEqual({
77+
mimeType: "image/heif",
78+
base64: HEIF_HEADER,
79+
});
80+
});
81+
2982
it("sniffs supported inline image signatures", () => {
3083
expect(sniffInlineImageMime(Buffer.from("GIF89a", "ascii"))).toBe("image/gif");
3184
expect(sniffInlineImageMime(Buffer.from([0xff, 0xd8, 0xff]))).toBe("image/jpeg");

packages/media-core/src/inline-image-data-url.ts

Lines changed: 93 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,80 @@ const IMAGE_SIGNATURES: Array<{
4040
(buffer.subarray(0, 6).toString("ascii") === "GIF87a" ||
4141
buffer.subarray(0, 6).toString("ascii") === "GIF89a"),
4242
},
43+
{
44+
mime: "image/bmp",
45+
matches: (buffer) => buffer.length >= 2 && buffer[0] === 0x42 && buffer[1] === 0x4d,
46+
},
4347
];
4448

49+
// ISO BMFF `ftyp` brands that sniffIsoBmffImageMime reports as "image/heic".
const HEIC_BRANDS = new Set([
  "heic",
  "heix",
  "hevc",
  "hevx",
  "heis",
  "heim",
  "hevm",
  "hevs",
]);

// ISO BMFF `ftyp` brands that sniffIsoBmffImageMime reports as "image/heif".
const HEIF_BRANDS = new Set(["mif1", "msf1"]);

// Number of leading base64 characters decoded for signature sniffing
// (128 base64 characters decode to at most 96 bytes).
const IMAGE_SIGNATURE_PREFIX_BASE64_CHARS = 128;

// Sniffed MIME types that sanitizeInlineImageDataUrl lets through.
const INLINE_IMAGE_DATA_URL_MIMES = new Set([
  "image/png",
  "image/jpeg",
  "image/webp",
  "image/gif",
]);
53+
4554
/** Reports whether `value` begins with INLINE_IMAGE_DATA_URL_PREFIX, compared case-insensitively. */
function startsWithDataUrl(value: string): boolean {
  const head = value.slice(0, INLINE_IMAGE_DATA_URL_PREFIX.length);
  return head.toLowerCase() === INLINE_IMAGE_DATA_URL_PREFIX;
}
5160

61+
/**
 * Sniffs HEIC/HEIF images from a leading ISO BMFF `ftyp` box.
 *
 * Collects the major brand (bytes 8-12) and the compatible brands that follow the 4-byte
 * minor version (byte 16 onward), then matches them against HEIC_BRANDS first and
 * HEIF_BRANDS second.
 *
 * @param buffer Leading bytes of the decoded payload; may be shorter than the `ftyp` box.
 * @returns "image/heic", "image/heif", or undefined when the bytes are not a recognized
 *   `ftyp` box.
 */
function sniffIsoBmffImageMime(buffer: Buffer): string | undefined {
  if (buffer.length < 12 || buffer.subarray(4, 8).toString("ascii") !== "ftyp") {
    return undefined;
  }
  // The first four bytes hold the big-endian box size. Compatible brands end where the box
  // ends; bytes past that point belong to the next box and must not be read as brands.
  // Sizes below 16 (including the special values 0 and 1) cannot describe a regular brand
  // list, so for those the whole buffer is scanned, as before.
  const declaredBoxSize = buffer.readUInt32BE(0);
  const brandListEnd =
    declaredBoxSize >= 16 ? Math.min(declaredBoxSize, buffer.length) : buffer.length;

  const brands = [buffer.subarray(8, 12).toString("ascii")];
  for (let offset = 16; offset + 4 <= brandListEnd; offset += 4) {
    brands.push(buffer.subarray(offset, offset + 4).toString("ascii"));
  }
  if (brands.some((brand) => HEIC_BRANDS.has(brand))) {
    return "image/heic";
  }
  if (brands.some((brand) => HEIF_BRANDS.has(brand))) {
    return "image/heif";
  }
  return undefined;
}
77+
5278
/**
 * Sniffs supported inline image formats from decoded bytes.
 *
 * Tries the IMAGE_SIGNATURES table first and falls back to ISO BMFF (HEIC/HEIF) brand
 * sniffing when no table entry matches.
 */
export function sniffInlineImageMime(buffer: Buffer): string | undefined {
  const signature = IMAGE_SIGNATURES.find((candidate) => candidate.matches(buffer));
  return signature?.mime ?? sniffIsoBmffImageMime(buffer);
}
85+
86+
/** Reports whether a MIME type string starts with "image/", ignoring case and surrounding whitespace. */
function isImageMimeType(value: string): boolean {
  const normalized = value.trim().toLowerCase();
  return normalized.startsWith("image/");
}
89+
90+
/** Result of `sanitizeInlineImageBase64`. */
export type SanitizedInlineImageBase64 = {
  /** MIME type sniffed from the payload's leading bytes. */
  mimeType: string;
  /** Canonicalized base64 payload. */
  base64: string;
};
94+
95+
/**
 * Canonicalizes trusted inline image base64 and rejects malformed or non-image payloads.
 *
 * The caller-supplied MIME type only gates the call (it must start with "image/"); the
 * MIME type in the result is the one sniffed from the payload's leading bytes.
 *
 * @returns The sniffed MIME type with the canonical payload, or undefined when the MIME
 *   type is not an image type, `canonicalizeBase64` yields no payload, or no supported
 *   image signature is recognized.
 */
export function sanitizeInlineImageBase64(params: {
  mimeType: string;
  base64: string;
}): SanitizedInlineImageBase64 | undefined {
  const { mimeType: declaredMimeType, base64: rawBase64 } = params;
  if (!isImageMimeType(declaredMimeType)) {
    return undefined;
  }

  const base64 = canonicalizeBase64(rawBase64);
  if (!base64) {
    return undefined;
  }

  // Only the leading characters are decoded; that is enough for signature sniffing.
  const signaturePrefix = Buffer.from(
    base64.slice(0, IMAGE_SIGNATURE_PREFIX_BASE64_CHARS),
    "base64",
  );
  const mimeType = sniffInlineImageMime(signaturePrefix);
  if (!mimeType) {
    return undefined;
  }
  return { mimeType, base64 };
}
56118

57119
function parseInlineImageDataUrl(value: string):
@@ -78,12 +140,17 @@ function parseInlineImageDataUrl(value: string):
78140

79141
/**
 * Reports whether data URL metadata describes a base64-encoded image: the first entry must
 * be an image MIME type and one of the remaining entries must be "base64" (any case).
 */
function metadataAllowsImageBase64(metadata: string[]): boolean {
  const [mimeType, ...options] = metadata;
  if (mimeType === undefined || !isImageMimeType(mimeType)) {
    return false;
  }
  return options.some((option) => option.toLowerCase() === "base64");
}
84149

85-
/** Canonicalizes trusted inline image data URLs and rejects malformed or non-image payloads. */
86-
export function sanitizeInlineImageDataUrl(imageUrl: string): string | undefined {
150+
function sanitizeInlineImageDataUrlWithAllowedMimes(
151+
imageUrl: string,
152+
allowedMimes?: Set<string>,
153+
): string | undefined {
87154
const parsed = parseInlineImageDataUrl(imageUrl);
88155
if (!parsed) {
89156
return undefined;
@@ -95,14 +162,30 @@ export function sanitizeInlineImageDataUrl(imageUrl: string): string | undefined
95162
return undefined;
96163
}
97164

98-
const canonicalPayload = canonicalizeBase64(parsed.payload);
99-
if (!canonicalPayload) {
165+
const [mimeType] = parsed.metadata;
166+
const sanitized = sanitizeInlineImageBase64({
167+
mimeType: mimeType ?? "",
168+
base64: parsed.payload,
169+
});
170+
if (!sanitized) {
100171
return undefined;
101172
}
102-
const sniffedMimeType = sniffInlineImageMime(Buffer.from(canonicalPayload, "base64"));
103-
if (!sniffedMimeType) {
173+
if (allowedMimes && !allowedMimes.has(sanitized.mimeType)) {
104174
return undefined;
105175
}
106176
// Trust the byte signature over caller-supplied metadata before reinlining.
107-
return `data:${sniffedMimeType};base64,${canonicalPayload}`;
177+
return `data:${sanitized.mimeType};base64,${sanitized.base64}`;
178+
}
179+
180+
/**
 * Canonicalizes trusted inline image data URLs for persistence.
 *
 * No MIME allowlist is applied, so every image signature supported by
 * `sanitizeInlineImageBase64` is accepted.
 */
export function sanitizeInlineImageDataUrlForStorage(imageUrl: string): string | undefined {
  return sanitizeInlineImageDataUrlWithAllowedMimes(imageUrl, undefined);
}
187+
188+
/**
 * Canonicalizes provider-safe inline image data URLs and rejects unsupported formats.
 *
 * Only payloads whose sniffed MIME type is in INLINE_IMAGE_DATA_URL_MIMES are returned.
 */
export function sanitizeInlineImageDataUrl(imageUrl: string): string | undefined {
  const providerSafeMimes = INLINE_IMAGE_DATA_URL_MIMES;
  return sanitizeInlineImageDataUrlWithAllowedMimes(imageUrl, providerSafeMimes);
}

src/agents/responses-image-payload-sanitizer.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ import { sanitizeResponsesImagePayload } from "./responses-image-payload-sanitiz
55
const PNG_1X1 =
66
// Valid JPEG-labeled data is sniffed as PNG and normalized to the real MIME type.
77
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII=";
8+
// Minimal ISO BMFF `ftyp` box: size 0x10 (16 bytes), major brand "heic", zero minor
// version, and no compatible brands. The size field matches the byte count so the fixture
// stays a well-formed box.
const HEIC_HEADER = Buffer.from([
  0x00, 0x00, 0x00, 0x10, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63, 0x00, 0x00, 0x00, 0x00,
]).toString("base64");
811

912
describe("Responses image payload sanitizer", () => {
1013
it("replaces malformed input_image data URLs before sending Responses payloads", () => {
@@ -57,4 +60,29 @@ describe("Responses image payload sanitizer", () => {
5760
},
5861
]);
5962
});
63+
64+
it("replaces HEIC inline image data URLs before Responses transport", () => {
65+
const sanitized = sanitizeResponsesImagePayload({
66+
input: [
67+
{
68+
type: "message",
69+
role: "user",
70+
content: [{ type: "input_image", image_url: `data:image/heic;base64,${HEIC_HEADER}` }],
71+
},
72+
],
73+
});
74+
75+
expect(sanitized.input).toEqual([
76+
{
77+
type: "message",
78+
role: "user",
79+
content: [
80+
{
81+
type: "input_text",
82+
text: "[omitted image payload: invalid inline image data]",
83+
},
84+
],
85+
},
86+
]);
87+
});
6088
});

src/agents/session-file-repair.test.ts

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ function requireFirstLogMessage(log: ReturnType<typeof vi.fn>): string {
5656
return message;
5757
}
5858

59+
const PNG_1X1 =
60+
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII=";
61+
const BMP_HEADER = Buffer.from("BMfixture", "ascii").toString("base64");
62+
5963
afterEach(async () => {
6064
vi.restoreAllMocks();
6165
await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true })));
@@ -290,7 +294,7 @@ describe("repairSessionFileIfNeeded", () => {
290294
role: "user",
291295
content: [
292296
{ type: "text", text: " " },
293-
{ type: "image", data: "AA==", mimeType: "image/png" },
297+
{ type: "image", data: PNG_1X1, mimeType: "image/png" },
294298
],
295299
},
296300
};
@@ -304,7 +308,7 @@ describe("repairSessionFileIfNeeded", () => {
304308
const repaired = await fs.readFile(file, "utf-8");
305309
const repairedEntry = JSON.parse(repaired.trim().split("\n")[1] ?? "{}");
306310
expect(repairedEntry.message.content).toEqual([
307-
{ type: "image", data: "AA==", mimeType: "image/png" },
311+
{ type: "image", data: PNG_1X1, mimeType: "image/png" },
308312
]);
309313
});
310314

@@ -353,7 +357,7 @@ describe("repairSessionFileIfNeeded", () => {
353357
role: "user",
354358
content: [
355359
{ type: "text", text: "inspect this" },
356-
{ type: "image", data: "AA==", mimeType: "image/png" },
360+
{ type: "image", data: PNG_1X1, mimeType: "image/png" },
357361
],
358362
},
359363
};
@@ -367,6 +371,63 @@ describe("repairSessionFileIfNeeded", () => {
367371
expect(repaired).toBe(original);
368372
});
369373

374+
it("preserves valid non-browser image blocks during repair", async () => {
375+
const { file } = await createTempSessionPath();
376+
const { header } = buildSessionHeaderAndMessage();
377+
const validUserEntry = {
378+
type: "message",
379+
id: "msg-valid-bmp",
380+
parentId: null,
381+
timestamp: new Date().toISOString(),
382+
message: {
383+
role: "user",
384+
content: [
385+
{ type: "text", text: "inspect this" },
386+
{ type: "image", data: BMP_HEADER, mimeType: "image/bmp" },
387+
],
388+
},
389+
};
390+
const original = `${JSON.stringify(header)}\n${JSON.stringify(validUserEntry)}\n`;
391+
await fs.writeFile(file, original, "utf-8");
392+
393+
const result = await repairSessionFileIfNeeded({ sessionFile: file });
394+
395+
expect(result.repaired).toBe(false);
396+
const repaired = await fs.readFile(file, "utf-8");
397+
expect(repaired).toBe(original);
398+
});
399+
400+
it("rewrites syntactically valid base64 that is not image bytes", async () => {
401+
const { file } = await createTempSessionPath();
402+
const { header } = buildSessionHeaderAndMessage();
403+
const fakeImageUserEntry = {
404+
type: "message",
405+
id: "msg-fake-image",
406+
parentId: null,
407+
timestamp: new Date().toISOString(),
408+
message: {
409+
role: "user",
410+
content: [
411+
{ type: "text", text: "inspect this" },
412+
{ type: "image", data: "SGVsbG8=", mimeType: "image/png" },
413+
],
414+
},
415+
};
416+
const original = `${JSON.stringify(header)}\n${JSON.stringify(fakeImageUserEntry)}\n`;
417+
await fs.writeFile(file, original, "utf-8");
418+
419+
const result = await repairSessionFileIfNeeded({ sessionFile: file });
420+
421+
expect(result.repaired).toBe(true);
422+
expect(result.removedCorruptedImageBlocks).toBe(1);
423+
const repaired = await fs.readFile(file, "utf-8");
424+
const repairedEntry = JSON.parse(repaired.trim().split("\n")[1] ?? "{}");
425+
expect(repairedEntry.message.content).toEqual([
426+
{ type: "text", text: "inspect this" },
427+
{ type: "text", text: CORRUPTED_IMAGE_FALLBACK_TEXT },
428+
]);
429+
});
430+
370431
it("reports both drops and rewrites in the debug message when both occur", async () => {
371432
const { file } = await createTempSessionPath();
372433
const { header } = buildSessionHeaderAndMessage();

src/agents/session-file-repair.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import { randomUUID } from "node:crypto";
77
import fs from "node:fs/promises";
88
import path from "node:path";
9-
import { canonicalizeBase64 } from "@openclaw/media-core/base64";
9+
import { sanitizeInlineImageBase64 } from "@openclaw/media-core/inline-image-data-url";
1010
import { replaceFileAtomic } from "../infra/replace-file.js";
1111
import type { AgentMessage } from "./runtime/index.js";
1212
import { makeMissingToolResult } from "./session-transcript-repair.js";
@@ -127,14 +127,19 @@ function isCorruptedImageContentBlock(block: unknown): boolean {
127127
data?: unknown;
128128
mimeType?: unknown;
129129
mediaType?: unknown;
130+
media_type?: unknown;
130131
};
131132
if (record.type !== "image" || typeof record.data !== "string") {
132133
return false;
133134
}
134-
if (!isImageMimeType(record.mimeType) && !isImageMimeType(record.mediaType)) {
135+
const mimeType = [record.mimeType, record.mediaType, record.media_type].find(isImageMimeType);
136+
if (!mimeType) {
135137
return false;
136138
}
137-
return containsNonAscii(record.data) || canonicalizeBase64(record.data) === undefined;
139+
return (
140+
containsNonAscii(record.data) ||
141+
sanitizeInlineImageBase64({ base64: record.data, mimeType }) === undefined
142+
);
138143
}
139144

140145
function repairEntryWithCorruptedImageBlocks(entry: SessionMessageEntry): {

0 commit comments

Comments
 (0)