Skip to content

Commit 6b6dcaf

Browse files
authored
fix(webchat): support non-image file attachments
1 parent 303cde8 commit 6b6dcaf

16 files changed

Lines changed: 505 additions & 60 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
1515
- Plugins/CLI: refresh the persisted registry after managed plugin files are removed so ClawHub uninstall cannot leave stale `plugins list` entries. Thanks @codex.
1616
- Plugins/CLI: make plugin install and uninstall config writes conflict-aware, clear stale denylist entries on explicit reinstall/removal, and delete managed plugin files only after config/index commit succeeds. Thanks @codex.
1717
- Plugins: fail `plugins update` when tracked plugin or hook updates error, keep bundled runtime-dependency repair behind restrictive allowlists, and reject package installs with unloadable extension entries. Thanks @codex.
18+
- WebChat/Control UI: support non-video file attachments in chat uploads while preserving the existing image attachment path and MIME-sniff fallback for generic image uploads. (#70947) Thanks @IAMSamuelRodda.
1819
- Gateway/chat: keep duplicate attachment-backed `chat.send` retries with the same idempotency key on the documented in-flight path so aborts still target the real active run. Fixes #70139. Thanks @Feelw00.
1920
- Plugins: share package entrypoint resolution between install and discovery, reject mismatched `runtimeExtensions`, and cache bundled runtime-dependency manifest reads during scans. Thanks @codex.
2021
- WhatsApp/Web: keep quiet but healthy linked-device sessions connected by basing the watchdog on WhatsApp Web transport activity, while retaining a longer app-silence cap so frame activity cannot mask a stuck session forever. Fixes #70678; carries forward the focused #71466 approach and keeps #63939 as related configurable-timeout follow-up. Thanks @vincentkoc and @oromeis.

docs/web/control-ui.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@ The Control UI can localize itself on first load based on your browser locale. T
134134
<AccordionGroup>
135135
<Accordion title="Send and history semantics">
136136
- `chat.send` is **non-blocking**: it acks immediately with `{ runId, status: "started" }` and the response streams via `chat` events.
137+
- Chat uploads accept images plus non-video files. Images keep the native image path; other files are stored as managed media and shown in history as attachment links.
137138
- Re-sending with the same `idempotencyKey` returns `{ status: "in_flight" }` while running, and `{ status: "ok" }` after completion.
138139
- `chat.history` responses are size-bounded for UI safety. When transcript entries are too large, Gateway may truncate long text fields, omit heavy metadata blocks, and replace oversized messages with a placeholder (`[chat.history omitted: message too large]`).
139140
- Assistant/generated images are persisted as managed media references and served back through authenticated Gateway media URLs, so reloads do not depend on raw base64 image payloads staying in the chat history response.

src/gateway/chat-attachments.test.ts

Lines changed: 56 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -111,14 +111,23 @@ describe("parseMessageWithAttachments", () => {
111111
expect(logs[0]).toMatch(/mime mismatch/i);
112112
});
113113

114-
it("drops unknown mime when sniff fails and logs", async () => {
114+
it("persists unknown non-image files when sniff fails", async () => {
115115
const unknown = Buffer.from("not an image").toString("base64");
116116
const { parsed, logs } = await parseWithWarnings("x", [
117117
{ type: "file", fileName: "unknown.bin", content: unknown },
118118
]);
119-
expect(parsed.images).toHaveLength(0);
120-
expect(logs).toHaveLength(1);
121-
expect(logs[0]).toMatch(/unable to detect image mime type/i);
119+
try {
120+
expect(parsed.images).toHaveLength(0);
121+
expect(parsed.offloadedRefs).toHaveLength(1);
122+
expect(parsed.offloadedRefs[0]).toMatchObject({
123+
label: "unknown.bin",
124+
mimeType: "application/octet-stream",
125+
});
126+
expect(parsed.message).toMatch(/^x\n\[media attached: media:\/\/inbound\//);
127+
expect(logs).toHaveLength(0);
128+
} finally {
129+
await cleanupOffloadedRefs(parsed.offloadedRefs);
130+
}
122131
});
123132

124133
it("keeps valid images and drops invalid ones", async () => {
@@ -143,6 +152,49 @@ describe("parseMessageWithAttachments", () => {
143152
expect(logs.some((l) => /non-image/i.test(l))).toBe(true);
144153
});
145154

155+
it("persists non-image file attachments as media refs", async () => {
156+
const parsed = await parseMessageWithAttachments(
157+
"read this",
158+
[
159+
{
160+
type: "file",
161+
mimeType: "application/pdf",
162+
fileName: "brief.pdf",
163+
content: Buffer.from("%PDF-1.4\n").toString("base64"),
164+
},
165+
],
166+
{ log: { warn: () => {} } },
167+
);
168+
169+
try {
170+
expect(parsed.images).toHaveLength(0);
171+
expect(parsed.imageOrder).toEqual(["offloaded"]);
172+
expect(parsed.offloadedRefs).toHaveLength(1);
173+
expect(parsed.offloadedRefs[0]).toMatchObject({
174+
mimeType: "application/pdf",
175+
label: "brief.pdf",
176+
});
177+
expect(parsed.message).toMatch(/^read this\n\[media attached: media:\/\/inbound\//);
178+
} finally {
179+
await cleanupOffloadedRefs(parsed.offloadedRefs);
180+
}
181+
});
182+
183+
it("keeps image sniff fallback for generic image attachments", async () => {
184+
const { parsed, logs } = await parseWithWarnings("see this", [
185+
{
186+
type: "file",
187+
mimeType: "application/octet-stream",
188+
fileName: "dot",
189+
content: PNG_1x1,
190+
},
191+
]);
192+
expect(parsed.images).toHaveLength(1);
193+
expect(parsed.images[0]?.mimeType).toBe("image/png");
194+
expect(parsed.offloadedRefs).toHaveLength(0);
195+
expect(logs).toHaveLength(0);
196+
});
197+
146198
it("offloads images for text-only models instead of dropping them", async () => {
147199
const logs: string[] = [];
148200
const infos: string[] = [];

src/gateway/chat-attachments.ts

Lines changed: 61 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,19 @@ function isImageMime(mime?: string): boolean {
142142
return typeof mime === "string" && mime.startsWith("image/");
143143
}
144144

145+
function isVideoMime(mime?: string): boolean {
146+
return typeof mime === "string" && mime.startsWith("video/");
147+
}
148+
149+
function isGenericMime(mime?: string): boolean {
150+
return (
151+
!mime ||
152+
mime === "application/octet-stream" ||
153+
mime === "binary/octet-stream" ||
154+
mime === "application/unknown"
155+
);
156+
}
157+
145158
function isValidBase64(value: string): boolean {
146159
if (value.length === 0 || value.length % 4 !== 0) {
147160
return false;
@@ -307,6 +320,7 @@ export async function parseMessageWithAttachments(
307320
const offloadedRefs: OffloadedRef[] = [];
308321
let updatedMessage = message;
309322
const shouldForceOffload = opts?.supportsImages === false;
323+
let textOnlyImageOffloadCount = 0;
310324

311325
// Track IDs of files saved during this request for cleanup if a later
312326
// attachment fails validation and the entire parse is aborted.
@@ -344,15 +358,54 @@ export async function parseMessageWithAttachments(
344358
const providedMime = normalizeMime(mime);
345359
const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
346360

347-
if (sniffedMime && !isImageMime(sniffedMime)) {
361+
if (sniffedMime && !isImageMime(sniffedMime) && isImageMime(providedMime)) {
348362
log?.warn(`attachment ${label}: detected non-image (${sniffedMime}), dropping`);
349363
continue;
350364
}
351-
if (!sniffedMime && !isImageMime(providedMime)) {
352-
log?.warn(`attachment ${label}: unable to detect image mime type, dropping`);
365+
366+
const shouldHandleAsImage =
367+
isImageMime(sniffedMime) || (isImageMime(providedMime) && !sniffedMime);
368+
if (!shouldHandleAsImage) {
369+
const finalMime = sniffedMime ?? providedMime ?? "application/octet-stream";
370+
if (isVideoMime(finalMime)) {
371+
log?.warn(`attachment ${label}: video attachments are not supported, dropping`);
372+
continue;
373+
}
374+
375+
const buffer = Buffer.from(b64, "base64");
376+
verifyDecodedSize(buffer, sizeBytes, label);
377+
378+
try {
379+
const rawResult = await saveMediaBuffer(buffer, finalMime, "inbound", maxBytes, label);
380+
const savedMedia = assertSavedMedia(rawResult, label);
381+
savedMediaIds.push(savedMedia.id);
382+
383+
const mediaRef = `media://inbound/${savedMedia.id}`;
384+
updatedMessage += `\n[media attached: ${mediaRef}]`;
385+
log?.info?.(`[Gateway] Saved file attachment. Saved: ${mediaRef}`);
386+
offloadedRefs.push({
387+
mediaRef,
388+
id: savedMedia.id,
389+
path: savedMedia.path ?? "",
390+
mimeType: finalMime,
391+
label,
392+
});
393+
imageOrder.push("offloaded");
394+
} catch (err) {
395+
const errorMessage = formatErrorMessage(err);
396+
throw new MediaOffloadError(
397+
`[Gateway Error] Failed to save intercepted media to disk: ${errorMessage}`,
398+
{ cause: err },
399+
);
400+
}
353401
continue;
354402
}
355-
if (sniffedMime && providedMime && sniffedMime !== providedMime) {
403+
if (
404+
sniffedMime &&
405+
providedMime &&
406+
!isGenericMime(providedMime) &&
407+
sniffedMime !== providedMime
408+
) {
356409
log?.warn(
357410
`attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`,
358411
);
@@ -364,7 +417,7 @@ export async function parseMessageWithAttachments(
364417

365418
let isOffloaded = false;
366419

367-
if (shouldForceOffload && offloadedRefs.length >= TEXT_ONLY_OFFLOAD_LIMIT) {
420+
if (shouldForceOffload && textOnlyImageOffloadCount >= TEXT_ONLY_OFFLOAD_LIMIT) {
368421
log?.warn(
369422
`attachment ${label}: dropping image because text-only offload limit ` +
370423
`${TEXT_ONLY_OFFLOAD_LIMIT} was reached`,
@@ -437,6 +490,9 @@ export async function parseMessageWithAttachments(
437490
label,
438491
});
439492
imageOrder.push("offloaded");
493+
if (shouldForceOffload) {
494+
textOnlyImageOffloadCount++;
495+
}
440496

441497
isOffloaded = true;
442498
} catch (err) {

src/gateway/server-methods/chat.directive-tags.test.ts

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1789,6 +1789,71 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
17891789
});
17901790
});
17911791

1792+
it("persists non-image chat.send attachments as media refs without dispatch images", async () => {
1793+
createTranscriptFixture("openclaw-chat-send-user-transcript-file-");
1794+
mockState.finalText = "ok";
1795+
mockState.triggerAgentRunStart = true;
1796+
mockState.savedMediaResults = [
1797+
{ path: "/tmp/chat-send-brief.pdf", contentType: "application/pdf" },
1798+
];
1799+
const respond = vi.fn();
1800+
const context = createChatContext();
1801+
1802+
await runNonStreamingChatSend({
1803+
context,
1804+
respond,
1805+
idempotencyKey: "idem-user-transcript-file",
1806+
message: "summarize this",
1807+
requestParams: {
1808+
attachments: [
1809+
{
1810+
type: "file",
1811+
mimeType: "application/pdf",
1812+
fileName: "brief.pdf",
1813+
content: Buffer.from("%PDF-1.4\n").toString("base64"),
1814+
},
1815+
],
1816+
},
1817+
expectBroadcast: false,
1818+
waitForCompletion: false,
1819+
});
1820+
1821+
await waitForAssertion(() => {
1822+
const userUpdate = mockState.emittedTranscriptUpdates.find(
1823+
(update) =>
1824+
typeof update.message === "object" &&
1825+
update.message !== null &&
1826+
(update.message as { role?: unknown }).role === "user",
1827+
);
1828+
const message = userUpdate?.message as
1829+
| {
1830+
content?: unknown;
1831+
MediaPath?: string;
1832+
MediaPaths?: string[];
1833+
MediaType?: string;
1834+
MediaTypes?: string[];
1835+
}
1836+
| undefined;
1837+
expect(mockState.lastDispatchImages).toBeUndefined();
1838+
expect(mockState.lastDispatchImageOrder).toEqual(["offloaded"]);
1839+
expect(mockState.lastDispatchCtx?.Body).toMatch(
1840+
/^summarize this\n\[media attached: media:\/\/inbound\//,
1841+
);
1842+
expect(mockState.savedMediaCalls).toEqual([
1843+
expect.objectContaining({
1844+
contentType: "application/pdf",
1845+
subdir: "inbound",
1846+
size: expect.any(Number),
1847+
}),
1848+
]);
1849+
expect(message?.content).toMatch(/^summarize this\n\[media attached: media:\/\/inbound\//);
1850+
expect(message?.MediaPath).toBe("/tmp/chat-send-brief.pdf");
1851+
expect(message?.MediaPaths).toEqual(["/tmp/chat-send-brief.pdf"]);
1852+
expect(message?.MediaType).toBe("application/pdf");
1853+
expect(message?.MediaTypes).toEqual(["application/pdf"]);
1854+
});
1855+
});
1856+
17921857
it("preserves offloaded attachment media paths in transcript order", async () => {
17931858
createTranscriptFixture("openclaw-chat-send-user-transcript-offloaded-");
17941859
mockState.finalText = "ok";

src/media/store.test.ts

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ describe("media store", () => {
157157
async function expectSavedBufferCase(params: {
158158
buffer: Buffer;
159159
contentType?: string;
160+
originalFilename?: string;
160161
expectedContentType: string;
161162
expectedExtension: string;
162163
assertSaved?: (
@@ -165,7 +166,13 @@ describe("media store", () => {
165166
) => Promise<void> | void;
166167
}) {
167168
await withTempStore(async (store) => {
168-
const saved = await store.saveMediaBuffer(params.buffer, params.contentType);
169+
const saved = await store.saveMediaBuffer(
170+
params.buffer,
171+
params.contentType,
172+
"inbound",
173+
5 * 1024 * 1024,
174+
params.originalFilename,
175+
);
169176
expect(saved.contentType).toBe(params.expectedContentType);
170177
expect(saved.path.endsWith(params.expectedExtension)).toBe(true);
171178
await params.assertSaved?.(saved, params.buffer);
@@ -371,6 +378,14 @@ describe("media store", () => {
371378
expectedContentType: "image/jpeg",
372379
expectedExtension: ".jpg",
373380
},
381+
{
382+
name: "preserves original extension for generic file buffers",
383+
buffer: Buffer.from("custom binary"),
384+
contentType: "application/octet-stream",
385+
originalFilename: "report.custom",
386+
expectedContentType: "application/octet-stream",
387+
expectedExtension: ".custom",
388+
},
374389
] as const)("$name", async (testCase) => {
375390
const buffer =
376391
"bufferFactory" in testCase && testCase.bufferFactory
@@ -379,8 +394,16 @@ describe("media store", () => {
379394
await expectSavedBufferCase({
380395
buffer,
381396
contentType: testCase.contentType,
397+
...("originalFilename" in testCase ? { originalFilename: testCase.originalFilename } : {}),
382398
expectedContentType: testCase.expectedContentType,
383399
expectedExtension: testCase.expectedExtension,
400+
...("originalFilename" in testCase
401+
? {
402+
assertSaved: async (saved: Awaited<ReturnType<typeof store.saveMediaBuffer>>) => {
403+
expect(path.basename(saved.path)).toMatch(/^report---.+\.custom$/);
404+
},
405+
}
406+
: {}),
384407
...("assertSaved" in testCase ? { assertSaved: testCase.assertSaved } : {}),
385408
});
386409
});

src/media/store.ts

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -284,6 +284,14 @@ function buildSavedMediaId(params: {
284284
: `${params.baseId}${params.ext}`;
285285
}
286286

287+
function safeOriginalFilenameExtension(originalFilename?: string): string | undefined {
288+
if (!originalFilename) {
289+
return undefined;
290+
}
291+
const ext = path.extname(originalFilename).toLowerCase();
292+
return /^\.[a-z0-9]{1,16}$/.test(ext) ? ext : undefined;
293+
}
294+
287295
function buildSavedMediaResult(params: {
288296
dir: string;
289297
id: string;
@@ -419,7 +427,8 @@ export async function saveMediaBuffer(
419427
const uuid = crypto.randomUUID();
420428
const headerExt = extensionForMime(normalizeOptionalString(contentType?.split(";")[0]));
421429
const mime = await detectMime({ buffer, headerMime: contentType });
422-
const ext = headerExt ?? extensionForMime(mime) ?? "";
430+
const ext =
431+
headerExt ?? extensionForMime(mime) ?? safeOriginalFilenameExtension(originalFilename) ?? "";
423432
const id = buildSavedMediaId({ baseId: uuid, ext, originalFilename });
424433
await writeSavedMediaBuffer({ dir, id, buffer });
425434
return buildSavedMediaResult({ dir, id, size: buffer.byteLength, contentType: mime });

0 commit comments

Comments
 (0)