Skip to content

Commit 01e7b3c

Browse files
committed
fix(message-tool): hydrate attachments[] on reply/sendAttachment/setGroupIcon/upload-file
The message-tool schema (src/agents/tools/message-tool.ts:194) advertises a structured `attachments: [{path, mimeType, name, …}]` array as a peer of the top-level `media|mediaUrl|path|filePath` slots. The `send` action consumed it via `collectMessageAttachmentMediaHints` in the runner, but the single-attachment hydration path used by `reply`, `sendAttachment`, `setGroupIcon`, and `upload-file` (`hydrateAttachmentActionPayload` in message-action-params.ts) only looked at top-level keys. Agents that followed the published schema and passed `attachments: [{path: "/abs/pic.png", mimeType: "image/png", name: "pic.png"}]` to `action: "reply"` had hydration find no hint, no buffer was loaded, and the channel handler shipped text-only with the attachment silently dropped. No error was raised because `extractReplyAttachment` only throws on a *bypass* shape (top-level path with no buffer), not on absence — and `attachments[]` left every top-level slot empty. Fix: lift the first attachment's path / mimeType / filename into the top-level slots inside `hydrateAttachmentActionPayload` before reading hints. Backward-compatible (only fills empty slots; explicit top-level hints win) and preserves all existing sandbox / localRoots / size enforcement, since the loaded `mediaSource` still flows through `loadWebMedia` with the resolved policy. Bug surface: every channel's `reply` action (iMessage, Telegram, WhatsApp, Discord, BlueBubbles, …), since they all share the same core hydration step.
1 parent c04c03f commit 01e7b3c

2 files changed

Lines changed: 104 additions & 0 deletions

File tree

src/infra/outbound/message-action-params.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,58 @@ function readAttachmentFileHint(args: Record<string, unknown>): string | undefin
114114
);
115115
}
116116

117+
// The message tool exposes a structured `attachments: [{path, mimeType, name}]`
118+
// shape that already worked for the `send` action (see
119+
// collectMessageAttachmentMediaHints in message-action-runner). Single-attachment
120+
// actions (`reply`, `sendAttachment`, `setGroupIcon`, `upload-file`) hydrate via
121+
// top-level `media|mediaUrl|path|filePath|fileUrl`, so without this lift an
122+
// agent calling `message(action:"reply", attachments:[{...}])` had its file
123+
// silently dropped: hydration found no hint, no buffer was loaded, and the
124+
// channel handler shipped text-only. Backward-compatible: only fills slots when
125+
// they are empty.
126+
function liftFirstAttachmentToTopLevel(args: Record<string, unknown>): void {
127+
if (!Array.isArray(args.attachments) || args.attachments.length === 0) {
128+
return;
129+
}
130+
for (const attachment of args.attachments) {
131+
if (!attachment || typeof attachment !== "object" || Array.isArray(attachment)) {
132+
continue;
133+
}
134+
const record = attachment as Record<string, unknown>;
135+
const source =
136+
normalizeOptionalString(record.path) ??
137+
normalizeOptionalString(record.filePath) ??
138+
normalizeOptionalString(record.media) ??
139+
normalizeOptionalString(record.mediaUrl) ??
140+
normalizeOptionalString(record.fileUrl) ??
141+
normalizeOptionalString(record.url);
142+
if (!source) {
143+
continue;
144+
}
145+
if (
146+
!readAttachmentMediaHint(args) &&
147+
!readAttachmentFileHint(args) &&
148+
!readStringParam(args, "buffer", { trim: false })
149+
) {
150+
args.path = source;
151+
}
152+
const mimeType =
153+
normalizeOptionalString(record.mimeType) ?? normalizeOptionalString(record.contentType);
154+
if (mimeType && !readStringParam(args, "mimeType") && !readStringParam(args, "contentType")) {
155+
// Write to `contentType` (canonical) — hydration treats it as the
156+
// primary content type and falls back to `mimeType`. Downstream channel
157+
// handlers read `args.contentType`.
158+
args.contentType = mimeType;
159+
}
160+
const filename =
161+
normalizeOptionalString(record.name) ?? normalizeOptionalString(record.filename);
162+
if (filename && !readStringParam(args, "filename")) {
163+
args.filename = filename;
164+
}
165+
return;
166+
}
167+
}
168+
117169
function resolveAttachmentMaxBytes(params: {
118170
cfg: OpenClawConfig;
119171
channel: ChannelId;
@@ -361,6 +413,7 @@ async function hydrateAttachmentActionPayload(params: {
361413
mediaPolicy: AttachmentMediaPolicy;
362414
optimizeImages?: boolean;
363415
}): Promise<void> {
416+
liftFirstAttachmentToTopLevel(params.args);
364417
const mediaHint = readAttachmentMediaHint(params.args);
365418
const fileHint = readAttachmentFileHint(params.args);
366419
const contentTypeParam =

src/infra/outbound/message-action-runner.media.test.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,57 @@ describe("runMessageAction media behavior", () => {
774774
expect(handlerParams.caption).toBeUndefined();
775775
expect(handlerParams.message).toBe("look at this");
776776
});
777+
778+
it("hydrates buffer/filename/contentType from attachments[] on reply", async () => {
779+
// Regression: agents calling message(action:"reply", attachments:[{path,mimeType,name}])
780+
// previously had the attachment silently dropped because the reply
781+
// hydration path only read top-level media/mediaUrl/path/filePath/fileUrl.
782+
const result = await runMessageAction({
783+
cfg,
784+
action: "reply",
785+
params: {
786+
channel: "replychat",
787+
target: "+15551234567",
788+
messageId: "parent-id",
789+
text: "look at this",
790+
attachments: [
791+
{
792+
path: "https://example.com/pic.png",
793+
mimeType: "image/png",
794+
name: "mug.png",
795+
},
796+
],
797+
},
798+
});
799+
800+
expect(result.kind).toBe("action");
801+
expect(handleActionMock).toHaveBeenCalledTimes(1);
802+
const handlerParams = firstMockArg(handleActionMock, "handleAction");
803+
expect(handlerParams.buffer).toBe(Buffer.from("hello").toString("base64"));
804+
expect(handlerParams.filename).toBe("mug.png");
805+
expect(handlerParams.contentType).toBe("image/png");
806+
});
807+
808+
it("prefers an explicit top-level hint over attachments[] entries", async () => {
809+
await runMessageAction({
810+
cfg,
811+
action: "reply",
812+
params: {
813+
channel: "replychat",
814+
target: "+15551234567",
815+
messageId: "parent-id",
816+
text: "look at this",
817+
media: "https://example.com/explicit.png",
818+
attachments: [{ path: "https://example.com/ignored.png" }],
819+
},
820+
});
821+
822+
expect(handleActionMock).toHaveBeenCalledTimes(1);
823+
const handlerParams = firstMockArg(handleActionMock, "handleAction");
824+
// hint resolution preserved the explicit `media` and didn't overwrite
825+
// it with the attachments[] entry.
826+
expect(handlerParams.buffer).toBe(Buffer.from("hello").toString("base64"));
827+
});
777828
});
778829

779830
describe("plugin-owned media-source discovery routing", () => {

0 commit comments

Comments
 (0)