Skip to content

Commit c2a2a48

Browse files
committed
fix(whatsapp): preserve audio-as-voice payload intent
1 parent 80b6da7 commit c2a2a48

8 files changed

Lines changed: 85 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ Docs: https://docs.openclaw.ai
7474
- Discord/cron: deliver text-only isolated cron and heartbeat announce output from the canonical final assistant text once, avoiding duplicate Discord posts when streamed block payloads and the final answer contain the same content. Fixes #71406. Thanks @alexgross21.
7575
- macOS Gateway: wait for launchd to reload the exited Gateway LaunchAgent before bootstrapping repair fallback, preventing config-triggered restarts from leaving the service not loaded. Fixes #45178. Thanks @vincentkoc.
7676
- TTS/hooks: preserve audio-only TTS transcripts for `message_sending` and `message_sent` hooks without rendering the transcript as a media caption. Thanks @zqchris.
77+
- WhatsApp/TTS: preserve `audioAsVoice` through shared media payload sends and the WhatsApp outbound adapter, so `[[audio_as_voice]]` reply payloads keep their voice-note intent when routed through `sendPayload`. Fixes #66053. Thanks @masatohoshino.
7778
- Control UI/WebChat: hide heartbeat prompts, `HEARTBEAT_OK` acknowledgments, and internal-only runtime context turns from visible chat history while leaving the underlying transcript intact. Fixes #71381. Thanks @gerald1950ggg-ai.
7879
- Control UI/chat: keep optimistic user and assistant tail messages visible when a final history refresh briefly returns an older snapshot, preventing message cards from flash-disappearing until the next refresh. Fixes #71371. Thanks @WolvenRA.
7980
- Talk/TTS: resolve configured extension speech providers from the active runtime registry before provider-list discovery, so Talk mode no longer rejects valid plugin speech providers as unsupported.

docs/channels/whatsapp.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ When the linked self number is also present in `allowFrom`, WhatsApp self-chat s
361361

362362
<Accordion title="Outbound media behavior">
363363
- supports image, video, audio (PTT voice-note), and document payloads
364+
- reply payloads preserve the `audioAsVoice` flag; WhatsApp sends audio media as Baileys PTT (push-to-talk) voice notes
364365
- `audio/ogg` is rewritten to `audio/ogg; codecs=opus` for voice-note compatibility
365366
- animated GIF playback is supported via `gifPlayback: true` on video sends
366367
- captions are applied to the first media item when sending multi-media reply payloads

extensions/whatsapp/src/outbound-adapter.sendpayload.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,28 @@ describe("whatsappOutbound sendPayload", () => {
7575
});
7676
});
7777

78+
it("preserves audioAsVoice from payload media sends", async () => {
79+
const sendWhatsApp = vi.fn(async () => ({ messageId: "wa-1", toJid: "jid" }));
80+
81+
await whatsappOutbound.sendPayload!({
82+
cfg: {},
83+
to: "5511999999999@c.us",
84+
text: "",
85+
payload: { text: "voice", mediaUrl: "/tmp/voice.ogg", audioAsVoice: true },
86+
deps: { sendWhatsApp },
87+
});
88+
89+
expect(sendWhatsApp).toHaveBeenCalledWith("5511999999999@c.us", "voice", {
90+
verbose: false,
91+
cfg: {},
92+
mediaUrl: "/tmp/voice.ogg",
93+
mediaLocalRoots: undefined,
94+
audioAsVoice: true,
95+
accountId: undefined,
96+
gifPlayback: undefined,
97+
});
98+
});
99+
78100
it("drops blank mediaUrls before sending payload media", async () => {
79101
const sendWhatsApp = vi.fn(async () => ({ messageId: "wa-1", toJid: "jid" }));
80102

extensions/whatsapp/src/outbound-base.test.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,40 @@ describe("createWhatsAppOutboundBase", () => {
5555
expect(result).toMatchObject({ channel: "whatsapp", messageId: "msg-1" });
5656
});
5757

58+
it("forwards audioAsVoice to sendMessageWhatsApp", async () => {
59+
const sendMessageWhatsApp = vi.fn(async () => ({
60+
messageId: "msg-voice",
61+
toJid: "15551234567@s.whatsapp.net",
62+
}));
63+
const outbound = createWhatsAppOutboundBase({
64+
chunker: (text) => [text],
65+
sendMessageWhatsApp,
66+
sendPollWhatsApp: vi.fn(),
67+
shouldLogVerbose: () => false,
68+
resolveTarget: ({ to }) => ({ ok: true as const, to: to ?? "" }),
69+
});
70+
71+
await outbound.sendMedia!({
72+
cfg: {} as never,
73+
to: "whatsapp:+15551234567",
74+
text: "voice",
75+
mediaUrl: "/tmp/workspace/voice.ogg",
76+
audioAsVoice: true,
77+
accountId: "default",
78+
deps: { sendWhatsApp: sendMessageWhatsApp },
79+
});
80+
81+
expect(sendMessageWhatsApp).toHaveBeenCalledWith(
82+
"whatsapp:+15551234567",
83+
"voice",
84+
expect.objectContaining({
85+
mediaUrl: "/tmp/workspace/voice.ogg",
86+
audioAsVoice: true,
87+
accountId: "default",
88+
}),
89+
);
90+
});
91+
5892
it("uses the configured default account for quote metadata lookup when accountId is omitted", async () => {
5993
cacheInboundMessageMeta("work", "15551234567@s.whatsapp.net", "reply-1", {
6094
participant: "111@s.whatsapp.net",

extensions/whatsapp/src/outbound-base.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type WhatsAppSendTextOptions = {
3131
mediaLocalRoots?: readonly string[];
3232
mediaReadFile?: (filePath: string) => Promise<Buffer>;
3333
gifPlayback?: boolean;
34+
audioAsVoice?: boolean;
3435
accountId?: string;
3536
quotedMessageKey?: {
3637
id: string;
@@ -178,6 +179,7 @@ export function createWhatsAppOutboundBase({
178179
mediaAccess,
179180
mediaLocalRoots,
180181
mediaReadFile,
182+
audioAsVoice,
181183
accountId,
182184
deps,
183185
gifPlayback,
@@ -200,6 +202,7 @@ export function createWhatsAppOutboundBase({
200202
mediaAccess,
201203
mediaLocalRoots,
202204
mediaReadFile,
205+
...(audioAsVoice === undefined ? {} : { audioAsVoice }),
203206
accountId: accountId ?? undefined,
204207
gifPlayback,
205208
quotedMessageKey,

extensions/whatsapp/src/send.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ export async function sendMessageWhatsApp(
6767
mediaLocalRoots?: readonly string[];
6868
mediaReadFile?: (filePath: string) => Promise<Buffer>;
6969
gifPlayback?: boolean;
70+
audioAsVoice?: boolean;
7071
accountId?: string;
7172
quotedMessageKey?: {
7273
id: string;

src/plugin-sdk/reply-payload.test.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,27 @@ describe("sendTextMediaPayload", () => {
139139
expect(sendMedia.mock.calls.map((call) => call[0].replyToId)).toEqual(["reply-1", undefined]);
140140
});
141141

142+
it("preserves audioAsVoice on media fallback sends", async () => {
143+
const sendMedia = vi.fn(async ({ mediaUrl }) => ({ channel: "test", messageId: mediaUrl }));
144+
145+
await sendTextMediaPayload({
146+
channel: "test",
147+
ctx: {
148+
cfg: {},
149+
to: "target",
150+
text: "",
151+
payload: {
152+
text: "caption",
153+
mediaUrls: ["https://example.com/voice.ogg", "https://example.com/next.ogg"],
154+
audioAsVoice: true,
155+
},
156+
},
157+
adapter: { sendMedia },
158+
});
159+
160+
expect(sendMedia.mock.calls.map((call) => call[0].audioAsVoice)).toEqual([true, true]);
161+
});
162+
142163
it("keeps explicit reply tags independent from single-use implicit reply modes", async () => {
143164
const sendText = vi.fn(async ({ text }) => ({ channel: "test", messageId: text }));
144165

src/plugin-sdk/reply-payload.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ export async function sendTextMediaPayload(params: {
292292
}
293293
const nextReplyToId = createReplyToFanout(params.ctx);
294294
if (urls.length > 0) {
295+
const audioAsVoice = params.ctx.payload.audioAsVoice ?? params.ctx.audioAsVoice;
295296
const lastResult = await sendPayloadMediaSequence({
296297
text,
297298
mediaUrls: urls,
@@ -300,6 +301,7 @@ export async function sendTextMediaPayload(params: {
300301
...params.ctx,
301302
text,
302303
mediaUrl,
304+
...(audioAsVoice === undefined ? {} : { audioAsVoice }),
303305
replyToId: nextReplyToId(),
304306
}),
305307
});

0 commit comments

Comments
 (0)