mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 06:39:23 +02:00
fix: complete whatsapp forced document delivery
This commit is contained in:
@@ -393,6 +393,7 @@ When the linked self number is also present in `allowFrom`, WhatsApp self-chat s
|
||||
- non-Ogg audio, including Microsoft Edge TTS MP3/WebM output, is transcoded with `ffmpeg` to 48 kHz mono Ogg/Opus before PTT delivery
|
||||
- `/tts latest` sends the latest assistant reply as one voice note and suppresses repeat sends for the same reply; `/tts chat on|off|default` controls auto-TTS for the current WhatsApp chat
|
||||
- animated GIF playback is supported via `gifPlayback: true` on video sends
|
||||
- `forceDocument` / `asDocument` sends outbound images, GIFs, and videos through the Baileys document payload to avoid WhatsApp media compression while preserving the resolved filename and MIME type
|
||||
- captions are applied to the first media item when sending multi-media reply payloads, except PTT voice notes send the audio first and visible text separately because WhatsApp clients do not render voice-note captions consistently
|
||||
- media source can be HTTP(S), `file://`, or local paths
|
||||
|
||||
@@ -402,7 +403,7 @@ When the linked self number is also present in `allowFrom`, WhatsApp self-chat s
|
||||
- inbound media save cap: `channels.whatsapp.mediaMaxMb` (default `50`)
|
||||
- outbound media send cap: `channels.whatsapp.mediaMaxMb` (default `50`)
|
||||
- per-account overrides use `channels.whatsapp.accounts.<accountId>.mediaMaxMb`
|
||||
- images are auto-optimized (resize/quality sweep) to fit limits
|
||||
- images are auto-optimized (resize/quality sweep) to fit limits unless `forceDocument` / `asDocument` requests document delivery
|
||||
- on media send failure, first-item fallback sends text warning instead of dropping the response silently
|
||||
|
||||
</Accordion>
|
||||
|
||||
@@ -302,7 +302,7 @@ openclaw message send --channel msteams \
|
||||
--presentation '{"title":"Status update","blocks":[{"type":"text","text":"Build completed"}]}'
|
||||
```
|
||||
|
||||
Send a Telegram image as a document to avoid compression:
|
||||
Send a Telegram or WhatsApp image as a document to avoid compression:
|
||||
|
||||
```bash
|
||||
openclaw message send --channel telegram --target @mychat \
|
||||
|
||||
@@ -123,7 +123,7 @@ describe("createWebSendApi", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("sends as document when sendOptions.asDocument is true regardless of MIME", async () => {
|
||||
it("sends visual media as document when sendOptions.asDocument is true", async () => {
|
||||
const payload = Buffer.from("img");
|
||||
await api.sendMessage("+1555", "promo", payload, "image/png", {
|
||||
asDocument: true,
|
||||
@@ -140,6 +140,20 @@ describe("createWebSendApi", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("does not force audio media onto the document branch", async () => {
|
||||
const payload = Buffer.from("aud");
|
||||
await api.sendMessage("+1555", "voice", payload, "audio/ogg", {
|
||||
asDocument: true,
|
||||
fileName: "voice.ogg",
|
||||
});
|
||||
|
||||
expect(sendMessage).toHaveBeenCalledWith("1555@s.whatsapp.net", {
|
||||
audio: payload,
|
||||
ptt: true,
|
||||
mimetype: "audio/ogg",
|
||||
});
|
||||
});
|
||||
|
||||
it("sends plain text messages", async () => {
|
||||
const res = await api.sendMessage("+1555", "hello");
|
||||
expect(sendMessage).toHaveBeenCalledWith("1555@s.whatsapp.net", { text: "hello" });
|
||||
@@ -216,6 +230,39 @@ describe("createWebSendApi", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("uses resolved mention caption text for forced-document media", async () => {
|
||||
api = createWebSendApi({
|
||||
sock: { sendMessage, sendPresenceUpdate },
|
||||
defaultAccountId: "main",
|
||||
resolveOutboundMentions: ({ jid, text }) =>
|
||||
resolveWhatsAppOutboundMentions({
|
||||
chatJid: jid,
|
||||
text,
|
||||
participants: [
|
||||
{
|
||||
id: "277038292303944:4@lid",
|
||||
phoneNumber: "5511976136970@s.whatsapp.net",
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
const payload = Buffer.from("img");
|
||||
|
||||
await api.sendMessage("120363000000000000@g.us", "cap @+5511976136970", payload, "image/jpeg", {
|
||||
asDocument: true,
|
||||
fileName: "promo.jpg",
|
||||
});
|
||||
|
||||
expectFirstSendJid("120363000000000000@g.us");
|
||||
expectSendContentFields(0, {
|
||||
document: payload,
|
||||
fileName: "promo.jpg",
|
||||
caption: "cap @277038292303944",
|
||||
mimetype: "image/jpeg",
|
||||
mentions: ["277038292303944@lid"],
|
||||
});
|
||||
});
|
||||
|
||||
it("supports audio as push-to-talk voice note", async () => {
|
||||
const payload = Buffer.from("aud");
|
||||
await api.sendMessage("+1555", "", payload, "audio/ogg", { accountId: "alt" });
|
||||
|
||||
@@ -27,6 +27,10 @@ function recordWhatsAppOutbound(accountId: string) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Reports whether a MIME type may be rerouted to the document payload when
 * forced document delivery is requested.
 *
 * Only image and video types qualify. Every other type (audio included)
 * returns `false`, so the caller keeps its regular per-type handling.
 *
 * @param mediaType - MIME type of the outbound media, e.g. `image/png`.
 * @returns `true` for `image/*` and `video/*` types, otherwise `false`.
 */
function supportsForcedDocumentMediaType(mediaType: string): boolean {
  // MIME type and subtype names are case-insensitive (RFC 2045, section 5.1),
  // so normalize before prefix matching.
  const normalized = mediaType.trim().toLowerCase();
  return normalized.startsWith("image/") || normalized.startsWith("video/");
}
|
||||
|
||||
export function createWebSendApi(params: {
|
||||
sock: {
|
||||
sendMessage: (
|
||||
@@ -79,12 +83,12 @@ export function createWebSendApi(params: {
|
||||
? { text, mentionedJids: [] }
|
||||
: await resolveMentions(jid, text);
|
||||
if (mediaBuffer && mediaType) {
|
||||
if (sendOptions?.asDocument === true) {
|
||||
if (sendOptions?.asDocument === true && supportsForcedDocumentMediaType(mediaType)) {
|
||||
const fileName = sendOptions?.fileName?.trim() || "file";
|
||||
payload = {
|
||||
document: mediaBuffer,
|
||||
fileName,
|
||||
caption: text || undefined,
|
||||
caption: resolvedPayloadText.text || undefined,
|
||||
mimetype: mediaType,
|
||||
};
|
||||
} else if (mediaType.startsWith("image/")) {
|
||||
|
||||
@@ -10,6 +10,7 @@ export async function loadOutboundMediaFromUrl(
|
||||
};
|
||||
mediaLocalRoots?: readonly string[];
|
||||
mediaReadFile?: (filePath: string) => Promise<Buffer>;
|
||||
optimizeImages?: boolean;
|
||||
} = {},
|
||||
) {
|
||||
const readFile = options.mediaAccess?.readFile ?? options.mediaReadFile;
|
||||
@@ -19,17 +20,21 @@ export async function loadOutboundMediaFromUrl(
|
||||
: options.mediaLocalRoots && options.mediaLocalRoots.length > 0
|
||||
? options.mediaLocalRoots
|
||||
: undefined;
|
||||
const sharedOptions = {
|
||||
...(options.maxBytes !== undefined ? { maxBytes: options.maxBytes } : {}),
|
||||
...(options.optimizeImages !== undefined ? { optimizeImages: options.optimizeImages } : {}),
|
||||
};
|
||||
return await loadWebMedia(
|
||||
mediaUrl,
|
||||
readFile
|
||||
? {
|
||||
...(options.maxBytes !== undefined ? { maxBytes: options.maxBytes } : {}),
|
||||
...sharedOptions,
|
||||
localRoots: "any",
|
||||
readFile,
|
||||
hostReadCapability: true,
|
||||
}
|
||||
: {
|
||||
...(options.maxBytes !== undefined ? { maxBytes: options.maxBytes } : {}),
|
||||
...sharedOptions,
|
||||
...(localRoots ? { localRoots } : {}),
|
||||
},
|
||||
);
|
||||
|
||||
@@ -107,6 +107,7 @@ describe("web outbound", () => {
|
||||
};
|
||||
mediaLocalRoots?: readonly string[];
|
||||
mediaReadFile?: (filePath: string) => Promise<Buffer>;
|
||||
optimizeImages?: boolean;
|
||||
},
|
||||
) =>
|
||||
await loadWebMediaMock(mediaUrl, {
|
||||
@@ -469,6 +470,30 @@ describe("web outbound", () => {
|
||||
asDocument: true,
|
||||
fileName: "promo.jpg",
|
||||
});
|
||||
expect(hoisted.loadOutboundMediaFromUrl).toHaveBeenCalledWith(
|
||||
"/tmp/pic.jpg",
|
||||
expect.objectContaining({ optimizeImages: false }),
|
||||
);
|
||||
});
|
||||
|
||||
it("forces document branch when forceDocument is true with video media", async () => {
|
||||
const buf = Buffer.from("video");
|
||||
loadWebMediaMock.mockResolvedValueOnce({
|
||||
buffer: buf,
|
||||
contentType: "video/mp4",
|
||||
kind: "video",
|
||||
fileName: "clip.mp4",
|
||||
});
|
||||
await sendMessageWhatsApp("+1555", "watch", {
|
||||
verbose: false,
|
||||
cfg: WHATSAPP_TEST_CFG,
|
||||
mediaUrl: "/tmp/clip.mp4",
|
||||
forceDocument: true,
|
||||
});
|
||||
expect(sendMessage).toHaveBeenLastCalledWith("+1555", "watch", buf, "video/mp4", {
|
||||
asDocument: true,
|
||||
fileName: "clip.mp4",
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to a default filename when forceDocument media has no fileName", async () => {
|
||||
@@ -490,6 +515,26 @@ describe("web outbound", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps audio on the voice-note path when forceDocument is true", async () => {
|
||||
const buf = Buffer.from("audio");
|
||||
loadWebMediaMock.mockResolvedValueOnce({
|
||||
buffer: buf,
|
||||
contentType: "audio/ogg",
|
||||
kind: "audio",
|
||||
fileName: "voice.ogg",
|
||||
});
|
||||
|
||||
await sendMessageWhatsApp("+1555", "voice note", {
|
||||
verbose: false,
|
||||
cfg: WHATSAPP_TEST_CFG,
|
||||
mediaUrl: "/tmp/voice.ogg",
|
||||
forceDocument: true,
|
||||
});
|
||||
|
||||
expect(sendMessage).toHaveBeenNthCalledWith(1, "+1555", "", buf, "audio/ogg; codecs=opus");
|
||||
expect(sendMessage).toHaveBeenNthCalledWith(2, "+1555", "voice note", undefined, undefined);
|
||||
});
|
||||
|
||||
it("uses account-aware WhatsApp media caps for outbound uploads", async () => {
|
||||
hoisted.controllerListeners.set("work", {
|
||||
sendComposingTo,
|
||||
|
||||
@@ -27,6 +27,10 @@ import { markdownToWhatsApp, toWhatsappJid } from "./text-runtime.js";
|
||||
|
||||
const outboundLog = createSubsystemLogger("gateway/channels/whatsapp").child("outbound");
|
||||
|
||||
/**
 * Reports whether prepared outbound media of the given kind is eligible for
 * forced document delivery.
 *
 * Only images and videos are eligible; audio and document kinds return
 * `false` and stay on their own delivery paths.
 *
 * @param kind - Media kind of the prepared outbound media.
 * @returns `true` when `kind` is `"image"` or `"video"`, otherwise `false`.
 */
function supportsForcedDocumentDelivery(kind: "image" | "audio" | "video" | "document"): boolean {
  return kind === "image" || kind === "video";
}
|
||||
|
||||
function resolveOutboundWhatsAppAccountId(params: {
|
||||
cfg: OpenClawConfig;
|
||||
accountId?: string;
|
||||
@@ -119,10 +123,12 @@ export async function sendMessageWhatsApp(
|
||||
let mediaType: string | undefined;
|
||||
let documentFileName: string | undefined;
|
||||
let visibleTextAfterVoice: string | undefined;
|
||||
let forceDocumentDelivery = false;
|
||||
if (primaryMediaUrl) {
|
||||
const media = await prepareWhatsAppOutboundMedia(
|
||||
await loadOutboundMediaFromUrl(primaryMediaUrl, {
|
||||
maxBytes: resolveWhatsAppMediaMaxBytes(account),
|
||||
optimizeImages: options.forceDocument ? false : undefined,
|
||||
mediaAccess: options.mediaAccess,
|
||||
mediaLocalRoots: options.mediaLocalRoots,
|
||||
mediaReadFile: options.mediaReadFile,
|
||||
@@ -132,7 +138,10 @@ export async function sendMessageWhatsApp(
|
||||
const caption = text || undefined;
|
||||
mediaBuffer = media.buffer;
|
||||
mediaType = media.mimetype;
|
||||
if (media.kind === "audio" && caption && !options.forceDocument) {
|
||||
forceDocumentDelivery = Boolean(
|
||||
options.forceDocument && supportsForcedDocumentDelivery(media.kind),
|
||||
);
|
||||
if (media.kind === "audio" && caption) {
|
||||
visibleTextAfterVoice = caption;
|
||||
text = "";
|
||||
} else if (media.kind === "document") {
|
||||
@@ -141,7 +150,7 @@ export async function sendMessageWhatsApp(
|
||||
} else {
|
||||
text = caption ?? "";
|
||||
}
|
||||
if (options.forceDocument) {
|
||||
if (forceDocumentDelivery) {
|
||||
documentFileName ??= media.fileName ?? "file";
|
||||
}
|
||||
}
|
||||
@@ -154,13 +163,13 @@ export async function sendMessageWhatsApp(
|
||||
const accountId = hasExplicitAccountId ? resolvedAccountId : undefined;
|
||||
const sendOptions: ActiveWebSendOptions | undefined =
|
||||
options.gifPlayback ||
|
||||
options.forceDocument ||
|
||||
forceDocumentDelivery ||
|
||||
accountId ||
|
||||
documentFileName ||
|
||||
options.quotedMessageKey
|
||||
? {
|
||||
...(options.gifPlayback ? { gifPlayback: true } : {}),
|
||||
...(options.forceDocument ? { asDocument: true } : {}),
|
||||
...(forceDocumentDelivery ? { asDocument: true } : {}),
|
||||
...(documentFileName ? { fileName: documentFileName } : {}),
|
||||
...(options.quotedMessageKey ? { quotedMessageKey: options.quotedMessageKey } : {}),
|
||||
accountId,
|
||||
|
||||
@@ -219,13 +219,13 @@ function buildSendSchema(options: { includePresentation: boolean; includeDeliver
|
||||
gifPlayback: Type.Optional(Type.Boolean()),
|
||||
forceDocument: Type.Optional(
|
||||
Type.Boolean({
|
||||
description: "Send image/GIF as document to avoid channel compression.",
|
||||
description: "Send image/GIF/video as document to avoid channel compression.",
|
||||
}),
|
||||
),
|
||||
asDocument: Type.Optional(
|
||||
Type.Boolean({
|
||||
description:
|
||||
"Send image/GIF as document to avoid channel compression. Alias for forceDocument.",
|
||||
"Send image/GIF/video as document to avoid channel compression. Alias for forceDocument.",
|
||||
}),
|
||||
),
|
||||
};
|
||||
|
||||
@@ -23,7 +23,7 @@ export type ChannelOutboundContext = {
|
||||
mediaLocalRoots?: readonly string[];
|
||||
mediaReadFile?: (filePath: string) => Promise<Buffer>;
|
||||
gifPlayback?: boolean;
|
||||
/** Send image as document to avoid channel compression. */
|
||||
/** Send image, GIF, or video as document to avoid channel compression. */
|
||||
forceDocument?: boolean;
|
||||
replyToId?: string | null;
|
||||
replyToIdSource?: "explicit" | "implicit";
|
||||
|
||||
@@ -26,7 +26,7 @@ export function registerMessageSendCommand(message: Command, helpers: MessageCli
|
||||
.option("--gif-playback", "Treat video media as GIF playback (WhatsApp only).", false)
|
||||
.option(
|
||||
"--force-document",
|
||||
"Send media as document to avoid channel compression (Telegram, WhatsApp). Applies to images and GIFs.",
|
||||
"Send media as document to avoid channel compression (Telegram, WhatsApp). Applies to images, GIFs, and videos.",
|
||||
false,
|
||||
)
|
||||
.option(
|
||||
|
||||
@@ -625,7 +625,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"asDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression. Alias for forceDocument.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression. Alias for forceDocument.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"asVoice": {
|
||||
@@ -702,7 +702,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"forceDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"gatewayToken": {
|
||||
|
||||
@@ -625,7 +625,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"asDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression. Alias for forceDocument.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression. Alias for forceDocument.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"asVoice": {
|
||||
@@ -702,7 +702,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"forceDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"gatewayToken": {
|
||||
|
||||
@@ -625,7 +625,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"asDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression. Alias for forceDocument.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression. Alias for forceDocument.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"asVoice": {
|
||||
@@ -702,7 +702,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"forceDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"gatewayToken": {
|
||||
|
||||
@@ -217,8 +217,8 @@ This is the deterministic model-bound layer stack OpenClaw can snapshot for the
|
||||
"roughTokens": 140
|
||||
},
|
||||
"dynamicToolsJson": {
|
||||
"chars": 44373,
|
||||
"roughTokens": 11094
|
||||
"chars": 44351,
|
||||
"roughTokens": 11088
|
||||
},
|
||||
"openClawDeveloperInstructions": {
|
||||
"chars": 5436,
|
||||
@@ -229,8 +229,8 @@ This is the deterministic model-bound layer stack OpenClaw can snapshot for the
|
||||
"roughTokens": 7129
|
||||
},
|
||||
"totalWithDynamicToolsJson": {
|
||||
"chars": 72891,
|
||||
"roughTokens": 18223
|
||||
"chars": 72869,
|
||||
"roughTokens": 18218
|
||||
},
|
||||
"userInputText": {
|
||||
"chars": 870,
|
||||
@@ -602,7 +602,7 @@ Full JSON: `codex-dynamic-tools.discord-group.json`
|
||||
"type": "string"
|
||||
},
|
||||
"asDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression. Alias for forceDocument.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression. Alias for forceDocument.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"asVoice": {
|
||||
@@ -679,7 +679,7 @@ Full JSON: `codex-dynamic-tools.discord-group.json`
|
||||
"type": "string"
|
||||
},
|
||||
"forceDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"gatewayToken": {
|
||||
|
||||
@@ -217,8 +217,8 @@ This is the deterministic model-bound layer stack OpenClaw can snapshot for the
|
||||
"roughTokens": 140
|
||||
},
|
||||
"dynamicToolsJson": {
|
||||
"chars": 44064,
|
||||
"roughTokens": 11016
|
||||
"chars": 44042,
|
||||
"roughTokens": 11011
|
||||
},
|
||||
"openClawDeveloperInstructions": {
|
||||
"chars": 4412,
|
||||
@@ -229,8 +229,8 @@ This is the deterministic model-bound layer stack OpenClaw can snapshot for the
|
||||
"roughTokens": 6748
|
||||
},
|
||||
"totalWithDynamicToolsJson": {
|
||||
"chars": 71058,
|
||||
"roughTokens": 17765
|
||||
"chars": 71036,
|
||||
"roughTokens": 17759
|
||||
},
|
||||
"userInputText": {
|
||||
"chars": 370,
|
||||
@@ -579,7 +579,7 @@ Full JSON: `codex-dynamic-tools.telegram-direct.json`
|
||||
"type": "string"
|
||||
},
|
||||
"asDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression. Alias for forceDocument.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression. Alias for forceDocument.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"asVoice": {
|
||||
@@ -656,7 +656,7 @@ Full JSON: `codex-dynamic-tools.telegram-direct.json`
|
||||
"type": "string"
|
||||
},
|
||||
"forceDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"gatewayToken": {
|
||||
|
||||
@@ -218,8 +218,8 @@ This is the deterministic model-bound layer stack OpenClaw can snapshot for the
|
||||
"roughTokens": 140
|
||||
},
|
||||
"dynamicToolsJson": {
|
||||
"chars": 45242,
|
||||
"roughTokens": 11311
|
||||
"chars": 45220,
|
||||
"roughTokens": 11305
|
||||
},
|
||||
"openClawDeveloperInstructions": {
|
||||
"chars": 4412,
|
||||
@@ -230,8 +230,8 @@ This is the deterministic model-bound layer stack OpenClaw can snapshot for the
|
||||
"roughTokens": 7155
|
||||
},
|
||||
"totalWithDynamicToolsJson": {
|
||||
"chars": 73863,
|
||||
"roughTokens": 18466
|
||||
"chars": 73841,
|
||||
"roughTokens": 18461
|
||||
},
|
||||
"userInputText": {
|
||||
"chars": 608,
|
||||
@@ -596,7 +596,7 @@ Full JSON: `codex-dynamic-tools.heartbeat-turn.json`
|
||||
"type": "string"
|
||||
},
|
||||
"asDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression. Alias for forceDocument.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression. Alias for forceDocument.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"asVoice": {
|
||||
@@ -673,7 +673,7 @@ Full JSON: `codex-dynamic-tools.heartbeat-turn.json`
|
||||
"type": "string"
|
||||
},
|
||||
"forceDocument": {
|
||||
"description": "Send image/GIF as document to avoid channel compression.",
|
||||
"description": "Send image/GIF/video as document to avoid channel compression.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"gatewayToken": {
|
||||
|
||||
Reference in New Issue
Block a user