Skip to content

Commit 00e932a

Browse files
authored
fix: restore inbound image embedding for CLI routed BlueBubbles turns (#51373)
* fix(cli): hydrate prompt image refs for inbound media
* Agents: harden CLI prompt image hydration (#51373)
* test: fix CLI prompt image hydration helper mocks
1 parent a16dd96 commit 00e932a

5 files changed

Lines changed: 285 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
1717

1818
### Fixes
1919

20+
- BlueBubbles/CLI agents: restore inbound prompt image refs for CLI routed turns, reapply embedded runner image size guardrails, and cover both CLI image transport paths with regression tests. (#51373)
2021
- OpenAI Codex/image tools: register Codex for media understanding and route image prompts through Codex instructions so image analysis no longer fails on missing provider registration or missing `instructions`. (#54829) Thanks @neeravmakwana.
2122
- Telegram: deliver verbose tool summaries inside forum topic sessions again, so threaded topic chats now match DM verbose behavior. (#43236) Thanks @frankbuild.
2223
- Agents/sandbox: honor `tools.sandbox.tools.alsoAllow`, let explicit sandbox re-allows remove matching built-in default-deny tools, and keep sandbox explain/error guidance aligned with the effective sandbox tool policy. (#54492) Thanks @ngutman.
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import type { ImageContent } from "@mariozechner/pi-ai";
2+
import { beforeEach, describe, expect, it, vi } from "vitest";
3+
import { MAX_IMAGE_BYTES } from "../media/constants.js";
4+
import { loadPromptRefImages } from "./cli-runner/helpers.js";
5+
import * as promptImageUtils from "./pi-embedded-runner/run/images.js";
6+
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
7+
import * as toolImages from "./tool-images.js";
8+
9+
// Unit coverage for loadPromptRefImages. The image loader and the sanitizer are
// replaced with spies, so only the helper's own orchestration is exercised.
describe("loadPromptRefImages", () => {
  // Undo spies installed by the previous case so call counts never leak across tests.
  beforeEach(() => {
    vi.restoreAllMocks();
  });

  it("returns empty results when the prompt has no image refs", async () => {
    const loadSpy = vi.spyOn(promptImageUtils, "loadImageFromRef");
    const sanitizeSpy = vi.spyOn(toolImages, "sanitizeImageBlocks");

    const pending = loadPromptRefImages({ prompt: "just text", workspaceDir: "/workspace" });
    await expect(pending).resolves.toEqual([]);

    // With nothing detected, neither collaborator should be touched.
    expect(loadSpy).not.toHaveBeenCalled();
    expect(sanitizeSpy).not.toHaveBeenCalled();
  });

  it("passes the max-byte guardrail through load and sanitize", async () => {
    const rawImage: ImageContent = {
      type: "image",
      data: "c29tZS1pbWFnZQ==",
      mimeType: "image/png",
    };
    const cleanedImage: ImageContent = {
      type: "image",
      data: "c2FuaXRpemVkLWltYWdl",
      mimeType: "image/jpeg",
    };
    const sandbox = { root: "/sandbox", bridge: {} as SandboxFsBridge };

    const loadSpy = vi.spyOn(promptImageUtils, "loadImageFromRef");
    loadSpy.mockResolvedValueOnce(rawImage);
    const sanitizeSpy = vi.spyOn(toolImages, "sanitizeImageBlocks");
    sanitizeSpy.mockResolvedValueOnce({ images: [cleanedImage], dropped: 0 });

    const result = await loadPromptRefImages({
      prompt: "Look at /tmp/photo.png",
      workspaceDir: "/workspace",
      workspaceOnly: true,
      sandbox,
    });

    // Inspect the arguments of the first (and only expected) loader call.
    const [passedRef, passedWorkspaceDir, passedOptions] = loadSpy.mock.calls[0] ?? [];
    expect(passedRef).toMatchObject({ resolved: "/tmp/photo.png", type: "path" });
    expect(passedWorkspaceDir).toBe("/workspace");
    expect(passedOptions).toEqual({
      maxBytes: MAX_IMAGE_BYTES,
      workspaceOnly: true,
      sandbox,
    });

    // The same byte limit must reach the sanitizer, and its output is what callers get.
    expect(sanitizeSpy).toHaveBeenCalledWith([rawImage], "prompt:images", {
      maxBytes: MAX_IMAGE_BYTES,
    });
    expect(result).toEqual([cleanedImage]);
  });

  it("dedupes repeated refs and skips failed loads before sanitizing", async () => {
    const onlyLoadedImage: ImageContent = {
      type: "image",
      data: "b25lLWltYWdl",
      mimeType: "image/png",
    };

    // First distinct ref loads successfully; the second resolves to null (load failure).
    const loadSpy = vi.spyOn(promptImageUtils, "loadImageFromRef");
    loadSpy.mockResolvedValueOnce(onlyLoadedImage).mockResolvedValueOnce(null);
    const sanitizeSpy = vi.spyOn(toolImages, "sanitizeImageBlocks");
    sanitizeSpy.mockResolvedValueOnce({ images: [onlyLoadedImage], dropped: 0 });

    const result = await loadPromptRefImages({
      prompt: "Compare /tmp/a.png with /tmp/a.png and /tmp/b.png",
      workspaceDir: "/workspace",
    });

    // /tmp/a.png appears twice in the prompt but must only be loaded once.
    expect(loadSpy).toHaveBeenCalledTimes(2);
    const requestedPaths = loadSpy.mock.calls.map(
      ([ref]) => (ref as { resolved?: string } | undefined)?.resolved,
    );
    expect(requestedPaths).toEqual(["/tmp/a.png", "/tmp/b.png"]);

    // Only the successfully loaded image is forwarded to the sanitizer.
    expect(sanitizeSpy).toHaveBeenCalledWith([onlyLoadedImage], "prompt:images", {
      maxBytes: MAX_IMAGE_BYTES,
    });
    expect(result).toEqual([onlyLoadedImage]);
  });
});

src/agents/cli-runner.test.ts

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@ import os from "node:os";
33
import path from "node:path";
44
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
55
import type { OpenClawConfig } from "../config/config.js";
6+
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
67
import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";
78
import type { EmbeddedContextFile } from "./pi-embedded-helpers.js";
89
import type { WorkspaceBootstrapFile } from "./workspace.js";
910

1011
const supervisorSpawnMock = vi.fn();
1112
const enqueueSystemEventMock = vi.fn();
1213
const requestHeartbeatNowMock = vi.fn();
14+
const SMALL_PNG_BASE64 =
15+
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
1316
const hoisted = vi.hoisted(() => {
1417
type BootstrapContext = {
1518
bootstrapFiles: WorkspaceBootstrapFile[];
@@ -325,6 +328,135 @@ describe("runCliAgent with process supervisor", () => {
325328
expect(promptCarrier).toContain("hi");
326329
});
327330

331+
// Regression: a "[media attached: …]" prompt run through codex-cli is expected to
// produce an `--image` argument pointing at a staged copy, not the source file.
it("hydrates prompt media refs into CLI image args", async () => {
  supervisorSpawnMock.mockResolvedValueOnce(
    createManagedRun({
      reason: "exit",
      exitCode: 0,
      exitSignal: null,
      durationMs: 50,
      stdout: "ok",
      stderr: "",
      timedOut: false,
      noOutputTimedOut: false,
    }),
  );

  // Stage a real PNG on disk so the prompt reference can actually be loaded.
  const tmpPrefix = path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-");
  const workDir = await fs.mkdtemp(tmpPrefix);
  const attachedImage = path.join(workDir, "bb-image.png");
  await fs.writeFile(attachedImage, Buffer.from(SMALL_PNG_BASE64, "base64"));

  try {
    await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: workDir,
      prompt: `[media attached: ${attachedImage} (image/png)]\n\n<media:image>`,
      provider: "codex-cli",
      model: "gpt-5.2-codex",
      timeoutMs: 1_000,
      runId: "run-prompt-image",
    });
  } finally {
    await fs.rm(workDir, { recursive: true, force: true });
  }

  const spawnInput = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
  const spawnArgs = spawnInput.argv ?? [];
  const flagIndex = spawnArgs.indexOf("--image");
  const flagValue = spawnArgs[flagIndex + 1];

  expect(flagIndex).toBeGreaterThanOrEqual(0);
  expect(flagValue).toContain("openclaw-cli-images-");
  expect(flagValue).not.toBe(attachedImage);
});
373+
374+
// Regression: for a backend run without an `--image` argument (claude-cli here),
// the staged image path is expected to be appended to the prompt text instead.
it("appends hydrated prompt media refs to generic backend prompts", async () => {
  supervisorSpawnMock.mockResolvedValueOnce(
    createManagedRun({
      reason: "exit",
      exitCode: 0,
      exitSignal: null,
      durationMs: 50,
      stdout: "ok",
      stderr: "",
      timedOut: false,
      noOutputTimedOut: false,
    }),
  );

  const tempDir = await fs.mkdtemp(
    path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-generic-"),
  );
  const sourceImage = path.join(tempDir, "claude-image.png");

  try {
    // Written inside the try so the temp dir is still removed if the write fails.
    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
    await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: tempDir,
      prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
      provider: "claude-cli",
      model: "claude-opus-4-1",
      timeoutMs: 1_000,
      runId: "run-prompt-image-generic",
    });
  } finally {
    await fs.rm(tempDir, { recursive: true, force: true });
  }

  const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; input?: string };
  const argv = input.argv ?? [];
  expect(argv).not.toContain("--image");

  // The prompt may be delivered via stdin (`input`) or via argv, so search both.
  // Looking only in argv would miss the stdin case, and asserting afterwards that
  // the carrier contains an argv element could never fail.
  const promptCarrier = [input.input ?? "", ...argv].join("\n");
  const appendedPath = promptCarrier
    .split("\n")
    .map((line) => line.trim())
    .find((line) => line.includes("openclaw-cli-images-"));
  expect(appendedPath).toBeDefined();
  expect(appendedPath).not.toBe(sourceImage);
});
418+
419+
// When the caller supplies `images` explicitly, the prompt's media reference is
// expected to be ignored: exactly one `--image` flag should reach the spawned CLI.
it("prefers explicit images over prompt refs", async () => {
  supervisorSpawnMock.mockResolvedValueOnce(
    createManagedRun({
      reason: "exit",
      exitCode: 0,
      exitSignal: null,
      durationMs: 50,
      stdout: "ok",
      stderr: "",
      timedOut: false,
      noOutputTimedOut: false,
    }),
  );

  // A loadable image is still written so the prompt ref *could* be hydrated.
  const tmpPrefix = path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-explicit-images-");
  const workDir = await fs.mkdtemp(tmpPrefix);
  const promptImage = path.join(workDir, "ignored-prompt-image.png");
  await fs.writeFile(promptImage, Buffer.from(SMALL_PNG_BASE64, "base64"));

  try {
    await runCliAgent({
      sessionId: "s1",
      sessionFile: "/tmp/session.jsonl",
      workspaceDir: workDir,
      prompt: `[media attached: ${promptImage} (image/png)]\n\n<media:image>`,
      images: [{ type: "image", data: SMALL_PNG_BASE64, mimeType: "image/png" }],
      provider: "codex-cli",
      model: "gpt-5.2-codex",
      timeoutMs: 1_000,
      runId: "run-explicit-image-precedence",
    });
  } finally {
    await fs.rm(workDir, { recursive: true, force: true });
  }

  const spawnInput = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
  const spawnArgs = spawnInput.argv ?? [];
  const imageFlags = spawnArgs.filter((arg) => arg === "--image");
  expect(imageFlags).toHaveLength(1);
});
459+
328460
it("fails with timeout when no-output watchdog trips", async () => {
329461
supervisorSpawnMock.mockResolvedValueOnce(
330462
createManagedRun({

src/agents/cli-runner.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import {
2727
buildCliArgs,
2828
buildSystemPrompt,
2929
enqueueCliRun,
30+
loadPromptRefImages,
3031
normalizeCliModel,
3132
parseCliJson,
3233
parseCliJsonl,
@@ -222,8 +223,12 @@ export async function runCliAgent(params: {
222223
let prompt = prependBootstrapPromptWarning(params.prompt, bootstrapPromptWarning.lines, {
223224
preserveExactPrompt: heartbeatPrompt,
224225
});
225-
if (params.images && params.images.length > 0) {
226-
const imagePayload = await writeCliImages(params.images);
226+
const resolvedImages =
227+
params.images && params.images.length > 0
228+
? params.images
229+
: await loadPromptRefImages({ prompt, workspaceDir });
230+
if (resolvedImages.length > 0) {
231+
const imagePayload = await writeCliImages(resolvedImages);
227232
imagePaths = imagePayload.paths;
228233
cleanupImages = imagePayload.cleanup;
229234
if (!backend.imageArg) {

src/agents/cli-runner/helpers.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,19 @@ import { KeyedAsyncQueue } from "openclaw/plugin-sdk/keyed-async-queue";
88
import type { ThinkLevel } from "../../auto-reply/thinking.js";
99
import type { OpenClawConfig } from "../../config/config.js";
1010
import type { CliBackendConfig } from "../../config/types.js";
11+
import { MAX_IMAGE_BYTES } from "../../media/constants.js";
1112
import { buildTtsSystemPromptHint } from "../../tts/tts.js";
1213
import { isRecord } from "../../utils.js";
1314
import { buildModelAliasLines } from "../model-alias-lines.js";
1415
import { resolveDefaultModelForAgent } from "../model-selection.js";
1516
import { resolveOwnerDisplaySetting } from "../owner-display.js";
1617
import type { EmbeddedContextFile } from "../pi-embedded-helpers.js";
18+
import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js";
19+
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
1720
import { detectRuntimeShell } from "../shell-utils.js";
1821
import { buildSystemPromptParams } from "../system-prompt-params.js";
1922
import { buildAgentSystemPrompt } from "../system-prompt.js";
23+
import { sanitizeImageBlocks } from "../tool-images.js";
2024
export { buildCliSupervisorScopeKey, resolveCliNoOutputTimeoutMs } from "./reliability.js";
2125

2226
const CLI_RUN_QUEUE = new KeyedAsyncQueue();
@@ -324,6 +328,43 @@ export function appendImagePathsToPrompt(prompt: string, paths: string[]): strin
324328
return `${trimmed}${separator}${paths.join("\n")}`;
325329
}
326330

331+
/**
 * Loads the images referenced inside a prompt's text.
 *
 * References are found with `detectImageReferences`, de-duplicated by their
 * `type` and `resolved` values, loaded one at a time through `loadImageFromRef`,
 * and the successfully loaded images are then passed through
 * `sanitizeImageBlocks` under the `"prompt:images"` label.
 *
 * @param params.prompt - Prompt text to scan for image references.
 * @param params.workspaceDir - Forwarded to `loadImageFromRef` as its workspace directory.
 * @param params.maxBytes - Byte limit handed to both the loader and the sanitizer;
 *   defaults to `MAX_IMAGE_BYTES` when omitted.
 * @param params.workspaceOnly - Forwarded unchanged to `loadImageFromRef`.
 * @param params.sandbox - Forwarded unchanged to `loadImageFromRef`.
 * @returns The sanitized images, or an empty array when the prompt has no references.
 */
export async function loadPromptRefImages(params: {
  prompt: string;
  workspaceDir: string;
  maxBytes?: number;
  workspaceOnly?: boolean;
  sandbox?: { root: string; bridge: SandboxFsBridge };
}): Promise<ImageContent[]> {
  const detected = detectImageReferences(params.prompt);
  if (detected.length === 0) {
    return [];
  }

  const byteLimit = params.maxBytes ?? MAX_IMAGE_BYTES;

  // Keep only the first occurrence of each reference; Map preserves insertion order.
  const uniqueRefs = new Map<string, (typeof detected)[number]>();
  for (const candidate of detected) {
    const dedupeKey = `${candidate.type}:${candidate.resolved}`;
    if (!uniqueRefs.has(dedupeKey)) {
      uniqueRefs.set(dedupeKey, candidate);
    }
  }

  // Load sequentially; a falsy result means the ref could not be loaded and is skipped.
  const loaded: ImageContent[] = [];
  for (const ref of uniqueRefs.values()) {
    const image = await loadImageFromRef(ref, params.workspaceDir, {
      maxBytes: byteLimit,
      workspaceOnly: params.workspaceOnly,
      sandbox: params.sandbox,
    });
    if (image) {
      loaded.push(image);
    }
  }

  const sanitized = await sanitizeImageBlocks(loaded, "prompt:images", {
    maxBytes: byteLimit,
  });
  return sanitized.images;
}
367+
327368
export async function writeCliImages(
328369
images: ImageContent[],
329370
): Promise<{ paths: string[]; cleanup: () => Promise<void> }> {

0 commit comments

Comments
 (0)