Skip to content

Commit 9fbbb80

Browse files
committed
fix(pdf): bound remote body reads
1 parent 8284c03 commit 9fbbb80

4 files changed

Lines changed: 110 additions & 1 deletion

File tree

src/agents/tools/pdf-tool.test.ts

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -388,11 +388,35 @@ describe("createPdfTool", () => {
388388
const [loadRef, loadOptions] = firstMockCall(loadSpy, "loadWebMediaRaw");
389389
expect(loadRef).toBe("http://198.18.0.153/doc.pdf");
390390
expectFields(loadOptions, {
391+
readIdleTimeoutMs: 120_000,
391392
ssrfPolicy: { allowRfc2544BenchmarkRange: true },
392393
});
393394
});
394395
});
395396

397+
// Verifies that executing the PDF tool with an https URL forwards that URL unchanged to
// loadWebMediaRaw and that the load options include readIdleTimeoutMs: 120_000.
it("passes the shared remote read idle timeout when loading remote PDFs", async () => {
398+
await withTempPdfAgentDir(async (agentDir) => {
399+
const { loadSpy } = await stubPdfToolInfra(agentDir, {
400+
provider: "anthropic",
401+
input: ["text", "document"],
402+
});
403+
// Replace the native Anthropic PDF analyzer with a stub that resolves to a fixed summary.
vi.spyOn(pdfNativeProviders, "anthropicAnalyzePdf").mockResolvedValue("native summary");
404+
const cfg = withPdfModel(ANTHROPIC_PDF_MODEL);
405+
const tool = requirePdfTool((await loadCreatePdfTool())({ config: cfg, agentDir }));
406+
407+
await tool.execute("t1", {
408+
prompt: "summarize",
409+
pdf: "https://example.com/stalled.pdf",
410+
});
411+
412+
// Inspect the first recorded loadWebMediaRaw call: (reference, options).
const [loadRef, loadOptions] = firstMockCall(loadSpy, "loadWebMediaRaw");
413+
expect(loadRef).toBe("https://example.com/stalled.pdf");
414+
expectFields(loadOptions, {
415+
readIdleTimeoutMs: 120_000,
416+
});
417+
});
418+
});
419+
396420
it("allows managed inbound absolute PDF paths when workspaceOnly is enabled", async () => {
397421
await withManagedInboundPdf(async ({ mediaPath }) => {
398422
await withTempPdfAgentDir(async (agentDir) => {

src/agents/tools/pdf-tool.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ const DEFAULT_PROMPT = "Analyze this PDF document.";
5252
const DEFAULT_MAX_PDFS = 10;
5353
const DEFAULT_MAX_BYTES_MB = 10;
5454
const DEFAULT_MAX_PAGES = 20;
55+
// Value (in ms, i.e. 2 minutes) passed as `readIdleTimeoutMs` to loadWebMediaRaw when the
// PDF reference is an HTTP URL; it is not passed for non-HTTP references.
const PDF_REMOTE_READ_IDLE_TIMEOUT_MS = 120_000;
5556

5657
const PDF_MIN_TEXT_CHARS = 200;
5758
const PDF_MAX_PIXELS = 4_000_000;
@@ -444,6 +445,7 @@ export function createPdfTool(options?: {
444445
: await loadWebMediaRaw(resolvedPathInfo.resolved, {
445446
maxBytes,
446447
localRoots,
448+
...(isHttpUrl ? { readIdleTimeoutMs: PDF_REMOTE_READ_IDLE_TIMEOUT_MS } : {}),
447449
ssrfPolicy: remoteMediaSsrfPolicy,
448450
});
449451

src/media/web-media.test.ts

Lines changed: 81 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ import { resetPluginRuntimeStateForTest, setActivePluginRegistry } from "../plug
1010

1111
let LocalMediaAccessError: typeof import("./web-media.js").LocalMediaAccessError;
1212
let loadWebMedia: typeof import("./web-media.js").loadWebMedia;
13+
let loadWebMediaRaw: typeof import("./web-media.js").loadWebMediaRaw;
1314
let optimizeImageToJpeg: typeof import("./web-media.js").optimizeImageToJpeg;
1415

1516
const TINY_PNG_BASE64 =
@@ -39,7 +40,8 @@ function installCanvasMediaResolver() {
3940
}
4041

4142
beforeAll(async () => {
42-
({ LocalMediaAccessError, loadWebMedia, optimizeImageToJpeg } = await import("./web-media.js"));
43+
({ LocalMediaAccessError, loadWebMedia, loadWebMediaRaw, optimizeImageToJpeg } =
44+
await import("./web-media.js"));
4345
fixtureRoot = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "web-media-core-"));
4446
tinyPngFile = path.join(fixtureRoot, "tiny.png");
4547
await fs.writeFile(tinyPngFile, Buffer.from(TINY_PNG_BASE64, "base64"));
@@ -75,6 +77,47 @@ afterAll(async () => {
7577
});
7678

7779
describe("loadWebMedia", () => {
80+
/**
 * Builds a mock fetch implementation (a `vi.fn`) that resolves to a 200 response with
 * content-type `application/pdf`. The response body is a stream that enqueues
 * `firstChunk` on start and is then never written to again or closed, so the body
 * does not complete after that first chunk.
 *
 * @param firstChunk Bytes enqueued as the only chunk of the response body.
 * @returns The mock fetch function.
 */
function makeStallingFetch(firstChunk: Uint8Array) {
81+
return vi.fn(
82+
async () =>
83+
new Response(
84+
new ReadableStream<Uint8Array>({
85+
start(controller) {
86+
// Emit one chunk only; the controller is deliberately never closed.
controller.enqueue(firstChunk);
87+
},
88+
}),
89+
{
90+
status: 200,
91+
headers: { "content-type": "application/pdf" },
92+
},
93+
),
94+
);
95+
}
96+
97+
/**
 * Asserts that a load started by `createLoadPromise` rejects once fake time has advanced
 * past `idleTimeoutMs`, and that the rejection text matches /stalled|no data received/i.
 *
 * Fake timers are enabled for the duration of the check and real timers are restored in
 * `finally`, whether or not the assertions pass.
 *
 * @param createLoadPromise Factory that starts the load; called after fake timers are installed.
 * @param idleTimeoutMs Idle timeout the load is expected to honor; time is advanced by this plus 5 ms.
 */
async function expectWebMediaIdleTimeout(
98+
createLoadPromise: () => Promise<unknown>,
99+
idleTimeoutMs: number,
100+
) {
101+
vi.useFakeTimers();
102+
try {
103+
// Map settlement to a tagged value so a rejection is captured for inspection instead of thrown.
const outcome = createLoadPromise().then(
104+
() => ({ status: "resolved" as const }),
105+
(error: unknown) => ({ status: "rejected" as const, error }),
106+
);
107+
await vi.advanceTimersByTimeAsync(idleTimeoutMs + 5);
108+
// Race against an already-resolved "pending" sentinel: if the load has not settled by
// now, the sentinel wins and the assertion fails instead of the test hanging.
await expect(
109+
Promise.race([outcome, Promise.resolve({ status: "pending" as const })]),
110+
).resolves.toMatchObject({ status: "rejected" });
111+
const result = await outcome;
112+
expect(result.status).toBe("rejected");
113+
// The status check also narrows `result` so `error` is accessible to the type checker.
if (result.status === "rejected") {
114+
expect(String(result.error)).toMatch(/stalled|no data received/i);
115+
}
116+
} finally {
117+
vi.useRealTimers();
118+
}
119+
}
120+
78121
function createLocalWebMediaOptions() {
79122
return {
80123
maxBytes: 1024 * 1024,
@@ -689,6 +732,43 @@ describe("loadWebMedia", () => {
689732
}
690733
});
691734

735+
// Verifies that loadWebMediaRaw rejects with an idle-timeout style error when the remote
// body delivers one chunk and then stops, given a 20 ms readIdleTimeoutMs.
it("applies the shared remote read idle timeout for raw web media loads", async () => {
736+
const readIdleTimeoutMs = 20;
737+
// 0x25 0x50 0x44 0x46 is ASCII "%PDF".
const fetchImpl = makeStallingFetch(new Uint8Array([0x25, 0x50, 0x44, 0x46]));
738+
739+
await expectWebMediaIdleTimeout(
740+
() =>
741+
loadWebMediaRaw("https://example.test/stalled.pdf", {
742+
maxBytes: 1024 * 1024,
743+
fetchImpl,
744+
readIdleTimeoutMs,
745+
ssrfPolicy: { allowedHostnames: ["example.test"] },
746+
}),
747+
readIdleTimeoutMs,
748+
);
749+
});
750+
751+
// Verifies that passing readIdleTimeoutMs does not break a load whose body is fully
// available: the result is a "document" with the PDF content type and the original bytes.
it("loads a valid remote PDF when the raw web media read stays active", async () => {
752+
// Mock fetch returning a complete, in-memory PDF body.
const fetchImpl = vi.fn(
753+
async () =>
754+
new Response(Buffer.from("%PDF-1.4\n%%EOF"), {
755+
status: 200,
756+
headers: { "content-type": "application/pdf" },
757+
}),
758+
);
759+
760+
const result = await loadWebMediaRaw("https://example.test/ok.pdf", {
761+
maxBytes: 1024 * 1024,
762+
fetchImpl,
763+
readIdleTimeoutMs: 20,
764+
ssrfPolicy: { allowedHostnames: ["example.test"] },
765+
});
766+
767+
expect(result.kind).toBe("document");
768+
expect(result.contentType).toBe("application/pdf");
769+
expect(result.buffer.toString()).toContain("%PDF-1.4");
770+
});
771+
692772
it("rejects unsupported media store URI locations", async () => {
693773
await expectLoadWebMediaErrorCode(
694774
loadWebMedia("media://outbound/tiny.png"),

src/media/web-media.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ type WebMediaOptions = {
4949
proxyUrl?: string;
5050
fetchImpl?: (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>;
5151
requestInit?: RequestInit;
52+
readIdleTimeoutMs?: number;
5253
trustExplicitProxyDns?: boolean;
5354
workspaceDir?: string;
5455
/** Allowed root directories for local path reads. "any" is deprecated; prefer sandboxValidated + readFile. */
@@ -388,6 +389,7 @@ async function loadWebMediaInternal(
388389
proxyUrl,
389390
fetchImpl,
390391
requestInit,
392+
readIdleTimeoutMs,
391393
trustExplicitProxyDns,
392394
workspaceDir,
393395
localRoots,
@@ -521,6 +523,7 @@ async function loadWebMediaInternal(
521523
url: mediaUrl,
522524
fetchImpl,
523525
requestInit,
526+
readIdleTimeoutMs,
524527
maxBytes: fetchCap,
525528
ssrfPolicy,
526529
dispatcherPolicy,

0 commit comments

Comments
 (0)