fix: adapt OpenAI batch upload sizing

shakkernerd · jalehman · commit 66d362a56dfb · 2026-06-09T06:29:03.000-04:00
diff --git a/extensions/openai/embedding-batch.test.ts b/extensions/openai/embedding-batch.test.ts
@@ -4,9 +4,9 @@ import { parseOpenAiBatchOutput, runOpenAiEmbeddingBatches } from "./embedding-b
 
 const jsonlEncoder = new TextEncoder();
 
-function jsonResponse(body: unknown): Response {
+function jsonResponse(body: unknown, status = 200): Response { // status defaults to 200; a test can pass another code (e.g. 413) to fake an error response
   return new Response(JSON.stringify(body), {
-    status: 200,
+    status,
     headers: { "Content-Type": "application/json" },
   });
 }
@@ -129,4 +129,118 @@ describe("OpenAI embedding batch output", () => {
       ["2", [3]],
     ]);
   });
+
+  it("adapts OpenAI-compatible upload groups after payload-size rejection", async () => { // fake provider rejects any upload holding more than two requests
+    const requests: Parameters<typeof runOpenAiEmbeddingBatches>[0]["requests"] = Array.from(
+      { length: 4 },
+      (_, index) => ({
+        custom_id: String(index),
+        method: "POST" as const,
+        url: "/v1/embeddings",
+        body: {
+          model: "text-embedding-3-small",
+          input: `payload-${index}`,
+        },
+      }),
+    );
+    const uploadedGroups: string[][] = []; // custom_ids of every upload attempt, in order
+    const requestsByFileId = new Map<string, Array<{ custom_id?: string }>>(); // accepted uploads, keyed by the fake file id
+    const outputByFileId = new Map<string, string>(); // JSONL output text, keyed by the fake output file id
+    const debug = vi.fn();
+    let fileIndex = 0;
+    let batchIndex = 0;
+    const fetchImpl = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => { // fake fetch: file upload, batch creation, output download
+      const url = fetchInputUrl(input);
+      if (url.endsWith("/files") && init?.method === "POST") {
+        const form = init.body as FormData;
+        const file = form.get("file");
+        if (!(file instanceof Blob)) {
+          throw new Error("missing batch upload file");
+        }
+        const uploadedRequests = (await file.text())
+          .split("\n")
+          .map((line) => JSON.parse(line) as { custom_id?: string }); // one JSON request per line of the uploaded file
+        const customIds = uploadedRequests.map((request) => request.custom_id ?? "");
+        uploadedGroups.push(customIds); // recorded before the size check, so rejected uploads are captured too
+        if (uploadedRequests.length > 2) { // stand-in for a size limit: reply 413 with a payload-too-large error body
+          return jsonResponse(
+            {
+              error: {
+                message: "Request body too large. Maximum allowed: 10 MB",
+                type: "payload_too_large",
+                code: "PAYLOAD_TOO_LARGE",
+              },
+            },
+            413,
+          );
+        }
+        const fileId = `file-${fileIndex}`;
+        fileIndex += 1;
+        requestsByFileId.set(fileId, uploadedRequests);
+        return jsonResponse({ id: fileId });
+      }
+      if (url.endsWith("/batches") && init?.method === "POST") {
+        const body = parseStringBody(init) as { input_file_id?: string };
+        const batchId = `batch-${batchIndex}`;
+        const outputFileId = `output-${batchIndex}`;
+        batchIndex += 1;
+        const uploadedRequests = requestsByFileId.get(body.input_file_id ?? "") ?? []; // unknown file ids fall back to an empty request list
+        outputByFileId.set(
+          outputFileId,
+          uploadedRequests
+            .map((request) =>
+              JSON.stringify({
+                custom_id: request.custom_id,
+                response: {
+                  status_code: 200,
+                  body: { data: [{ embedding: [Number(request.custom_id) + 1] }] }, // deterministic one-element embedding: custom_id + 1
+                },
+              }),
+            )
+            .join("\n"),
+        );
+        return jsonResponse({ id: batchId, status: "completed", output_file_id: outputFileId }); // batch is reported as already completed
+      }
+      const contentMatch = url.match(/\/files\/([^/]+)\/content$/);
+      if (contentMatch) {
+        return new Response(outputByFileId.get(contentMatch[1] ?? "") ?? "", { status: 200 });
+      }
+      return new Response("unexpected request", { status: 500 }); // any other request gets a 500
+    });
+
+    const byCustomId = await runOpenAiEmbeddingBatches({
+      openAi: {
+        baseUrl: "https://openai-compatible.example/v1",
+        headers: { Authorization: "Bearer test" },
+        model: "text-embedding-3-small",
+        fetchImpl,
+      },
+      agentId: "main",
+      requests,
+      wait: true,
+      concurrency: 1,
+      pollIntervalMs: 1000,
+      timeoutMs: 60_000,
+      debug,
+    });
+
+    expect(uploadedGroups).toEqual([ // the rejected four-request upload first, then its two halves in order
+      ["0", "1", "2", "3"],
+      ["0", "1"],
+      ["2", "3"],
+    ]);
+    expect(debug).toHaveBeenCalledWith(
+      "memory embeddings: openai batch upload too large; splitting group",
+      expect.objectContaining({
+        requests: 4,
+        parts: [2, 2],
+      }),
+    );
+    expect([...byCustomId.entries()]).toEqual([ // all four requests end up with an embedding despite the first rejection
+      ["0", [1]],
+      ["1", [2]],
+      ["2", [3]],
+      ["3", [4]],
+    ]);
+  });
 });
diff --git a/extensions/openai/embedding-batch.ts b/extensions/openai/embedding-batch.ts
@@ -134,6 +134,25 @@ async function fetchOpenAiBatchResource<T>(params: {
   });
 }
 
+function formatOpenAiBatchError(error: unknown): string { // text form of a thrown value
+  return error instanceof Error ? error.message : String(error); // Error -> its message; anything else -> String(value)
+}
+
+function isOpenAiBatchUploadTooLargeError(error: unknown): boolean { // true when an upload-failure message also reads like a size rejection
+  const message = formatOpenAiBatchError(error);
+  if (!/openai batch file upload failed/i.test(message)) { // other failures never qualify; presumably this text comes from the upload step — confirm against it
+    return false;
+  }
+  return (
+    /\b413\b/.test(message) || // "413" as a standalone number (HTTP Payload Too Large)
+    /payload too large/i.test(message) ||
+    /request body too large/i.test(message) ||
+    /file too large/i.test(message) ||
+    /maximum allowed/i.test(message) || // e.g. "Maximum allowed: 10 MB"
+    /max(?:imum)? (?:body|payload|file) (?:size )?(?:exceeded|limit)/i.test(message) // e.g. "max payload size exceeded", "maximum file limit"
+  );
+}
+
 export function parseOpenAiBatchOutput(text: string): OpenAiBatchOutputLine[] {
   if (!text.trim()) {
     return [];
@@ -294,6 +313,15 @@ export async function runOpenAiEmbeddingBatches(
       maxJsonlBytes: params.maxJsonlBytes ?? OPENAI_BATCH_MAX_JSONL_BYTES,
       debugLabel: "memory embeddings: openai batch submit",
     }),
+    shouldSplitGroupOnError: isOpenAiBatchUploadTooLargeError, // only upload-size rejections make a group eligible for splitting
+    onSplitGroup: ({ error, group, parts, depth }) => { // debug-log each split: request count, part sizes, depth, error text
+      params.debug?.("memory embeddings: openai batch upload too large; splitting group", {
+        requests: group.length,
+        parts: parts.map((part) => part.length),
+        depth,
+        error: formatOpenAiBatchError(error),
+      });
+    },
     runGroup: async ({ group, groupIndex, groups, byCustomId, pollIntervalMs, timeoutMs }) => {
       const batchInfo = await submitOpenAiBatch({
         openAi: params.openAi,
diff --git a/packages/memory-host-sdk/src/host/batch-runner.test.ts b/packages/memory-host-sdk/src/host/batch-runner.test.ts
@@ -93,4 +93,65 @@ describe("buildEmbeddingBatchGroupOptions", () => {
 
     expect(groups).toEqual([["one", "two"], ["three"]]);
   });
+
+  it("splits provider-rejected batch groups when the error is splittable", async () => { // a failing four-item group is retried as two halves
+    const uploadTooLarge = new Error("batch upload failed: 413 payload too large");
+    const calls: string[][] = []; // snapshot of every group handed to runGroup, in call order
+    const onSplitGroup = vi.fn();
+
+    await runEmbeddingBatchGroups({
+      requests: ["one", "two", "three", "four"],
+      maxRequests: 100,
+      wait: true,
+      pollIntervalMs: 1000,
+      timeoutMs: 60_000,
+      concurrency: 1,
+      debugLabel: "embedding batch submit",
+      shouldSplitGroupOnError: (error) => error === uploadTooLarge, // splittable only for this exact error instance
+      onSplitGroup,
+      runGroup: async ({ group }) => {
+        calls.push([...group]);
+        if (group.length === 4) { // only the full group fails; the halves succeed
+          throw uploadTooLarge;
+        }
+      },
+    });
+
+    expect(calls).toEqual([
+      ["one", "two", "three", "four"],
+      ["one", "two"],
+      ["three", "four"],
+    ]);
+    expect(onSplitGroup).toHaveBeenCalledWith(
+      expect.objectContaining({
+        error: uploadTooLarge,
+        group: ["one", "two", "three", "four"],
+        parts: [
+          ["one", "two"],
+          ["three", "four"],
+        ],
+        depth: 0, // the group that failed was an initial (unsplit) group
+      }),
+    );
+  });
+
+  it("does not split a single rejected batch request", async () => { // a one-item group has nothing left to split
+    const uploadTooLarge = new Error("batch upload failed: 413 payload too large");
+
+    await expect(
+      runEmbeddingBatchGroups({
+        requests: ["one"],
+        maxRequests: 100,
+        wait: true,
+        pollIntervalMs: 1000,
+        timeoutMs: 60_000,
+        concurrency: 1,
+        debugLabel: "embedding batch submit",
+        shouldSplitGroupOnError: () => true, // predicate allows splitting, yet a single request must still fail
+        runGroup: async () => {
+          throw uploadTooLarge;
+        },
+      }),
+    ).rejects.toThrow(uploadTooLarge); // the original error is propagated
+  });
 });
diff --git a/packages/memory-host-sdk/src/host/batch-runner.ts b/packages/memory-host-sdk/src/host/batch-runner.ts
@@ -14,6 +14,24 @@ export type EmbeddingBatchExecutionParams = {
   debug?: (message: string, data?: Record<string, unknown>) => void;
 };
 
+type EmbeddingBatchGroupRunArgs<TRequest> = { // arguments handed to the caller-supplied runGroup callback
+  group: TRequest[]; // requests for this attempt: an initial group or a split-off part
+  groupIndex: number; // index of the initial group; split parts reuse their parent's index
+  groups: number; // number of initial groups (not changed by splitting)
+  byCustomId: Map<string, number[]>; // single map instance shared by every group in the run
+  pollIntervalMs: number; // poll interval after clamping by resolveEmbeddingBatchPollIntervalMs
+  timeoutMs: number; // passed through from the runner's params unchanged
+};
+
+type EmbeddingBatchSplitArgs<TRequest> = { // arguments handed to the optional onSplitGroup callback
+  error: unknown; // the error that caused the split
+  group: TRequest[]; // the group that failed
+  parts: TRequest[][]; // the pieces it is retried as, in execution order
+  groupIndex: number; // index of the initial group this split belongs to
+  groups: number; // number of initial groups (not changed by splitting)
+  depth: number; // 0 when an initial group is split; one more for each nested split
+};
+
 /** Clamp polling to both configured poll interval and total timeout budget. */
 function resolveEmbeddingBatchPollIntervalMs(params: {
   pollIntervalMs: number;
@@ -40,14 +58,9 @@ export async function runEmbeddingBatchGroups<TRequest>(params: {
   concurrency: EmbeddingBatchExecutionParams["concurrency"];
   debugLabel: string;
   debug?: EmbeddingBatchExecutionParams["debug"];
-  runGroup: (args: {
-    group: TRequest[];
-    groupIndex: number;
-    groups: number;
-    byCustomId: Map<string, number[]>;
-    pollIntervalMs: number;
-    timeoutMs: number;
-  }) => Promise<void>;
+  shouldSplitGroupOnError?: (error: unknown, group: TRequest[]) => boolean;
+  onSplitGroup?: (args: EmbeddingBatchSplitArgs<TRequest>) => void;
+  runGroup: (args: EmbeddingBatchGroupRunArgs<TRequest>) => Promise<void>;
 }): Promise<Map<string, number[]>> {
   if (params.requests.length === 0) {
     return new Map();
@@ -58,15 +71,39 @@ export async function runEmbeddingBatchGroups<TRequest>(params: {
   });
   const byCustomId = new Map<string, number[]>();
   const pollIntervalMs = resolveEmbeddingBatchPollIntervalMs(params);
+  const runGroup = async (group: TRequest[], groupIndex: number, depth = 0): Promise<void> => { // run one group; on a splittable failure retry it as two halves, recursively
+    try {
+      await params.runGroup({
+        group,
+        groupIndex,
+        groups: groups.length,
+        byCustomId,
+        pollIntervalMs,
+        timeoutMs: params.timeoutMs,
+      });
+    } catch (error) {
+      if (group.length <= 1 || !params.shouldSplitGroupOnError?.(error, group)) { // single-request groups, a missing predicate, or a false predicate: rethrow unchanged
+        throw error;
+      }
+      const splitAt = Math.ceil(group.length / 2); // the first half gets the extra item when the length is odd
+      const parts = [group.slice(0, splitAt), group.slice(splitAt)].filter(
+        (part) => part.length > 0,
+      );
+      params.onSplitGroup?.({ // notify before any part is retried
+        error,
+        group,
+        parts,
+        groupIndex,
+        groups: groups.length,
+        depth,
+      });
+      for (const part of parts) { // parts run one after another; a failure in one stops the remaining parts
+        await runGroup(part, groupIndex, depth + 1);
+      }
+    }
+  };
   const tasks = groups.map((group, groupIndex) => async () => {
-    await params.runGroup({
-      group,
-      groupIndex,
-      groups: groups.length,
-      byCustomId,
-      pollIntervalMs,
-      timeoutMs: params.timeoutMs,
-    });
+    await runGroup(group, groupIndex); // goes through the splitting wrapper; depth defaults to 0 for initial groups
   });
 
   params.debug?.(params.debugLabel, {