Skip to content

Commit 6965a2c

Browse files
authored
feat(memory): native Voyage AI support (#7078)
* feat(memory): add native Voyage AI embedding support with batching. Cherry-picked from PR #2519; resolved conflict in memory-search.ts (hasRemote -> hasRemoteConfig rename, plus added the voyage provider). * fix(memory): optimize Voyage batch memory usage with streaming and deduplicate code. Cherry-picked from PR #2519. Fixed lint error: changed this.runWithConcurrency to use the imported runWithConcurrency function after its extraction to internal.ts.
1 parent e3d3893 commit 6965a2c

File tree

11 files changed

+879
-58
lines changed

11 files changed

+879
-58
lines changed

src/agents/memory-search.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export type ResolvedMemorySearchConfig = {
99
enabled: boolean;
1010
sources: Array<"memory" | "sessions">;
1111
extraPaths: string[];
12-
provider: "openai" | "local" | "gemini" | "auto";
12+
provider: "openai" | "local" | "gemini" | "voyage" | "auto";
1313
remote?: {
1414
baseUrl?: string;
1515
apiKey?: string;
@@ -25,7 +25,7 @@ export type ResolvedMemorySearchConfig = {
2525
experimental: {
2626
sessionMemory: boolean;
2727
};
28-
fallback: "openai" | "gemini" | "local" | "none";
28+
fallback: "openai" | "gemini" | "local" | "voyage" | "none";
2929
model: string;
3030
local: {
3131
modelPath?: string;
@@ -72,6 +72,7 @@ export type ResolvedMemorySearchConfig = {
7272

7373
const DEFAULT_OPENAI_MODEL = "text-embedding-3-small";
7474
const DEFAULT_GEMINI_MODEL = "gemini-embedding-001";
75+
const DEFAULT_VOYAGE_MODEL = "voyage-4-large";
7576
const DEFAULT_CHUNK_TOKENS = 400;
7677
const DEFAULT_CHUNK_OVERLAP = 80;
7778
const DEFAULT_WATCH_DEBOUNCE_MS = 1500;
@@ -136,7 +137,11 @@ function mergeConfig(
136137
defaultRemote?.headers,
137138
);
138139
const includeRemote =
139-
hasRemoteConfig || provider === "openai" || provider === "gemini" || provider === "auto";
140+
hasRemoteConfig ||
141+
provider === "openai" ||
142+
provider === "gemini" ||
143+
provider === "voyage" ||
144+
provider === "auto";
140145
const batch = {
141146
enabled: overrideRemote?.batch?.enabled ?? defaultRemote?.batch?.enabled ?? true,
142147
wait: overrideRemote?.batch?.wait ?? defaultRemote?.batch?.wait ?? true,
@@ -163,7 +168,9 @@ function mergeConfig(
163168
? DEFAULT_GEMINI_MODEL
164169
: provider === "openai"
165170
? DEFAULT_OPENAI_MODEL
166-
: undefined;
171+
: provider === "voyage"
172+
? DEFAULT_VOYAGE_MODEL
173+
: undefined;
167174
const model = overrides?.model ?? defaults?.model ?? modelDefault ?? "";
168175
const local = {
169176
modelPath: overrides?.local?.modelPath ?? defaults?.local?.modelPath,

src/config/schema.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,8 @@ const FIELD_HELP: Record<string, string> = {
542542
"Extra paths to include in memory search (directories or .md files; relative paths resolved from workspace).",
543543
"agents.defaults.memorySearch.experimental.sessionMemory":
544544
"Enable experimental session transcript indexing for memory search (default: false).",
545-
"agents.defaults.memorySearch.provider": 'Embedding provider ("openai", "gemini", or "local").',
545+
"agents.defaults.memorySearch.provider":
546+
'Embedding provider ("openai", "gemini", "voyage", or "local").',
546547
"agents.defaults.memorySearch.remote.baseUrl":
547548
"Custom base URL for remote embeddings (OpenAI-compatible proxies or Gemini overrides).",
548549
"agents.defaults.memorySearch.remote.apiKey": "Custom API key for the remote embedding provider.",

src/config/types.tools.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ export type MemorySearchConfig = {
234234
sessionMemory?: boolean;
235235
};
236236
/** Embedding provider mode. */
237-
provider?: "openai" | "gemini" | "local";
237+
provider?: "openai" | "gemini" | "local" | "voyage";
238238
remote?: {
239239
baseUrl?: string;
240240
apiKey?: string;
@@ -253,7 +253,7 @@ export type MemorySearchConfig = {
253253
};
254254
};
255255
/** Fallback behavior when embeddings fail. */
256-
fallback?: "openai" | "gemini" | "local" | "none";
256+
fallback?: "openai" | "gemini" | "local" | "voyage" | "none";
257257
/** Embedding model id (remote) or alias (local). */
258258
model?: string;
259259
/** Local embedding settings (node-llama-cpp). */

src/config/zod-schema.agent-runtime.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,9 @@ export const MemorySearchSchema = z
318318
})
319319
.strict()
320320
.optional(),
321-
provider: z.union([z.literal("openai"), z.literal("local"), z.literal("gemini")]).optional(),
321+
provider: z
322+
.union([z.literal("openai"), z.literal("local"), z.literal("gemini"), z.literal("voyage")])
323+
.optional(),
322324
remote: z
323325
.object({
324326
baseUrl: z.string().optional(),
@@ -338,7 +340,13 @@ export const MemorySearchSchema = z
338340
.strict()
339341
.optional(),
340342
fallback: z
341-
.union([z.literal("openai"), z.literal("gemini"), z.literal("local"), z.literal("none")])
343+
.union([
344+
z.literal("openai"),
345+
z.literal("gemini"),
346+
z.literal("local"),
347+
z.literal("voyage"),
348+
z.literal("none"),
349+
])
342350
.optional(),
343351
model: z.string().optional(),
344352
local: z

src/memory/batch-voyage.test.ts

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
import { afterEach, describe, expect, it, vi } from "vitest";
2+
import { ReadableStream } from "node:stream/web";
3+
import type { VoyageBatchOutputLine, VoyageBatchRequest } from "./batch-voyage.js";
4+
import type { VoyageEmbeddingClient } from "./embeddings-voyage.js";
5+
6+
// Mock internal.js if needed, but runWithConcurrency is simple enough to keep real.
7+
// We DO need to mock retryAsync to avoid actual delays/retries logic complicating tests
8+
vi.mock("../infra/retry.js", () => ({
9+
retryAsync: async <T>(fn: () => Promise<T>) => fn(),
10+
}));
11+
12+
describe("runVoyageEmbeddingBatches", () => {
13+
afterEach(() => {
14+
vi.resetAllMocks();
15+
vi.unstubAllGlobals();
16+
});
17+
18+
const mockClient: VoyageEmbeddingClient = {
19+
baseUrl: "https://api.voyageai.com/v1",
20+
headers: { Authorization: "Bearer test-key" },
21+
model: "voyage-4-large",
22+
};
23+
24+
const mockRequests: VoyageBatchRequest[] = [
25+
{ custom_id: "req-1", body: { input: "text1" } },
26+
{ custom_id: "req-2", body: { input: "text2" } },
27+
];
28+
29+
it("successfully submits batch, waits, and streams results", async () => {
30+
const fetchMock = vi.fn();
31+
vi.stubGlobal("fetch", fetchMock);
32+
33+
// Sequence of fetch calls:
34+
// 1. Upload file
35+
fetchMock.mockResolvedValueOnce({
36+
ok: true,
37+
json: async () => ({ id: "file-123" }),
38+
});
39+
40+
// 2. Create batch
41+
fetchMock.mockResolvedValueOnce({
42+
ok: true,
43+
json: async () => ({ id: "batch-abc", status: "pending" }),
44+
});
45+
46+
// 3. Poll status (pending) - Optional depending on wait loop, let's say it finishes immediately for this test
47+
// Actually the code does: initial check (if completed) -> wait loop.
48+
// If create returns "pending", it enters waitForVoyageBatch.
49+
// waitForVoyageBatch fetches status.
50+
51+
// 3. Poll status (completed)
52+
fetchMock.mockResolvedValueOnce({
53+
ok: true,
54+
json: async () => ({
55+
id: "batch-abc",
56+
status: "completed",
57+
output_file_id: "file-out-999",
58+
}),
59+
});
60+
61+
// 4. Download content (Streaming)
62+
const outputLines: VoyageBatchOutputLine[] = [
63+
{
64+
custom_id: "req-1",
65+
response: { status_code: 200, body: { data: [{ embedding: [0.1, 0.1] }] } },
66+
},
67+
{
68+
custom_id: "req-2",
69+
response: { status_code: 200, body: { data: [{ embedding: [0.2, 0.2] }] } },
70+
},
71+
];
72+
73+
// Create a stream that emits the NDJSON lines
74+
const stream = new ReadableStream({
75+
start(controller) {
76+
const text = outputLines.map((l) => JSON.stringify(l)).join("\n");
77+
controller.enqueue(new TextEncoder().encode(text));
78+
controller.close();
79+
},
80+
});
81+
82+
fetchMock.mockResolvedValueOnce({
83+
ok: true,
84+
body: stream,
85+
});
86+
87+
const { runVoyageEmbeddingBatches } = await import("./batch-voyage.js");
88+
89+
const results = await runVoyageEmbeddingBatches({
90+
client: mockClient,
91+
agentId: "agent-1",
92+
requests: mockRequests,
93+
wait: true,
94+
pollIntervalMs: 1, // fast poll
95+
timeoutMs: 1000,
96+
concurrency: 1,
97+
});
98+
99+
expect(results.size).toBe(2);
100+
expect(results.get("req-1")).toEqual([0.1, 0.1]);
101+
expect(results.get("req-2")).toEqual([0.2, 0.2]);
102+
103+
// Verify calls
104+
expect(fetchMock).toHaveBeenCalledTimes(4);
105+
106+
// Verify File Upload
107+
expect(fetchMock.mock.calls[0][0]).toContain("/files");
108+
const uploadBody = fetchMock.mock.calls[0][1].body as FormData;
109+
expect(uploadBody).toBeInstanceOf(FormData);
110+
expect(uploadBody.get("purpose")).toBe("batch");
111+
112+
// Verify Batch Create
113+
expect(fetchMock.mock.calls[1][0]).toContain("/batches");
114+
const createBody = JSON.parse(fetchMock.mock.calls[1][1].body);
115+
expect(createBody.input_file_id).toBe("file-123");
116+
expect(createBody.completion_window).toBe("12h");
117+
118+
// Verify Content Fetch
119+
expect(fetchMock.mock.calls[3][0]).toContain("/files/file-out-999/content");
120+
});
121+
122+
it("handles empty lines and stream chunks correctly", async () => {
123+
const fetchMock = vi.fn();
124+
vi.stubGlobal("fetch", fetchMock);
125+
126+
// 1. Upload
127+
fetchMock.mockResolvedValueOnce({ ok: true, json: async () => ({ id: "f1" }) });
128+
// 2. Create (completed immediately)
129+
fetchMock.mockResolvedValueOnce({
130+
ok: true,
131+
json: async () => ({ id: "b1", status: "completed", output_file_id: "out1" }),
132+
});
133+
// 3. Download Content (Streaming with chunks and newlines)
134+
const stream = new ReadableStream({
135+
start(controller) {
136+
const line1 = JSON.stringify({
137+
custom_id: "req-1",
138+
response: { body: { data: [{ embedding: [1] }] } },
139+
});
140+
const line2 = JSON.stringify({
141+
custom_id: "req-2",
142+
response: { body: { data: [{ embedding: [2] }] } },
143+
});
144+
145+
// Split across chunks
146+
controller.enqueue(new TextEncoder().encode(line1 + "\n"));
147+
controller.enqueue(new TextEncoder().encode("\n")); // empty line
148+
controller.enqueue(new TextEncoder().encode(line2)); // no newline at EOF
149+
controller.close();
150+
},
151+
});
152+
153+
fetchMock.mockResolvedValueOnce({ ok: true, body: stream });
154+
155+
const { runVoyageEmbeddingBatches } = await import("./batch-voyage.js");
156+
157+
const results = await runVoyageEmbeddingBatches({
158+
client: mockClient,
159+
agentId: "a1",
160+
requests: mockRequests,
161+
wait: true,
162+
pollIntervalMs: 1,
163+
timeoutMs: 1000,
164+
concurrency: 1,
165+
});
166+
167+
expect(results.get("req-1")).toEqual([1]);
168+
expect(results.get("req-2")).toEqual([2]);
169+
});
170+
});

0 commit comments

Comments (0)