fix(agents): parse prompt_tokens/completion_tokens in CLI usage output for llama.cpp (#77992)

Beandon13 · claude · Beandon13 · commit 383946ff45ff · 2026-05-06T08:46:23.000-04:00
llama.cpp and other OpenAI-compatible local providers return usage as
{ prompt_tokens, completion_tokens } instead of { input_tokens, output_tokens }.
The toCliUsage() function in cli-output.ts only accepted input_tokens /
output_tokens (and their camelCase aliases), so llama.cpp usage was silently
dropped and context display showed "?/131k" for all llama.cpp users.

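Add prompt_tokens and completion_tokens (plus their camelCase aliases
promptTokens and completionTokens) as fallback keys for totalInput and
output respectively in toCliUsage(). The existing input_tokens /
output_tokens keys still take precedence when present. Both parseCliJson
and parseCliJsonl go through this function, so the fix covers all CLI
output parsing paths.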

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/src/agents/cli-output.test.ts b/src/agents/cli-output.test.ts
@@ -383,6 +383,43 @@ describe("parseCliJsonl", () => {
     });
   });
 
+  it("parses llama.cpp OpenAI-compatible prompt_tokens/completion_tokens usage fields (#77992)", () => {
+    // llama.cpp and other OpenAI-compatible local providers return prompt_tokens
+    // and completion_tokens instead of input_tokens and output_tokens. Without
+    // the fallback, context display shows "?/131k" for all llama.cpp users.
+    const result = parseCliJsonl(
+      [
+        JSON.stringify({ type: "init", session_id: "session-llamacpp" }),
+        JSON.stringify({
+          type: "result",
+          session_id: "session-llamacpp",
+          result: "Hello from llama.cpp",
+          usage: {
+            prompt_tokens: 11,
+            completion_tokens: 7,
+            total_tokens: 18,
+          },
+        }),
+      ].join("\n"),
+      {
+        command: "claude",
+        output: "jsonl",
+        sessionIdFields: ["session_id"],
+      },
+      "claude-cli",
+    );
+
+    expect(result).toMatchObject({
+      text: "Hello from llama.cpp",
+      sessionId: "session-llamacpp",
+      usage: {
+        input: 11,
+        output: 7,
+        total: 18,
+      },
+    });
+  });
+
   it("parses multiple JSON objects embedded on the same line", () => {
     const result = parseCliJsonl(
       '{"type":"init","session_id":"session-999"} {"type":"result","session_id":"session-999","result":"done"}',
diff --git a/src/agents/cli-output.ts b/src/agents/cli-output.ts
@@ -122,8 +122,15 @@ function toCliUsage(raw: Record<string, unknown>): CliUsage | undefined {
   };
   const pick = (key: string) =>
     typeof raw[key] === "number" && raw[key] > 0 ? raw[key] : undefined;
-  const totalInput = pick("input_tokens") ?? pick("inputTokens");
-  const output = pick("output_tokens") ?? pick("outputTokens");
+  // llama.cpp and other OpenAI-compatible providers use prompt_tokens /
+  // completion_tokens instead of input_tokens / output_tokens (#77992).
+  const totalInput =
+    pick("input_tokens") ?? pick("inputTokens") ?? pick("prompt_tokens") ?? pick("promptTokens");
+  const output =
+    pick("output_tokens") ??
+    pick("outputTokens") ??
+    pick("completion_tokens") ??
+    pick("completionTokens");
   const nestedCached =
     readNestedCached("input_tokens_details") ?? readNestedCached("prompt_tokens_details");
   const cacheRead =