Skip to content

Commit 383946f

Browse files
Beandon13 and claude committed
fix(agents): parse prompt_tokens/completion_tokens in CLI usage output for llama.cpp (#77992)
llama.cpp and other OpenAI-compatible local providers return usage as { prompt_tokens, completion_tokens } instead of { input_tokens, output_tokens }. The toCliUsage() function in cli-output.ts only accepted input_tokens / output_tokens (and their camelCase aliases), so llama.cpp usage was silently dropped and context display showed "?/131k" for all llama.cpp users.

Add prompt_tokens and completion_tokens as fallback keys for totalInput and output respectively in toCliUsage(). Both parseCliJson and parseCliJsonl go through this function, so the fix covers all CLI output parsing paths.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ea391c6 commit 383946f

2 files changed

Lines changed: 46 additions & 2 deletions

File tree

src/agents/cli-output.test.ts

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -383,6 +383,43 @@ describe("parseCliJsonl", () => {
383383
});
384384
});
385385

386+
it("parses llama.cpp OpenAI-compatible prompt_tokens/completion_tokens usage fields (#77992)", () => {
387+
// llama.cpp and other OpenAI-compatible local providers return prompt_tokens
388+
// and completion_tokens instead of input_tokens and output_tokens. Without
389+
// the fallback, context display shows "?/131k" for all llama.cpp users.
390+
const result = parseCliJsonl(
391+
[
392+
JSON.stringify({ type: "init", session_id: "session-llamacpp" }),
393+
JSON.stringify({
394+
type: "result",
395+
session_id: "session-llamacpp",
396+
result: "Hello from llama.cpp",
397+
usage: {
398+
prompt_tokens: 11,
399+
completion_tokens: 7,
400+
total_tokens: 18,
401+
},
402+
}),
403+
].join("\n"),
404+
{
405+
command: "claude",
406+
output: "jsonl",
407+
sessionIdFields: ["session_id"],
408+
},
409+
"claude-cli",
410+
);
411+
412+
expect(result).toMatchObject({
413+
text: "Hello from llama.cpp",
414+
sessionId: "session-llamacpp",
415+
usage: {
416+
input: 11,
417+
output: 7,
418+
total: 18,
419+
},
420+
});
421+
});
422+
386423
it("parses multiple JSON objects embedded on the same line", () => {
387424
const result = parseCliJsonl(
388425
'{"type":"init","session_id":"session-999"} {"type":"result","session_id":"session-999","result":"done"}',

src/agents/cli-output.ts

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,8 +122,15 @@ function toCliUsage(raw: Record<string, unknown>): CliUsage | undefined {
122122
};
123123
const pick = (key: string) =>
124124
typeof raw[key] === "number" && raw[key] > 0 ? raw[key] : undefined;
125-
const totalInput = pick("input_tokens") ?? pick("inputTokens");
126-
const output = pick("output_tokens") ?? pick("outputTokens");
125+
// llama.cpp and other OpenAI-compatible providers use prompt_tokens /
126+
// completion_tokens instead of input_tokens / output_tokens (#77992).
127+
const totalInput =
128+
pick("input_tokens") ?? pick("inputTokens") ?? pick("prompt_tokens") ?? pick("promptTokens");
129+
const output =
130+
pick("output_tokens") ??
131+
pick("outputTokens") ??
132+
pick("completion_tokens") ??
133+
pick("completionTokens");
127134
const nestedCached =
128135
readNestedCached("input_tokens_details") ?? readNestedCached("prompt_tokens_details");
129136
const cacheRead =

0 commit comments

Comments
 (0)