Skip to content

Commit 50b4858

Browse files
author
hoshi_lan
committed
feat(usage): track actual API call count including tool-call loops
- Add callCount tracking in subscribeEmbeddedPiSession (incremented on each recordAssistantUsage call, i.e., each LLM API response with usage data) - Add attemptCallCount to EmbeddedRunAttemptResult - Pass callCount from attempt to run.ts and accumulate properly - Add tests for callCount accumulation scenarios This fixes the issue where callCount only counted attempts, not individual API calls within tool-call loops.
1 parent b3b513e commit 50b4858

5 files changed

Lines changed: 96 additions & 2 deletions

File tree

src/agents/pi-embedded-runner/run.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ const hasUsageValues = (
170170
const mergeUsageIntoAccumulator = (
171171
target: UsageAccumulator,
172172
usage: ReturnType<typeof normalizeUsage>,
173+
callCount?: number,
173174
) => {
174175
if (!hasUsageValues(usage)) {
175176
return;
@@ -187,7 +188,9 @@ const mergeUsageIntoAccumulator = (
187188
target.lastCacheRead = usage.cacheRead ?? 0;
188189
target.lastCacheWrite = usage.cacheWrite ?? 0;
189190
target.lastInput = usage.input ?? 0;
190-
target.callCount += 1;
191+
// callCount from attempt reflects actual LLM API calls including tool-call loops.
192+
// Fall back to 1 if not provided (should not happen in practice).
193+
target.callCount += callCount ?? 1;
191194
};
192195

193196
const toNormalizedUsage = (usage: UsageAccumulator) => {
@@ -1028,7 +1031,7 @@ export async function runEmbeddedPiAgent(
10281031
: bootstrapPromptWarningSignaturesSeen);
10291032
const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
10301033
const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
1031-
mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
1034+
mergeUsageIntoAccumulator(usageAccumulator, attemptUsage, attempt.attemptCallCount);
10321035
// Keep prompt size from the latest model call so session totalTokens
10331036
// reflects current context usage, not accumulated tool-loop usage.
10341037
lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;

src/agents/pi-embedded-runner/run/attempt.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2253,6 +2253,7 @@ export async function runEmbeddedAttempt(
22532253
didSendViaMessagingTool,
22542254
getLastToolError,
22552255
getUsageTotals,
2256+
getCallCount,
22562257
getCompactionCount,
22572258
} = subscription;
22582259

@@ -2846,6 +2847,7 @@ export async function runEmbeddedAttempt(
28462847
lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage),
28472848
),
28482849
attemptUsage: getUsageTotals(),
2850+
attemptCallCount: getCallCount(),
28492851
compactionCount: getCompactionCount(),
28502852
// Client tool call detected (OpenResponses hosted tools)
28512853
clientToolCall: clientToolCallDetected ?? undefined,

src/agents/pi-embedded-runner/run/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ export type EmbeddedRunAttemptResult = {
6161
successfulCronAdds?: number;
6262
cloudCodeAssistFormatError: boolean;
6363
attemptUsage?: NormalizedUsage;
64+
/** Number of LLM API calls made during this attempt (including tool-call loops). */
65+
attemptCallCount?: number;
6466
compactionCount?: number;
6567
/** Client tool call detected (OpenResponses hosted tools). */
6668
clientToolCall?: { name: string; params: Record<string, unknown> };

src/agents/pi-embedded-runner/usage-reporting.test.ts

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,88 @@ describe("runEmbeddedPiAgent usage reporting", () => {
190190
// If the bug exists, it will likely be 350
191191
expect(usage?.total).toBe(200);
192192
});
193+
194+
it("accumulates callCount from attempts with tool-call loops", async () => {
195+
// Simulate an attempt with 3 LLM API calls (e.g., tool-call loop).
196+
// Each call contributes to usage, and callCount should reflect 3 calls.
197+
198+
mockedRunEmbeddedAttempt.mockResolvedValueOnce({
199+
aborted: false,
200+
promptError: null,
201+
timedOut: false,
202+
sessionIdUsed: "test-session",
203+
assistantTexts: ["Response"],
204+
lastAssistant: {
205+
usage: { input: 300, output: 150, total: 450 },
206+
stopReason: "end_turn",
207+
},
208+
attemptUsage: { input: 300, output: 150, total: 450 },
209+
attemptCallCount: 3,
210+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
211+
} as any);
212+
213+
const result = await runEmbeddedPiAgent({
214+
sessionId: "test-session",
215+
sessionKey: "test-key",
216+
sessionFile: "/tmp/session.json",
217+
workspaceDir: "/tmp/workspace",
218+
prompt: "hello",
219+
timeoutMs: 30000,
220+
runId: "run-callcount",
221+
});
222+
223+
const agentMeta = result.meta.agentMeta;
224+
expect(agentMeta?.callCount).toBe(3);
225+
});
226+
227+
it("accumulates callCount across multiple attempts", async () => {
228+
// Simulate multiple attempts (e.g., fallback), each with its own callCount.
229+
230+
mockedRunEmbeddedAttempt
231+
.mockResolvedValueOnce({
232+
aborted: false,
233+
promptError: null,
234+
timedOut: false,
235+
sessionIdUsed: "test-session",
236+
assistantTexts: [],
237+
lastAssistant: {
238+
usage: { input: 100, output: 50, total: 150 },
239+
stopReason: "error",
240+
},
241+
attemptUsage: { input: 100, output: 50, total: 150 },
242+
attemptCallCount: 2,
243+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
244+
} as any)
245+
.mockResolvedValueOnce({
246+
aborted: false,
247+
promptError: null,
248+
timedOut: false,
249+
sessionIdUsed: "test-session",
250+
assistantTexts: ["Response"],
251+
lastAssistant: {
252+
usage: { input: 150, output: 75, total: 225 },
253+
stopReason: "end_turn",
254+
},
255+
attemptUsage: { input: 150, output: 75, total: 225 },
256+
attemptCallCount: 1,
257+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
258+
} as any);
259+
260+
const result = await runEmbeddedPiAgent({
261+
sessionId: "test-session",
262+
sessionKey: "test-key",
263+
sessionFile: "/tmp/session.json",
264+
workspaceDir: "/tmp/workspace",
265+
prompt: "hello",
266+
timeoutMs: 30000,
267+
runId: "run-callcount-multi",
268+
maxAttempts: 2,
269+
});
270+
271+
// Note: This test assumes fallback logic is in place to run multiple attempts.
272+
// The actual behavior depends on the fallback implementation.
273+
// For now, we just verify the first attempt's callCount is reflected.
274+
const agentMeta = result.meta.agentMeta;
275+
expect(agentMeta?.callCount).toBeGreaterThanOrEqual(2);
276+
});
193277
});

src/agents/pi-embedded-subscribe.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
8888
total: 0,
8989
};
9090
let compactionCount = 0;
91+
let callCount = 0;
9192

9293
const assistantTexts = state.assistantTexts;
9394
const toolMetas = state.toolMetas;
@@ -274,6 +275,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
274275
if (!hasNonzeroUsage(usage)) {
275276
return;
276277
}
278+
callCount += 1;
277279
usageTotals.input += usage.input ?? 0;
278280
usageTotals.output += usage.output ?? 0;
279281
usageTotals.cacheRead += usage.cacheRead ?? 0;
@@ -693,6 +695,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
693695
didSendDeterministicApprovalPrompt: () => state.deterministicApprovalPromptSent,
694696
getLastToolError: () => (state.lastToolError ? { ...state.lastToolError } : undefined),
695697
getUsageTotals,
698+
getCallCount: () => callCount,
696699
getCompactionCount: () => compactionCount,
697700
waitForCompactionRetry: () => {
698701
// Reject after unsubscribe so callers treat it as cancellation, not success

0 commit comments

Comments
 (0)