Skip to content

Commit 2ccd183

Browse files
Lellansin and obviyus authored
fix: return real usage for OpenAI-compatible chat completions (#62986) (thanks @Lellansin)
* Gateway: fix chat completions usage compatibility
* Gateway: clarify usage-gated stream wait
* Gateway: preserve aggregate usage totals
* Agents: clamp usage components before total
* fix(gateway): bound usage stream finalization
* fix: add OpenAI compat usage changelog (#62986) (thanks @Lellansin)
* fix(agents): emit lifecycle terminal events after flush

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
1 parent f64c84a commit 2ccd183

7 files changed

Lines changed: 713 additions & 30 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Docs: https://docs.openclaw.ai
111111
- Daemon/gateway: prevent systemd restart storms on configuration errors by exiting with `EX_CONFIG` and adding generated unit restart-prevention guards. (#63913) Thanks @neo1027144-creator.
112112
- Agents/exec: prevent gateway crash ("Agent listener invoked outside active run") when a subagent exec tool produces stdout/stderr after the agent run has ended or been aborted. (#62821) Thanks @openperf.
113113
- Browser/tabs: route `/tabs/action` close/select through the same browser endpoint reachability and policy checks as list/new (including Playwright-backed remote tab operations), reject CDP HTTP redirects on probe requests, and sanitize blocked-endpoint error responses so tab list/focus/close flows fail closed without echoing raw policy details back to callers. (#63332)
114+
- Gateway/OpenAI compat: return real `usage` for non-stream `/v1/chat/completions` responses, emit the final usage chunk when `stream_options.include_usage=true`, and bound usage-gated stream finalization after lifecycle end. (#62986) Thanks @Lellansin.
114115

115116
## 2026.4.9
116117

src/agents/pi-embedded-subscribe.handlers.lifecycle.test.ts

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,4 +224,63 @@ describe("handleAgentEnd", () => {
224224
resolveChannelFlush?.();
225225
await endPromise;
226226
});
227+
228+
// Verifies ordering on the success path: the "end" lifecycle event must not
// reach onAgentEvent until the promise returned by onBlockReplyFlush settles.
it("emits lifecycle end after async channel flush completes", async () => {
229+
// Captures the resolver of the pending flush promise so the test decides when the flush finishes.
let resolveChannelFlush: (() => void) | undefined;
230+
const onAgentEvent = vi.fn();
231+
// Mocked flush that stays pending until resolveChannelFlush is invoked.
const onBlockReplyFlush = vi.fn(
232+
() =>
233+
new Promise<void>((resolve) => {
234+
resolveChannelFlush = resolve;
235+
}),
236+
);
237+
// NOTE(review): the first createContext argument is presumably the last assistant message
// (undefined here, i.e. no error) — confirm against the helper's definition.
const ctx = createContext(undefined, { onAgentEvent, onBlockReplyFlush });
238+
239+
const endPromise = handleAgentEnd(ctx);
240+
241+
// The flush is still pending, so no lifecycle event may have been delivered yet.
expect(onAgentEvent).not.toHaveBeenCalled();
242+
243+
// Release the flush and wait for handleAgentEnd to finish.
resolveChannelFlush?.();
244+
await endPromise;
245+
246+
expect(onAgentEvent).toHaveBeenCalledWith({
247+
stream: "lifecycle",
248+
data: { phase: "end" },
249+
});
250+
});
251+
252+
// Verifies ordering on the error path: when the context carries an assistant message
// with stopReason "error", the "error" lifecycle event must not reach onAgentEvent
// until the promise returned by onBlockReplyFlush settles.
it("emits lifecycle error after async channel flush completes", async () => {
253+
// Captures the resolver of the pending flush promise so the test decides when the flush finishes.
let resolveChannelFlush: (() => void) | undefined;
254+
const onAgentEvent = vi.fn();
255+
// Mocked flush that stays pending until resolveChannelFlush is invoked.
const onBlockReplyFlush = vi.fn(
256+
() =>
257+
new Promise<void>((resolve) => {
258+
resolveChannelFlush = resolve;
259+
}),
260+
);
261+
// NOTE(review): the first createContext argument is presumably used as the last
// assistant message — confirm against the helper's definition.
const ctx = createContext(
262+
{
263+
role: "assistant",
264+
stopReason: "error",
265+
errorMessage: "connection refused",
266+
content: [{ type: "text", text: "" }],
267+
},
268+
{ onAgentEvent, onBlockReplyFlush },
269+
);
270+
271+
const endPromise = handleAgentEnd(ctx);
272+
273+
// The flush is still pending, so no lifecycle event may have been delivered yet.
expect(onAgentEvent).not.toHaveBeenCalled();
274+
275+
// Release the flush and wait for handleAgentEnd to finish.
resolveChannelFlush?.();
276+
await endPromise;
277+
278+
// The expected text is the formatted form of the raw "connection refused" message.
expect(onAgentEvent).toHaveBeenCalledWith({
279+
stream: "lifecycle",
280+
data: {
281+
phase: "error",
282+
error: "LLM request failed: connection refused by the provider endpoint.",
283+
},
284+
});
285+
});
227286
});

src/agents/pi-embedded-subscribe.handlers.lifecycle.ts

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ export function handleAgentStart(ctx: EmbeddedPiSubscribeContext) {
3838
export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
3939
const lastAssistant = ctx.state.lastAssistant;
4040
const isError = isAssistantMessage(lastAssistant) && lastAssistant.stopReason === "error";
41+
let lifecycleErrorText: string | undefined;
4142

4243
if (isError && lastAssistant) {
4344
const friendlyError = formatAssistantErrorText(lastAssistant, {
@@ -54,6 +55,7 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
5455
const observedError = buildApiErrorObservationFields(rawError);
5556
const safeErrorText =
5657
buildTextObservationFields(errorText).textPreview ?? "LLM request failed.";
58+
lifecycleErrorText = safeErrorText;
5759
const safeRunId = sanitizeForConsole(ctx.params.runId) ?? "-";
5860
const safeModel = sanitizeForConsole(lastAssistant.model) ?? "unknown";
5961
const safeProvider = sanitizeForConsole(lastAssistant.provider) ?? "unknown";
@@ -71,24 +73,30 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
7173
...observedError,
7274
consoleMessage: `embedded run agent end: runId=${safeRunId} isError=true model=${safeModel} provider=${safeProvider} error=${safeErrorText}${rawErrorConsoleSuffix}`,
7375
});
74-
emitAgentEvent({
75-
runId: ctx.params.runId,
76-
stream: "lifecycle",
77-
data: {
78-
phase: "error",
79-
error: safeErrorText,
80-
endedAt: Date.now(),
81-
},
82-
});
83-
void ctx.params.onAgentEvent?.({
84-
stream: "lifecycle",
85-
data: {
86-
phase: "error",
87-
error: safeErrorText,
88-
},
89-
});
9076
} else {
9177
ctx.log.debug(`embedded run agent end: runId=${ctx.params.runId} isError=${isError}`);
78+
}
79+
80+
const emitLifecycleTerminal = () => {
81+
if (isError) {
82+
emitAgentEvent({
83+
runId: ctx.params.runId,
84+
stream: "lifecycle",
85+
data: {
86+
phase: "error",
87+
error: lifecycleErrorText ?? "LLM request failed.",
88+
endedAt: Date.now(),
89+
},
90+
});
91+
void ctx.params.onAgentEvent?.({
92+
stream: "lifecycle",
93+
data: {
94+
phase: "error",
95+
error: lifecycleErrorText ?? "LLM request failed.",
96+
},
97+
});
98+
return;
99+
}
92100
emitAgentEvent({
93101
runId: ctx.params.runId,
94102
stream: "lifecycle",
@@ -101,7 +109,7 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
101109
stream: "lifecycle",
102110
data: { phase: "end" },
103111
});
104-
}
112+
};
105113

106114
const finalizeAgentEnd = () => {
107115
ctx.state.blockState.thinking = false;
@@ -140,11 +148,14 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
140148
const flushBlockReplyBufferResult = ctx.flushBlockReplyBuffer();
141149
finalizeAgentEnd();
142150
if (isPromiseLike<void>(flushBlockReplyBufferResult)) {
143-
return flushBlockReplyBufferResult.then(() => flushPendingMediaAndChannel());
151+
return flushBlockReplyBufferResult
152+
.then(() => flushPendingMediaAndChannel())
153+
.then(() => emitLifecycleTerminal());
144154
}
145155

146156
const flushPendingMediaAndChannelResult = flushPendingMediaAndChannel();
147157
if (isPromiseLike<void>(flushPendingMediaAndChannelResult)) {
148-
return flushPendingMediaAndChannelResult;
158+
return flushPendingMediaAndChannelResult.then(() => emitLifecycleTerminal());
149159
}
160+
emitLifecycleTerminal();
150161
}

src/agents/usage.test.ts

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
hasNonzeroUsage,
55
derivePromptTokens,
66
deriveSessionTotalTokens,
7+
toOpenAiChatCompletionsUsage,
78
} from "./usage.js";
89

910
describe("normalizeUsage", () => {
@@ -146,6 +147,90 @@ describe("normalizeUsage", () => {
146147
});
147148
});
148149

150+
// Test suite for toOpenAiChatCompletionsUsage: checks how prompt_tokens,
// completion_tokens and total_tokens are derived from normalized usage, including
// partial breakdowns, missing usage, cache-write-only differences and negative components.
describe("toOpenAiChatCompletionsUsage", () => {
151+
// Only output and an aggregate total are supplied; the aggregate (100) exceeds the component sum (20).
it("uses max(component sum, aggregate total) when breakdown is partial", () => {
152+
const usage = normalizeUsage({ output_tokens: 20, total_tokens: 100 });
153+
expect(toOpenAiChatCompletionsUsage(usage)).toEqual({
154+
prompt_tokens: 0,
155+
completion_tokens: 20,
156+
total_tokens: 100,
157+
});
158+
});
159+
160+
// The component sum (30 + 40 = 70) exceeds the reported aggregate (50), so the sum is reported.
it("uses component sum when it exceeds aggregate total", () => {
161+
expect(
162+
toOpenAiChatCompletionsUsage({
163+
input: 30,
164+
output: 40,
165+
total: 50,
166+
}),
167+
).toEqual({
168+
prompt_tokens: 30,
169+
completion_tokens: 40,
170+
total_tokens: 70,
171+
});
172+
});
173+
174+
// No component breakdown at all: both components are zero and the aggregate is kept.
it("uses aggregate total when only total is present", () => {
175+
const usage = normalizeUsage({ total_tokens: 42 });
176+
expect(toOpenAiChatCompletionsUsage(usage)).toEqual({
177+
prompt_tokens: 0,
178+
completion_tokens: 0,
179+
total_tokens: 42,
180+
});
181+
});
182+
183+
it("returns zeros for undefined usage", () => {
184+
expect(toOpenAiChatCompletionsUsage(undefined)).toEqual({
185+
prompt_tokens: 0,
186+
completion_tokens: 0,
187+
total_tokens: 0,
188+
});
189+
});
190+
191+
// cacheWrite does not contribute to prompt_tokens; the larger aggregate (200) is still reported as the total.
it("raises total_tokens with aggregate when cache write is excluded from prompt sum", () => {
192+
expect(
193+
toOpenAiChatCompletionsUsage({
194+
input: 10,
195+
output: 5,
196+
cacheWrite: 100,
197+
total: 200,
198+
}),
199+
).toEqual({
200+
prompt_tokens: 10,
201+
completion_tokens: 5,
202+
total_tokens: 200,
203+
});
204+
});
205+
206+
// A negative output is clamped to 0 before summing, so the total is 3 rather than -2.
it("clamps negative completion before deriving total_tokens", () => {
207+
expect(
208+
toOpenAiChatCompletionsUsage({
209+
input: 3,
210+
output: -5,
211+
}),
212+
).toEqual({
213+
prompt_tokens: 3,
214+
completion_tokens: 0,
215+
total_tokens: 3,
216+
});
217+
});
218+
219+
// With a negative output clamped to 0 the component sum is 3; the aggregate (7) is larger and wins.
it("preserves aggregate total when components are partially negative", () => {
220+
expect(
221+
toOpenAiChatCompletionsUsage({
222+
input: 3,
223+
output: -5,
224+
total: 7,
225+
}),
226+
).toEqual({
227+
prompt_tokens: 3,
228+
completion_tokens: 0,
229+
total_tokens: 7,
230+
});
231+
});
232+
});
233+
149234
describe("hasNonzeroUsage", () => {
150235
it("returns true when cache read is nonzero", () => {
151236
const usage = { cacheRead: 100 };

src/agents/usage.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,41 @@ export function normalizeUsage(raw?: UsageLike | null): NormalizedUsage | undefi
143143
};
144144
}
145145

146+
/**
 * Maps normalized usage to OpenAI Chat Completions `usage` fields.
 *
 * `prompt_tokens` is input + cacheRead (cache write is excluded to match the
 * OpenAI-style breakdown used by the compat endpoint).
 *
 * `completion_tokens` is the output count, clamped to be non-negative.
 *
 * `total_tokens` is the greater of the component sum and aggregate `total` when
 * present, so a partial breakdown cannot discard a valid upstream total.
 *
 * Missing or non-finite (NaN / ±Infinity) components are treated as 0, and a
 * non-finite aggregate is ignored, so every returned field is a finite,
 * non-negative number that survives JSON serialization.
 *
 * @param usage - Normalized usage, or `undefined` when no usage was reported.
 * @returns The OpenAI-style usage object; all zeros when `usage` is `undefined`.
 */
export function toOpenAiChatCompletionsUsage(usage: NormalizedUsage | undefined): {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
} {
  // Same finite check the aggregate total gets below: a NaN/Infinity component
  // would otherwise propagate through Math.max and serialize as `null`.
  const finiteOrZero = (value: number | undefined): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  const input = finiteOrZero(usage?.input);
  const output = finiteOrZero(usage?.output);
  const cacheRead = finiteOrZero(usage?.cacheRead);

  // Clamp each reported field before summing so a negative component cannot
  // drag the derived total below the other component.
  const promptTokens = Math.max(0, input + cacheRead);
  const completionTokens = Math.max(0, output);
  const componentTotal = promptTokens + completionTokens;

  const aggregateRaw = usage?.total;
  const aggregateTotal =
    typeof aggregateRaw === "number" && Number.isFinite(aggregateRaw)
      ? Math.max(0, aggregateRaw)
      : undefined;
  const totalTokens =
    aggregateTotal !== undefined ? Math.max(componentTotal, aggregateTotal) : componentTotal;

  return {
    prompt_tokens: promptTokens,
    completion_tokens: completionTokens,
    total_tokens: totalTokens,
  };
}
180+
146181
export function derivePromptTokens(usage?: {
147182
input?: number;
148183
cacheRead?: number;

0 commit comments

Comments
 (0)