Skip to content

Commit 7c09ba7

Browse files
authored
fix(trace command): Improve trace raw diagnostics and trace command UX (#66089)
* improve trace raw diagnostics and command acks
* address trace review feedback
* avoid sync transcript reads in raw trace
* preserve raw cli output for trace
* gate trace emission at reply time
* reflect raw trace mode in status surfaces
1 parent 6157933 commit 7c09ba7

32 files changed

Lines changed: 2081 additions & 101 deletions

src/agents/cli-output.test.ts

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,39 @@ describe("parseCliJson", () => {
123123
},
124124
});
125125
});
126+
127+
it("parses nested OpenAI-style cached token details from CLI json payloads", () => {
128+
const result = parseCliJson(
129+
JSON.stringify({
130+
session_id: "openai-session-123",
131+
response: "OpenAI says hello",
132+
usage: {
133+
input_tokens: 15,
134+
output_tokens: 4,
135+
input_tokens_details: {
136+
cached_tokens: 6,
137+
},
138+
},
139+
}),
140+
{
141+
command: "codex",
142+
output: "json",
143+
sessionIdFields: ["session_id"],
144+
},
145+
);
146+
147+
expect(result).toEqual({
148+
text: "OpenAI says hello",
149+
sessionId: "openai-session-123",
150+
usage: {
151+
input: 9,
152+
output: 4,
153+
cacheRead: 6,
154+
cacheWrite: undefined,
155+
total: undefined,
156+
},
157+
});
158+
});
126159
});
127160

128161
describe("parseCliJsonl", () => {

src/agents/cli-output.ts

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,10 @@ type CliUsage = {
1212

1313
export type CliOutput = {
1414
text: string;
15+
rawText?: string;
1516
sessionId?: string;
1617
usage?: CliUsage;
18+
finalPromptText?: string;
1719
};
1820

1921
export type CliStreamingDelta = {
@@ -149,18 +151,30 @@ function unwrapCliErrorText(raw: string): string {
149151
}
150152

151153
function toCliUsage(raw: Record<string, unknown>): CliUsage | undefined {
154+
const readNestedCached = (key: "input_tokens_details" | "prompt_tokens_details") => {
155+
const nested = raw[key];
156+
if (!isRecord(nested)) {
157+
return undefined;
158+
}
159+
return typeof nested.cached_tokens === "number" && nested.cached_tokens > 0
160+
? nested.cached_tokens
161+
: undefined;
162+
};
152163
const pick = (key: string) =>
153164
typeof raw[key] === "number" && raw[key] > 0 ? raw[key] : undefined;
154165
const totalInput = pick("input_tokens") ?? pick("inputTokens");
155166
const output = pick("output_tokens") ?? pick("outputTokens");
167+
const nestedCached =
168+
readNestedCached("input_tokens_details") ?? readNestedCached("prompt_tokens_details");
156169
const cacheRead =
157170
pick("cache_read_input_tokens") ??
158171
pick("cached_input_tokens") ??
159172
pick("cacheRead") ??
160-
pick("cached");
173+
pick("cached") ??
174+
nestedCached;
161175
const input =
162176
pick("input") ??
163-
(Object.hasOwn(raw, "cached") && typeof totalInput === "number"
177+
((Object.hasOwn(raw, "cached") || nestedCached !== undefined) && typeof totalInput === "number"
164178
? Math.max(0, totalInput - (cacheRead ?? 0))
165179
: totalInput);
166180
const cacheWrite =

src/agents/cli-runner.reliability.test.ts

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ function buildPreparedContext(params?: {
3333
prompt: "hi",
3434
provider: "codex-cli",
3535
model: "gpt-5.4",
36+
thinkLevel: "low",
3637
timeoutMs: 1_000,
3738
runId: params?.runId ?? "run-2",
3839
},
@@ -177,6 +178,74 @@ describe("runCliAgent reliability", () => {
177178

178179
expect(supervisorSpawnMock).toHaveBeenCalledTimes(2);
179180
});
181+
182+
it("returns the assembled CLI prompt in meta for raw trace consumers", async () => {
183+
supervisorSpawnMock.mockResolvedValueOnce(
184+
createManagedRun({
185+
reason: "exit",
186+
exitCode: 0,
187+
exitSignal: null,
188+
durationMs: 50,
189+
stdout: "hello from cli",
190+
stderr: "",
191+
timedOut: false,
192+
noOutputTimedOut: false,
193+
}),
194+
);
195+
196+
const result = await runPreparedCliAgent({
197+
...buildPreparedContext(),
198+
bootstrapPromptWarningLines: ["Warning: prompt budget low."],
199+
});
200+
201+
expect(result.meta.finalPromptText).toContain("Warning: prompt budget low.");
202+
expect(result.meta.finalPromptText).toContain("hi");
203+
expect(result.meta.finalAssistantRawText).toBe("hello from cli");
204+
expect(result.meta.executionTrace).toMatchObject({
205+
winnerProvider: "codex-cli",
206+
winnerModel: "gpt-5.4",
207+
fallbackUsed: false,
208+
runner: "cli",
209+
attempts: [{ provider: "codex-cli", model: "gpt-5.4", result: "success" }],
210+
});
211+
expect(result.meta.requestShaping).toMatchObject({
212+
thinking: "low",
213+
});
214+
expect(result.meta.completion).toMatchObject({
215+
finishReason: "stop",
216+
stopReason: "completed",
217+
refusal: false,
218+
});
219+
});
220+
221+
it("keeps raw assistant output separate from transformed visible CLI output", async () => {
222+
supervisorSpawnMock.mockResolvedValueOnce(
223+
createManagedRun({
224+
reason: "exit",
225+
exitCode: 0,
226+
exitSignal: null,
227+
durationMs: 50,
228+
stdout: "hello from cli",
229+
stderr: "",
230+
timedOut: false,
231+
noOutputTimedOut: false,
232+
}),
233+
);
234+
235+
const result = await runPreparedCliAgent({
236+
...buildPreparedContext(),
237+
backendResolved: {
238+
...buildPreparedContext().backendResolved,
239+
textTransforms: {
240+
output: [{ from: "hello", to: "goodbye" }],
241+
},
242+
},
243+
});
244+
245+
expect(result.payloads).toEqual([{ text: "goodbye from cli" }]);
246+
expect(result.meta.finalAssistantVisibleText).toBe("goodbye from cli");
247+
expect(result.meta.finalAssistantRawText).toBe("hello from cli");
248+
});
180249
});
181250

182251
describe("resolveCliNoOutputTimeoutMs", () => {

src/agents/cli-runner.ts

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,45 @@ export async function runPreparedCliAgent(
2020
effectiveCliSessionId?: string;
2121
}): EmbeddedPiRunResult => {
2222
const text = resultParams.output.text?.trim();
23+
const rawText = resultParams.output.rawText?.trim();
2324
const payloads = text ? [{ text }] : undefined;
2425

2526
return {
2627
payloads,
2728
meta: {
2829
durationMs: Date.now() - context.started,
30+
...(resultParams.output.finalPromptText
31+
? { finalPromptText: resultParams.output.finalPromptText }
32+
: {}),
33+
...((text || rawText)
34+
? {
35+
...(text ? { finalAssistantVisibleText: text } : {}),
36+
...(rawText ? { finalAssistantRawText: rawText } : {}),
37+
}
38+
: {}),
2939
systemPromptReport: context.systemPromptReport,
40+
executionTrace: {
41+
winnerProvider: params.provider,
42+
winnerModel: context.modelId,
43+
attempts: [
44+
{
45+
provider: params.provider,
46+
model: context.modelId,
47+
result: "success",
48+
},
49+
],
50+
fallbackUsed: false,
51+
runner: "cli",
52+
},
53+
requestShaping: {
54+
...(params.thinkLevel ? { thinking: params.thinkLevel } : {}),
55+
...(params.authProfileId ? { authMode: "auth-profile" } : {}),
56+
},
57+
completion: {
58+
finishReason: "stop",
59+
stopReason: "completed",
60+
refusal: false,
61+
},
3062
agentMeta: {
3163
sessionId: resultParams.effectiveCliSessionId ?? params.sessionId ?? "",
3264
provider: params.provider,

src/agents/cli-runner/execute.ts

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -462,12 +462,12 @@ export async function executePreparedCliRun(
462462
outputMode: useResume ? (backend.resumeOutput ?? backend.output) : backend.output,
463463
fallbackSessionId: resolvedSessionId,
464464
});
465+
const rawText = parsed.text;
465466
return {
466467
...parsed,
467-
text: applyPluginTextReplacements(
468-
parsed.text,
469-
context.backendResolved.textTransforms?.output,
470-
),
468+
rawText,
469+
finalPromptText: prompt,
470+
text: applyPluginTextReplacements(rawText, context.backendResolved.textTransforms?.output),
471471
};
472472
} finally {
473473
restoreSkillEnv?.();

0 commit comments

Comments (0)