Skip to content

Commit 2987980

Browse files
committed
fix(agents): surface exec failures after claimed success
1 parent c58e015 commit 2987980

3 files changed

Lines changed: 59 additions & 12 deletions

File tree

src/agents/pi-embedded-runner/run/payloads.errors.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,23 @@ describe("buildEmbeddedRunPayloads", () => {
378378
expect(payloads[1]?.text).not.toContain("missing");
379379
});
380380

381+
it("shows exec tool errors when assistant output claims success", () => {
382+
const payloads = buildPayloads({
383+
assistantTexts: ["The script is ready to use and saved in your workspace."],
384+
lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
385+
lastToolError: {
386+
toolName: "exec",
387+
error: "/bin/bash: line 1: python: command not found",
388+
},
389+
});
390+
391+
expect(payloads).toHaveLength(2);
392+
expect(payloads[0]?.text).toBe("The script is ready to use and saved in your workspace.");
393+
expect(payloads[1]?.isError).toBe(true);
394+
expect(payloads[1]?.text).toContain("Exec");
395+
expect(payloads[1]?.text).not.toContain("python: command not found");
396+
});
397+
381398
it("shows mutating tool errors when assistant output does not acknowledge the failure", () => {
382399
const payloads = buildPayloads({
383400
assistantTexts: ["No issues found. The update is complete."],
@@ -435,6 +452,17 @@ describe("buildEmbeddedRunPayloads", () => {
435452
expectSinglePayloadSummary(payloads, { text });
436453
});
437454

455+
it("suppresses exec warnings when assistant output explicitly acknowledges the command failure", () => {
456+
const text = "I couldn't run the command because python was not found.";
457+
const payloads = buildPayloads({
458+
assistantTexts: [text],
459+
lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
460+
lastToolError: { toolName: "exec", error: "/bin/bash: line 1: python: command not found" },
461+
});
462+
463+
expectSinglePayloadSummary(payloads, { text });
464+
});
465+
438466
it("does not treat session_status read failures as mutating when explicitly flagged", () => {
439467
const payloads = buildPayloads({
440468
assistantTexts: ["Status loaded."],

src/agents/pi-embedded-runner/run/payloads.test.ts

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,28 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => {
8888
expectSinglePayloadText(payloads, "Fixed.");
8989
});
9090

91-
it("suppresses exec tool errors when verbose mode is off", () => {
92-
expectNoPayloads({
91+
it("surfaces concise exec tool errors when verbose mode is off", () => {
92+
const payloads = buildPayloads({
9393
lastToolError: { toolName: "exec", error: "command failed" },
9494
verboseLevel: "off",
9595
});
96+
97+
expectSingleToolErrorPayload(payloads, {
98+
title: "Exec",
99+
absentDetail: "command failed",
100+
});
101+
});
102+
103+
it("surfaces concise bash tool errors when verbose mode is off", () => {
104+
const payloads = buildPayloads({
105+
lastToolError: { toolName: "bash", error: "command failed" },
106+
verboseLevel: "off",
107+
});
108+
109+
expectSingleToolErrorPayload(payloads, {
110+
title: "Bash",
111+
absentDetail: "command failed",
112+
});
96113
});
97114

98115
it("surfaces exec tool errors for cron sessions even when verbose mode is off", () => {
@@ -132,12 +149,17 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => {
132149
});
133150
});
134151

135-
it("keeps non-timeout exec tool errors suppressed for cron sessions when verbose mode is off", () => {
136-
expectNoPayloads({
152+
it("surfaces non-timeout exec tool errors for cron sessions without raw details", () => {
153+
const payloads = buildPayloads({
137154
lastToolError: { toolName: "exec", error: "Command not found" },
138155
sessionKey: "agent:main:cron:job-1",
139156
verboseLevel: "off",
140157
});
158+
159+
expectSingleToolErrorPayload(payloads, {
160+
title: "Exec",
161+
absentDetail: "Command not found",
162+
});
141163
});
142164

143165
it("shows exec tool errors when verbose mode is on", () => {

src/agents/pi-embedded-runner/run/payloads.ts

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,7 @@ import {
2424
normalizeTextForComparison,
2525
} from "../../pi-embedded-helpers.js";
2626
import type { ToolResultFormat } from "../../pi-embedded-subscribe.shared-types.js";
27-
import {
28-
extractAssistantThinking,
29-
extractAssistantVisibleText,
30-
} from "../../pi-embedded-utils.js";
27+
import { extractAssistantThinking, extractAssistantVisibleText } from "../../pi-embedded-utils.js";
3128
import { isExecLikeToolName, type ToolErrorSummary } from "../../tool-error-summary.js";
3229
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
3330

@@ -48,7 +45,7 @@ const RECOVERABLE_TOOL_ERROR_KEYWORDS = [
4845
] as const;
4946

5047
const MUTATING_FAILURE_ACTION_PATTERN =
51-
"(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|tool|action|operation)";
48+
"(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|run|execute|execution|command|script|shell|bash|exec|tool|action|operation)";
5249

5350
const MUTATING_FAILURE_INABILITY_PATTERN = new RegExp(
5451
`\\b(?:couldn't|could not|can't|cannot|unable to|am unable to|wasn't able to|was not able to|were unable to)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`,
@@ -143,9 +140,6 @@ function resolveToolErrorWarningPolicy(params: {
143140
if (params.suppressToolErrorWarnings) {
144141
return { showWarning: false, includeDetails };
145142
}
146-
if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) {
147-
return { showWarning: false, includeDetails };
148-
}
149143
// sessions_send timeouts and errors are transient inter-session communication
150144
// issues — the message may still have been delivered. Suppress warnings to
151145
// prevent raw error text from leaking into the chat surface (#23989).
@@ -160,6 +154,9 @@ function resolveToolErrorWarningPolicy(params: {
160154
includeDetails,
161155
};
162156
}
157+
if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) {
158+
return { showWarning: false, includeDetails };
159+
}
163160
if (params.suppressToolErrors) {
164161
return { showWarning: false, includeDetails };
165162
}

0 commit comments

Comments
 (0)