openclaw
diff --git a/‎extensions/openai/index.test.ts‎
Lines changed: 21 additions & 7 deletions b/‎extensions/openai/index.test.ts‎
Lines changed: 21 additions & 7 deletions
diff --git a/‎extensions/openai/prompt-overlay.ts‎
Lines changed: 19 additions & 4 deletions b/‎extensions/openai/prompt-overlay.ts‎
Lines changed: 19 additions & 4 deletions
diff --git a/‎src/agents/pi-embedded-runner/run.incomplete-turn.test.ts‎
Lines changed: 159 additions & 1 deletion b/‎src/agents/pi-embedded-runner/run.incomplete-turn.test.ts‎
Lines changed: 159 additions & 1 deletion
@@ -14,6 +14,7 @@ import {
   OPENAI_FRIENDLY_PROMPT_OVERLAY,
   OPENAI_GPT5_EXECUTION_BIAS,
   OPENAI_GPT5_OUTPUT_CONTRACT,
+  OPENAI_GPT5_TOOL_CALL_STYLE,
 } from "./prompt-overlay.js";
 
 const runtimeMocks = vi.hoisted(() => ({
@@ -365,7 +366,7 @@ describe("openai plugin", () => {
     };
 
     expect(openaiProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -382,7 +383,7 @@ describe("openai plugin", () => {
       "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
     );
     expect(codexProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -454,9 +455,22 @@ describe("openai plugin", () => {
     expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
       "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
     );
+    expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
+      "Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.",
+    );
+    expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
+      "If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.",
+    );
     expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
       "Do prerequisite lookup or discovery before dependent actions.",
     );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
+      "Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.",
+    );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
+      "When a first-class tool exists for an action, use the tool instead of asking the user to run a command.",
+    );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).not.toContain("/approve");
     expect(OPENAI_GPT5_OUTPUT_CONTRACT).toContain(
       "Return the requested sections only, in the requested order.",
     );
@@ -486,7 +500,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -514,7 +528,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
       },
@@ -540,7 +554,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
       },
@@ -567,7 +581,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -594,7 +608,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
 
@@ -64,11 +64,20 @@ Do not use em dashes unless the user explicitly asks for them or they are requir
 
 export const OPENAI_GPT5_EXECUTION_BIAS = `## Execution Bias
 
-Start the real work in the same turn when the next step is clear.
+Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.
+Commentary-only turns are incomplete when tools are available and the next action is clear.
+If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.
 Do prerequisite lookup or discovery before dependent actions.
-If another tool call would likely improve correctness or completeness, keep going instead of stopping at partial progress.
 Multi-part requests stay incomplete until every requested item is handled or clearly marked blocked.
-Before the final answer, quickly verify correctness, coverage, formatting, and obvious side effects.`;
+Act first, then verify if needed. Do not pause to summarize or verify before taking the next action.`;
+
+export const OPENAI_GPT5_TOOL_CALL_STYLE = `## Tool Call Style
+
+Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.
+When a first-class tool exists for an action, use the tool instead of asking the user to run a command.
+If multiple tool calls are needed, call them in sequence without stopping to explain between calls.
+Default: do not narrate routine, low-risk tool calls (just call the tool).
+Narrate only when it genuinely helps: complex multi-step work, sensitive actions like deletions, or when the user explicitly asks for commentary.`;
 
 export type OpenAIPromptOverlayMode = "friendly" | "off";
 
@@ -103,8 +112,14 @@ export function resolveOpenAISystemPromptContribution(params: {
   ) {
     return undefined;
   }
+  // tool_call_style is NOT overridden via sectionOverrides because the
+  // default section includes dynamic channel-specific approval guidance
+  // from buildExecApprovalPromptGuidance() that varies per runtime
+  // channel. Overriding it with a static string would lose that dynamic
+  // content. Instead, the tool-first reinforcement lives in stablePrefix
+  // so it's always present alongside the default tool_call_style section.
   return {
-    stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+    stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
     sectionOverrides: {
       execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
       ...(params.mode === "friendly" ? { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY } : {}),
 
@@ -10,8 +10,10 @@ import {
   resetRunOverflowCompactionHarnessMocks,
 } from "./run.overflow-compaction.harness.js";
 import {
+  buildAttemptReplayMetadata,
   extractPlanningOnlyPlanDetails,
   isLikelyExecutionAckPrompt,
+  PLANNING_ONLY_RETRY_INSTRUCTION,
   resolveAckExecutionFastPathInstruction,
   resolvePlanningOnlyRetryLimit,
   resolvePlanningOnlyRetryInstruction,
@@ -281,7 +283,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
       timedOut: false,
       attempt: makeAttemptResult({
         assistantTexts: ["I'll inspect the code, make the change, and run the checks."],
-        toolMetas: [{ toolName: "bash", meta: "ls" }],
+        toolMetas: [
+          { toolName: "read", meta: "path=src/index.ts" },
+          { toolName: "search", meta: "pattern=runEmbeddedPiAgent" },
+        ],
       }),
     });
 
@@ -436,3 +441,156 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
     ).toBe("paused");
   });
 });
+
+describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
+  const openaiParams = { provider: "openai", modelId: "gpt-5.4" } as const;
+
+  function makeAttemptWithTools(
+    toolNames: string[],
+    assistantText: string,
+  ): Parameters<typeof resolvePlanningOnlyRetryInstruction>[0]["attempt"] {
+    const toolMetas = toolNames.map((toolName) => ({ toolName }));
+    return {
+      toolMetas,
+      assistantTexts: [assistantText],
+      lastAssistant: { stopReason: "stop" },
+      itemLifecycle: { startedCount: toolNames.length },
+      replayMetadata: buildAttemptReplayMetadata({
+        toolMetas,
+        didSendViaMessagingTool: false,
+      }),
+      clientToolCall: null,
+      yieldDetected: false,
+      didSendDeterministicApprovalPrompt: false,
+      didSendViaMessagingTool: false,
+      lastToolError: null,
+    } as unknown as Parameters<typeof resolvePlanningOnlyRetryInstruction>[0]["attempt"];
+  }
+
+  it("retries when exactly 1 non-plan tool call plus 'i can do that' prose is detected", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I can do that next."),
+    });
+
+    expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION);
+  });
+
+  it("retries when exactly 1 non-plan tool call plus planning prose is detected", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I'll analyze the structure next."),
+    });
+
+    expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION);
+  });
+
+  it("does not retry when 2+ non-plan tool calls are present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read", "search"], "I'll verify the output."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus completion language is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "Done. The file looks correct."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus 'let me know' handoff is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "Let me know if you need anything else."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus an answer-style summary is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(
+        ["read"],
+        "I'll summarize the root cause: the provider auth scope is missing.",
+      ),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus a future-tense description is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(
+        ["read"],
+        "I'll describe the issue: the provider auth scope is missing.",
+      ),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 safe tool call is followed by answer prose joined with 'and'", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I'll explain and recommend a fix."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus a bare 'i can do that' reply is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I can do that."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when the lone tool call already had side effects", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["sessions_spawn"], "I'll continue from there next."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when the lone tool call is unclassified", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["vendor_widget"], "I'll continue from there next."),
+    });
+
+    expect(result).toBeNull();
+  });
+});