fix(auto-reply): preserve post-compaction failure context

sweetcornna · sweetcornna · commit 5b8a9e294839 · 2026-06-03T16:07:53.000+08:00
diff --git a/src/auto-reply/reply/agent-runner-execution.test.ts b/src/auto-reply/reply/agent-runner-execution.test.ts
@@ -4798,6 +4798,149 @@ describe("runAgentTurnWithFallback", () => {
     }
   });
 
+  it("surfaces post-compaction context when the retried turn times out (#67750)", async () => {
+    // Embedded run announces a successful compaction (so autoCompactionCount > 0),
+    // then the retried turn throws. The fallback layer rejects with a structured
+    // FallbackSummaryError carrying timeout + billing-skip attempts. The user
+    // message must preserve that compaction succeeded plus the cause chain,
+    // not collapse to BILLING_ERROR_USER_MESSAGE or the generic /new copy.
+    state.runEmbeddedAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
+      await params.onAgentEvent?.({ stream: "compaction", data: { phase: "start" } });
+      await params.onAgentEvent?.({
+        stream: "compaction",
+        data: { phase: "end", completed: true },
+      });
+      throw new Error("LLM idle timeout (120s): no response from model");
+    });
+    state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => {
+      try {
+        await params.run("openai-codex", "gpt-5.4");
+      } catch {
+        // Swallow the per-candidate error; the fallback layer reports the
+        // aggregate summary below, mirroring the production failure shape.
+      }
+      throw Object.assign(
+        new Error(
+          "All models failed (3): openai-codex/gpt-5.4: LLM request timed out. (timeout) | " +
+            "anthropic/claude-opus-4-7: Provider anthropic has billing issue (billing) | " +
+            "anthropic/claude-sonnet-4-7: Provider anthropic has billing issue (billing)",
+        ),
+        {
+          name: "FallbackSummaryError",
+          attempts: [
+            {
+              provider: "openai-codex",
+              model: "gpt-5.4",
+              error: "LLM request timed out.",
+              reason: "timeout",
+            },
+            {
+              provider: "anthropic",
+              model: "claude-opus-4-7",
+              error: "Provider anthropic has billing issue",
+              reason: "billing",
+            },
+            {
+              provider: "anthropic",
+              model: "claude-sonnet-4-7",
+              error: "Provider anthropic has billing issue",
+              reason: "billing",
+            },
+          ],
+          soonestCooldownExpiry: null,
+        },
+      );
+    });
+
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun: createFollowupRun(),
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+
+    expect(result.kind).toBe("final");
+    if (result.kind === "final") {
+      // Compaction context preserved.
+      expect(result.payload.text).toContain("Auto-compaction succeeded");
+      // Retried turn cause preserved (billing was the resolved cause-specific copy).
+      expect(result.payload.text).toContain("billing");
+      // Per-attempt summary preserved (timeout + billing skips).
+      expect(result.payload.text).toContain("openai-codex/gpt-5.4 timed out");
+      expect(result.payload.text).toContain("anthropic/claude-opus-4-7 skipped (billing)");
+      // Not the bare GENERIC_RUN_FAILURE_TEXT.
+      expect(result.payload.text).not.toBe(GENERIC_RUN_FAILURE_TEXT);
+      // Wrapped in the post-compaction sentence, i.e. not the bare billing copy.
+      expect(result.payload.text).toContain("succeeded, but the retried turn still failed");
+    }
+  });
+
+  it("surfaces post-compaction context for plain failures without a fallback summary", async () => {
+    // A plain Error (no FallbackSummaryError) is thrown once compaction has been
+    // reported as completed. Left alone, the generic fallback copy would hide that
+    // compaction happened; the wrap keeps that fact and swaps the bare "/new"
+    // advice for concrete next steps.
+    state.runEmbeddedAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
+      await params.onAgentEvent?.({ stream: "compaction", data: { phase: "start" } });
+      await params.onAgentEvent?.({
+        stream: "compaction",
+        data: { phase: "end", completed: true },
+      });
+      throw new Error("INVALID_ARGUMENT: some opaque post-compaction failure");
+    });
+
+    const runTurn = await getRunAgentTurnWithFallback();
+    const outcome = await runTurn({ ...createMinimalRunAgentTurnParams() });
+
+    expect(outcome.kind).toBe("final");
+    if (outcome.kind !== "final") {
+      return;
+    }
+    const replyText = outcome.payload.text;
+    expect(replyText).toContain("Auto-compaction succeeded");
+    // Generic runner failure: retry guidance is expected, not the plain /new fallback.
+    expect(replyText).toContain(
+      "The context was compacted but no candidate could finish the turn",
+    );
+    expect(replyText).not.toBe(GENERIC_RUN_FAILURE_TEXT);
+  });
+
+  it("does not add post-compaction context when no compaction succeeded", async () => {
+    // Guard against over-wrapping: an unrecognised error with no earlier
+    // successful compaction has to keep yielding the generic fallback copy.
+    const unrelatedFailure = new Error("INVALID_ARGUMENT: some other failure");
+    state.runEmbeddedAgentMock.mockRejectedValueOnce(unrelatedFailure);
+
+    const runTurn = await getRunAgentTurnWithFallback();
+    const outcome = await runTurn({ ...createMinimalRunAgentTurnParams() });
+
+    expect(outcome.kind).toBe("final");
+    if (outcome.kind !== "final") {
+      return;
+    }
+    // Exactly the generic copy, with no compaction sentence mixed in.
+    const replyText = outcome.payload.text;
+    expect(replyText).toBe(GENERIC_RUN_FAILURE_TEXT);
+    expect(replyText).not.toContain("Auto-compaction succeeded");
+  });
+
   it("surfaces gateway restart text when fallback exhaustion wraps a drain error", async () => {
     const { replyOperation, failMock } = createMockReplyOperation();
     state.runWithModelFallbackMock.mockRejectedValueOnce(
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
@@ -627,6 +627,95 @@ function collapseRepeatedFailureDetail(message: string): string {
   return message.trim();
 }
 
+const FAILOVER_REASON_VERB: Record<string, string> = { // failover reason -> phrase shown after "<provider>/<model>"
+  timeout: "timed out",
+  rate_limit: "rate-limited",
+  overloaded: "overloaded",
+  billing: "skipped (billing)",
+  auth: "auth failure",
+  auth_permanent: "auth permanently revoked",
+  server_error: "server error",
+  format: "response format error",
+  model_not_found: "model not found",
+  session_expired: "session expired",
+  empty_response: "empty response",
+  no_error_details: "unspecified error",
+  unclassified: "failed",
+  unknown: "failed",
+}; // reasons missing from this table are rendered as "failed" by the attempt summarizer
+// Length cap for the joined attempt summary; a truncated summary ends in "…" within this cap.
+const POST_COMPACTION_ATTEMPT_SUMMARY_MAX_CHARS = 240;
+const POST_COMPACTION_FAILURE_PREFIX = // opening sentence of every wrapped post-compaction failure reply
+  "⚠️ Auto-compaction succeeded, but the retried turn still failed";
+
+/**
+ * Render the attempts carried by a FallbackSummaryError as one short line for
+ * end users, e.g. "openai/gpt-5.4 timed out; anthropic/claude skipped (billing)".
+ * Yields undefined when `err` is not a structured fallback summary or lists no
+ * attempts, so callers can fall back to the resolved cause text. Output longer
+ * than POST_COMPACTION_ATTEMPT_SUMMARY_MAX_CHARS is cut short and ends in "…".
+ */
+function summarizeFallbackAttemptsForUser(err: unknown): string | undefined {
+  if (!isFallbackSummaryError(err)) {
+    return undefined;
+  }
+  // One "<provider>/<model> <verb>" fragment per attempt, in attempt order.
+  const fragments = Array.from(err.attempts, (attempt) => {
+    // A missing reason, or one absent from the verb table, reads as "failed".
+    const known = attempt.reason ? FAILOVER_REASON_VERB[attempt.reason] : undefined;
+    return `${attempt.provider}/${attempt.model} ${known ?? "failed"}`;
+  });
+  if (fragments.length === 0) {
+    return undefined;
+  }
+  const joined = fragments.join("; ");
+  const limit = POST_COMPACTION_ATTEMPT_SUMMARY_MAX_CHARS;
+  // Reserve one character of the budget for the trailing ellipsis.
+  return joined.length > limit ? `${joined.slice(0, limit - 1).trimEnd()}…` : joined;
+}
+
+/**
+ * Wrap a resolved fallback failure message with post-compaction context so the
+ * user sees that compaction succeeded before the retried turn failed. Returns
+ * `baseText` unchanged when `autoCompactionCount` is not positive.
+ *
+ * A generic runner failure drops the base text in favour of the attempt summary
+ * plus retry guidance; cause-specific text is kept after " — ", followed by an
+ * "Attempts:" trailer when `err` yields an attempt summary.
+ * See: https://github.com/openclaw/openclaw/issues/67750
+ */
+function buildPostCompactionFailureRecoveryText(params: {
+  baseText: string;
+  err: unknown;
+  autoCompactionCount: number;
+  isGenericRunnerFailure: boolean;
+}): string {
+  const { baseText, err, autoCompactionCount, isGenericRunnerFailure } = params;
+  if (autoCompactionCount <= 0) {
+    return baseText;
+  }
+  const countSuffix = autoCompactionCount > 1 ? ` (${autoCompactionCount} compactions)` : "";
+  // A truncated summary already ends in "…"; appending "." would render "….".
+  let attemptSentence = summarizeFallbackAttemptsForUser(err) ?? "";
+  if (attemptSentence && !attemptSentence.endsWith("…")) {
+    attemptSentence += ".";
+  }
+  if (isGenericRunnerFailure) {
+    // The generic catch-all carries no cause signal, so the base text is dropped
+    // in favour of the attempt summary (when there is one) and retry guidance.
+    const detail = attemptSentence ? `: ${attemptSentence}` : ".";
+    const guidance =
+      "\n\nThe context was compacted but no candidate could finish the turn. Try again in a moment, or use /new only if the session stays stuck.";
+    return `${POST_COMPACTION_FAILURE_PREFIX}${countSuffix}${detail}${guidance}`;
+  }
+  // Cause-specific copy (billing, rate-limit, timeout, oauth) stays verbatim minus
+  // its leading warning sign, matched with or without the U+FE0F selector.
+  const cause = baseText.replace(/^\u26a0\ufe0f?\s*/u, "").trim();
+  const causeDetail = cause ? ` — ${cause}` : ".";
+  const attemptTrailer = attemptSentence ? `\n\nAttempts: ${attemptSentence}` : "";
+  return `${POST_COMPACTION_FAILURE_PREFIX}${countSuffix}${causeDetail}${attemptTrailer}`;
+}
+
 const SAFE_MISSING_API_KEY_PROVIDERS = new Set(["anthropic", "google", "openai"]);
 const EXTERNAL_RUN_FAILURE_DETAIL_MAX_CHARS = 900;
 const AGENT_FAILED_BEFORE_REPLY_TEXT = "Agent failed before reply:";
@@ -2921,8 +3010,23 @@ export async function runAgentTurnWithFallback(params: {
               : shouldSurfaceToControlUi
                 ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
                 : (externalRunFailureReply?.text ?? genericFallbackText);
+      // When auto-compaction succeeded earlier in this turn (#67750), prepend
+      // the post-compaction context so the user does not see the cause-specific
+      // or generic text without learning that the retried turn was the one
+      // that failed. Skip for heartbeat probes (separate copy contract) and
+      // control-UI surfaces (already includes raw cause + logs link).
+      const shouldSurfacePostCompactionContext =
+        autoCompactionCount > 0 && !params.isHeartbeat && !shouldSurfaceToControlUi;
+      const fallbackTextWithPostCompactionContext = shouldSurfacePostCompactionContext
+        ? buildPostCompactionFailureRecoveryText({
+            baseText: fallbackText,
+            err,
+            autoCompactionCount,
+            isGenericRunnerFailure: externalRunFailureReply?.isGenericRunnerFailure ?? false,
+          })
+        : fallbackText;
       const userVisibleFallbackText = resolveExternalRunFailureTextForConversation({
-        text: fallbackText,
+        text: fallbackTextWithPostCompactionContext,
         sessionCtx: params.sessionCtx,
         isGenericRunnerFailure: externalRunFailureReply?.isGenericRunnerFailure ?? false,
         suppressInNonDirect: Boolean(isRateLimit || rateLimitOrOverloadedCopy),