Skip to content

Commit 5b8a9e2

Browse files
committed
fix(auto-reply): preserve post-compaction failure context
1 parent 5820378 commit 5b8a9e2

2 files changed

Lines changed: 248 additions & 1 deletion

File tree

src/auto-reply/reply/agent-runner-execution.test.ts

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4798,6 +4798,149 @@ describe("runAgentTurnWithFallback", () => {
47984798
}
47994799
});
48004800

4801+
it("surfaces post-compaction context when the retried turn times out (#67750)", async () => {
  // Scenario (#67750): the embedded run reports a completed compaction and then
  // the retried turn throws. The fallback runner rejects with an aggregate
  // FallbackSummaryError (one timeout attempt, two billing attempts). The reply
  // text has to keep both the compaction notice and the per-attempt causes
  // rather than collapsing to a single cause-specific or generic message.
  const failedAttempts = [
    {
      provider: "openai-codex",
      model: "gpt-5.4",
      error: "LLM request timed out.",
      reason: "timeout",
    },
    {
      provider: "anthropic",
      model: "claude-opus-4-7",
      error: "Provider anthropic has billing issue",
      reason: "billing",
    },
    {
      provider: "anthropic",
      model: "claude-sonnet-4-7",
      error: "Provider anthropic has billing issue",
      reason: "billing",
    },
  ];
  // Same shape as the aggregate message: "<provider>/<model>: <error> (<reason>)"
  // entries joined with " | ".
  const aggregateMessage = `All models failed (${failedAttempts.length}): ${failedAttempts
    .map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error} (${attempt.reason})`)
    .join(" | ")}`;

  state.runEmbeddedAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
    // Announce a full compaction cycle before failing the retried turn.
    await params.onAgentEvent?.({ stream: "compaction", data: { phase: "start" } });
    await params.onAgentEvent?.({
      stream: "compaction",
      data: { phase: "end", completed: true },
    });
    throw new Error("LLM idle timeout (120s): no response from model");
  });
  state.runWithModelFallbackMock.mockImplementationOnce(async (params: FallbackRunnerParams) => {
    try {
      await params.run("openai-codex", "gpt-5.4");
    } catch {
      // The per-candidate error is intentionally dropped: only the aggregate
      // summary thrown below is reported by the fallback layer in this test.
    }
    throw Object.assign(new Error(aggregateMessage), {
      name: "FallbackSummaryError",
      attempts: failedAttempts,
      soonestCooldownExpiry: null,
    });
  });

  const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
  const result = await runAgentTurnWithFallback({
    commandBody: "hello",
    followupRun: createFollowupRun(),
    sessionCtx: {
      Provider: "whatsapp",
      MessageSid: "msg",
    } as unknown as TemplateContext,
    opts: {},
    typingSignals: createMockTypingSignaler(),
    blockReplyPipeline: null,
    blockStreamingEnabled: false,
    resolvedBlockStreamingBreak: "message_end",
    applyReplyToMode: (payload) => payload,
    shouldEmitToolResult: () => true,
    shouldEmitToolOutput: () => false,
    pendingToolTasks: new Set(),
    resetSessionAfterRoleOrderingConflict: async () => false,
    isHeartbeat: false,
    sessionKey: "main",
    getActiveSessionEntry: () => undefined,
    resolvedVerboseLevel: "off",
  });

  expect(result.kind).toBe("final");
  if (result.kind !== "final") {
    return;
  }
  const text = result.payload.text;
  // The compaction notice survives.
  expect(text).toContain("Auto-compaction succeeded");
  // The billing cause is still mentioned.
  expect(text).toContain("billing");
  // Each attempt is summarized (timeout + billing skip).
  expect(text).toContain("openai-codex/gpt-5.4 timed out");
  expect(text).toContain("anthropic/claude-opus-4-7 skipped (billing)");
  // The reply is not the bare generic failure copy.
  expect(text).not.toBe(GENERIC_RUN_FAILURE_TEXT);
  // NOTE(review): this compares against the literal "billing", not the billing
  // user-message constant, so it can never fail once the checks above pass.
  // Consider asserting against BILLING_ERROR_USER_MESSAGE if it is importable here.
  expect(text).not.toBe("billing");
});
4894+
4895+
it("surfaces post-compaction context for plain failures without a fallback summary", async () => {
  // A plain Error (no FallbackSummaryError) is thrown after a completed
  // compaction. The reply should still say that compaction succeeded and give
  // retry guidance, instead of only the generic failure copy.
  state.runEmbeddedAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
    await params.onAgentEvent?.({ stream: "compaction", data: { phase: "start" } });
    await params.onAgentEvent?.({
      stream: "compaction",
      data: { phase: "end", completed: true },
    });
    throw new Error("INVALID_ARGUMENT: some opaque post-compaction failure");
  });

  const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
  const turnParams = { ...createMinimalRunAgentTurnParams() };
  const result = await runAgentTurnWithFallback(turnParams);

  expect(result.kind).toBe("final");
  if (result.kind !== "final") {
    return;
  }
  const text = result.payload.text;
  expect(text).toContain("Auto-compaction succeeded");
  // Generic-runner-failure path: guidance text rather than the bare fallback.
  expect(text).toContain("The context was compacted but no candidate could finish the turn");
  expect(text).not.toBe(GENERIC_RUN_FAILURE_TEXT);
});
4924+
4925+
it("does not add post-compaction context when no compaction succeeded", async () => {
  // Regression guard: with no compaction event emitted, an unknown error must
  // still yield exactly the generic failure copy.
  const plainFailure = new Error("INVALID_ARGUMENT: some other failure");
  state.runEmbeddedAgentMock.mockRejectedValueOnce(plainFailure);

  const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
  const turnParams = { ...createMinimalRunAgentTurnParams() };
  const result = await runAgentTurnWithFallback(turnParams);

  expect(result.kind).toBe("final");
  if (result.kind !== "final") {
    return;
  }
  const text = result.payload.text;
  expect(text).toBe(GENERIC_RUN_FAILURE_TEXT);
  expect(text).not.toContain("Auto-compaction succeeded");
});
4943+
48014944
it("surfaces gateway restart text when fallback exhaustion wraps a drain error", async () => {
48024945
const { replyOperation, failMock } = createMockReplyOperation();
48034946
state.runWithModelFallbackMock.mockRejectedValueOnce(

src/auto-reply/reply/agent-runner-execution.ts

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,95 @@ function collapseRepeatedFailureDetail(message: string): string {
627627
return message.trim();
628628
}
629629

630+
/**
 * Phrase appended after `provider/model` for each fallback attempt `reason`.
 * Reasons that are not own keys of this table are rendered as "failed".
 */
const FAILOVER_REASON_VERB: Record<string, string> = {
  timeout: "timed out",
  rate_limit: "rate-limited",
  overloaded: "overloaded",
  billing: "skipped (billing)",
  auth: "auth failure",
  auth_permanent: "auth permanently revoked",
  server_error: "server error",
  format: "response format error",
  model_not_found: "model not found",
  session_expired: "session expired",
  empty_response: "empty response",
  no_error_details: "unspecified error",
  unclassified: "failed",
  unknown: "failed",
};

/** Upper bound (UTF-16 code units, ellipsis included) for the attempt summary. */
const POST_COMPACTION_ATTEMPT_SUMMARY_MAX_CHARS = 240;
/** Lead-in for failure replies sent after a successful auto-compaction. */
const POST_COMPACTION_FAILURE_PREFIX =
  "⚠️ Auto-compaction succeeded, but the retried turn still failed";

/**
 * Summarize FallbackSummaryError attempts as a short, user-readable list:
 * "openai/gpt-5.4 timed out; anthropic/claude skipped (billing)".
 *
 * @param err - The caught error; only structured fallback summaries are handled.
 * @returns The "; "-joined summary, truncated with "…" when it exceeds
 *   POST_COMPACTION_ATTEMPT_SUMMARY_MAX_CHARS, or `undefined` when `err` is not
 *   a fallback summary or carries no attempts, so callers can fall back to the
 *   resolved cause text.
 */
function summarizeFallbackAttemptsForUser(err: unknown): string | undefined {
  if (!isFallbackSummaryError(err)) {
    return undefined;
  }
  const defaultVerb = "failed";
  const parts: string[] = [];
  for (const attempt of err.attempts) {
    const reason = attempt.reason;
    // Own-property check: a plain index lookup would resolve inherited members
    // such as "constructor" or "toString" to non-string values and bypass the
    // "failed" fallback.
    const isKnownReason =
      Boolean(reason) && Object.prototype.hasOwnProperty.call(FAILOVER_REASON_VERB, reason);
    const verb = isKnownReason ? (FAILOVER_REASON_VERB[reason as string] ?? defaultVerb) : defaultVerb;
    parts.push(`${attempt.provider}/${attempt.model} ${verb}`);
  }
  if (parts.length === 0) {
    return undefined;
  }
  const summary = parts.join("; ");
  if (summary.length <= POST_COMPACTION_ATTEMPT_SUMMARY_MAX_CHARS) {
    return summary;
  }
  // Reserve one unit for the ellipsis.
  let truncated = summary.slice(0, POST_COMPACTION_ATTEMPT_SUMMARY_MAX_CHARS - 1);
  // Never leave half of a surrogate pair in front of the ellipsis.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }
  return `${truncated.trimEnd()}…`;
}
676+
677+
/**
678+
* Wrap a resolved fallback failure message with post-compaction context so the
679+
* user sees that compaction succeeded before the retried turn failed — instead
680+
* of the generic "Something went wrong" or bare cause-specific text that erases
681+
* the compaction history. The base text is preserved so existing cause-specific
682+
* guidance (billing, rate-limit, timeout) still reaches the user.
683+
*
684+
* See: https://github.com/openclaw/openclaw/issues/67750
685+
*/
686+
function buildPostCompactionFailureRecoveryText(params: {
687+
baseText: string;
688+
err: unknown;
689+
autoCompactionCount: number;
690+
isGenericRunnerFailure: boolean;
691+
}): string {
692+
if (params.autoCompactionCount <= 0) {
693+
return params.baseText;
694+
}
695+
const countSuffix =
696+
params.autoCompactionCount > 1 ? ` (${params.autoCompactionCount} compactions)` : "";
697+
const attemptSummary = summarizeFallbackAttemptsForUser(params.err);
698+
const baseTrimmed = params.baseText.replace(/^\s*/u, "").trim();
699+
// Generic catch-all (`isGenericRunnerFailure`) carries no cause signal,
700+
// so drop the base text and rely on the attempt summary or the
701+
// generic-mode guidance below. Cause-specific copy (billing, rate-limit,
702+
// timeout, oauth) is preserved verbatim so the user still gets the actionable
703+
// hint that already covers their failure mode.
704+
const causeDetail = params.isGenericRunnerFailure
705+
? attemptSummary
706+
? `: ${attemptSummary}.`
707+
: "."
708+
: baseTrimmed
709+
? ` — ${baseTrimmed}`
710+
: ".";
711+
const attemptTrailer =
712+
!params.isGenericRunnerFailure && attemptSummary ? `\n\nAttempts: ${attemptSummary}.` : "";
713+
const guidance = params.isGenericRunnerFailure
714+
? "\n\nThe context was compacted but no candidate could finish the turn. Try again in a moment, or use /new only if the session stays stuck."
715+
: "";
716+
return `${POST_COMPACTION_FAILURE_PREFIX}${countSuffix}${causeDetail}${attemptTrailer}${guidance}`;
717+
}
718+
630719
const SAFE_MISSING_API_KEY_PROVIDERS = new Set(["anthropic", "google", "openai"]);
631720
const EXTERNAL_RUN_FAILURE_DETAIL_MAX_CHARS = 900;
632721
const AGENT_FAILED_BEFORE_REPLY_TEXT = "Agent failed before reply:";
@@ -2921,8 +3010,23 @@ export async function runAgentTurnWithFallback(params: {
29213010
: shouldSurfaceToControlUi
29223011
? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
29233012
: (externalRunFailureReply?.text ?? genericFallbackText);
3013+
// When auto-compaction succeeded earlier in this turn (#67750), prepend
3014+
// the post-compaction context so the user does not see the cause-specific
3015+
// or generic text without learning that the retried turn was the one
3016+
// that failed. Skip for heartbeat probes (separate copy contract) and
3017+
// control-UI surfaces (already includes raw cause + logs link).
3018+
const shouldSurfacePostCompactionContext =
3019+
autoCompactionCount > 0 && !params.isHeartbeat && !shouldSurfaceToControlUi;
3020+
const fallbackTextWithPostCompactionContext = shouldSurfacePostCompactionContext
3021+
? buildPostCompactionFailureRecoveryText({
3022+
baseText: fallbackText,
3023+
err,
3024+
autoCompactionCount,
3025+
isGenericRunnerFailure: externalRunFailureReply?.isGenericRunnerFailure ?? false,
3026+
})
3027+
: fallbackText;
29243028
const userVisibleFallbackText = resolveExternalRunFailureTextForConversation({
2925-
text: fallbackText,
3029+
text: fallbackTextWithPostCompactionContext,
29263030
sessionCtx: params.sessionCtx,
29273031
isGenericRunnerFailure: externalRunFailureReply?.isGenericRunnerFailure ?? false,
29283032
suppressInNonDirect: Boolean(isRateLimit || rateLimitOrOverloadedCopy),

0 commit comments

Comments
 (0)