Skip to content

Commit 12a5691

Browse files
openperf, clawsweeper[bot], Takhoffman
authored
fix(agents): detect unsigned thinking-only stall when reasoning payload inflates payloadCount (#89874)
Summary:
- Merged fix(agents): detect unsigned thinking-only stall when reasoning payload inflates payloadCount after ClawSweeper review.

Automerge notes:
- No ClawSweeper repair was needed after automerge opt-in.

Validation:
- ClawSweeper review passed for head c613c38.
- Required merge gates passed before the squash merge.

Prepared head SHA: c613c38
Review: #89874 (comment)

Co-authored-by: openperf <16864032@qq.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: takhoffman
Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
1 parent 1a3ce7c commit 12a5691

2 files changed

Lines changed: 136 additions & 5 deletions

File tree

src/agents/embedded-agent-runner/run.incomplete-turn.test.ts

Lines changed: 110 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1528,6 +1528,63 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
15281528
expect(incompleteTurnText).toBeNull();
15291529
});
15301530

1531+
it("surfaces stall on clean stop with only an unsigned thinking payload (payloadCount=1, no visible text)", () => {
1532+
// Regression: unsigned thinking payloads increment payloadCount but carry no
1533+
// user-visible content. The visible-text guard must not suppress incomplete-turn
1534+
// detection when the model produced only a thinking block and no answer. (#89787)
1535+
const incompleteTurnText = resolveIncompleteTurnPayloadText({
1536+
payloadCount: 1,
1537+
aborted: false,
1538+
timedOut: false,
1539+
attempt: makeAttemptResult({
1540+
assistantTexts: [],
1541+
lastAssistant: {
1542+
role: "assistant",
1543+
stopReason: "stop",
1544+
provider: "openai",
1545+
model: "qwen3.6-35b-a3b",
1546+
content: [
1547+
{
1548+
type: "thinking",
1549+
thinking: "let me plan the tool calls I need to make...",
1550+
// no signature — unsigned thinking block
1551+
},
1552+
],
1553+
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
1554+
}),
1555+
});
1556+
1557+
expect(incompleteTurnText).toContain("couldn't generate a response");
1558+
});
1559+
1560+
it("does not surface a stall when unsigned thinking accompanies visible text (payloadCount=1)", () => {
1561+
// When the model emits both a thinking block and a visible text answer, the turn
1562+
// succeeded and no stall should be surfaced even though thinking is unsigned.
1563+
const incompleteTurnText = resolveIncompleteTurnPayloadText({
1564+
payloadCount: 1,
1565+
aborted: false,
1566+
timedOut: false,
1567+
attempt: makeAttemptResult({
1568+
assistantTexts: ["Here is the answer to your question."],
1569+
lastAssistant: {
1570+
role: "assistant",
1571+
stopReason: "stop",
1572+
provider: "openai",
1573+
model: "qwen3.6-35b-a3b",
1574+
content: [
1575+
{
1576+
type: "thinking",
1577+
thinking: "let me answer this...",
1578+
},
1579+
{ type: "text", text: "Here is the answer to your question." },
1580+
],
1581+
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
1582+
}),
1583+
});
1584+
1585+
expect(incompleteTurnText).toBeNull();
1586+
});
1587+
15311588
it("surfaces an error for tool-use terminal turn with pre-tool text via runEmbeddedAgent (#76477)", async () => {
15321589
mockedClassifyFailoverReason.mockReturnValue(null);
15331590
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
@@ -1696,6 +1753,59 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
16961753
expect(retryInstruction).toBe(REASONING_ONLY_RETRY_INSTRUCTION);
16971754
});
16981755

1756+
it("retries unsigned thinking-only turns via the reasoning-only path (openai-completions)", () => {
1757+
const retryInstruction = resolveReasoningOnlyRetryInstruction({
1758+
provider: "openai",
1759+
modelId: "qwen3.6-35b-a3b",
1760+
modelApi: "openai-completions",
1761+
aborted: false,
1762+
timedOut: false,
1763+
attempt: makeAttemptResult({
1764+
assistantTexts: [],
1765+
lastAssistant: {
1766+
role: "assistant",
1767+
stopReason: "stop",
1768+
provider: "openai",
1769+
model: "qwen3.6-35b-a3b",
1770+
content: [
1771+
{
1772+
type: "thinking",
1773+
thinking: "let me plan the tool calls I need to make...",
1774+
},
1775+
],
1776+
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
1777+
}),
1778+
});
1779+
1780+
expect(retryInstruction).toBe(REASONING_ONLY_RETRY_INSTRUCTION);
1781+
});
1782+
1783+
it("retries unsigned thinking-only Ollama turns via the reasoning-only path", () => {
1784+
const retryInstruction = resolveReasoningOnlyRetryInstruction({
1785+
provider: "ollama",
1786+
modelId: "gemma4:31b",
1787+
aborted: false,
1788+
timedOut: false,
1789+
attempt: makeAttemptResult({
1790+
assistantTexts: [],
1791+
lastAssistant: {
1792+
role: "assistant",
1793+
stopReason: "end_turn",
1794+
provider: "ollama",
1795+
model: "gemma4:31b",
1796+
content: [
1797+
{
1798+
type: "thinking",
1799+
thinking: "internal reasoning",
1800+
},
1801+
],
1802+
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
1803+
}),
1804+
});
1805+
1806+
expect(retryInstruction).toBe(REASONING_ONLY_RETRY_INSTRUCTION);
1807+
});
1808+
16991809
it("retries unsigned-thinking Ollama turns via the empty-response path", () => {
17001810
const retryInstruction = resolveEmptyResponseRetryInstruction({
17011811
provider: "ollama",

src/agents/embedded-agent-runner/run/incomplete-turn.ts

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -280,9 +280,17 @@ export function resolveIncompleteTurnPayloadText(params: {
280280
// turn check in that case — the final post-tool response was never
281281
// produced. (#76477)
282282
const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse";
283+
const assistant = params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant;
284+
// Unsigned thinking payloads count toward payloadCount but carry no user-visible
285+
// content; bypass the visible-text guard when unsigned thinking was the only output
286+
// so that incomplete-turn stall detection fires below. (#89787)
287+
const unsignedThinkingOnlyTerminal =
288+
params.payloadCount !== 0 &&
289+
!joinAssistantTexts(params.attempt.assistantTexts).length &&
290+
isUnsignedThinkingOnlyAssistantTurn(assistant);
283291

284292
if (
285-
(params.payloadCount !== 0 && !toolUseTerminal) ||
293+
(params.payloadCount !== 0 && !toolUseTerminal && !unsignedThinkingOnlyTerminal) ||
286294
(params.aborted && params.externalAbort) ||
287295
params.timedOut ||
288296
params.attempt.clientToolCalls ||
@@ -314,16 +322,15 @@ export function resolveIncompleteTurnPayloadText(params: {
314322
hasAssistantVisibleText: params.payloadCount > 0,
315323
lastAssistant: params.attempt.lastAssistant,
316324
});
317-
const reasoningOnlyAssistant = isReasoningOnlyAssistantTurn(
318-
params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant,
319-
);
325+
const reasoningOnlyAssistant = isReasoningOnlyAssistantTurn(assistant);
320326
const emptyResponseAssistant = isEmptyResponseAssistantTurn({
321327
payloadCount: params.payloadCount,
322328
attempt: params.attempt,
323329
});
324330
if (
325331
!incompleteTerminalAssistant &&
326332
!reasoningOnlyAssistant &&
333+
!unsignedThinkingOnlyTerminal &&
327334
!emptyResponseAssistant &&
328335
stopReason !== "error"
329336
) {
@@ -534,6 +541,20 @@ function isReasoningOnlyAssistantTurn(message: unknown): boolean {
534541
return assessLastAssistantMessage(message as AgentMessage) === "incomplete-text";
535542
}
536543

544+
// Unsigned thinking blocks have no cryptographic signature; assessLastAssistantMessage
545+
// returns "incomplete-thinking" for them. Empty content also returns "incomplete-thinking",
546+
// so the content.length > 0 guard is required to distinguish the two cases.
547+
function isUnsignedThinkingOnlyAssistantTurn(message: unknown): boolean {
548+
if (message == null || typeof message !== "object") {
549+
return false;
550+
}
551+
const content = (message as { content?: unknown }).content;
552+
if (!Array.isArray(content) || content.length === 0) {
553+
return false;
554+
}
555+
return assessLastAssistantMessage(message as AgentMessage) === "incomplete-thinking";
556+
}
557+
537558
function isEmptyResponseAssistantTurn(params: {
538559
payloadCount: number;
539560
attempt: Pick<
@@ -669,7 +690,7 @@ export function resolveReasoningOnlyRetryInstruction(params: {
669690
if (assistant?.stopReason === "error") {
670691
return null;
671692
}
672-
if (!isReasoningOnlyAssistantTurn(assistant)) {
693+
if (!isReasoningOnlyAssistantTurn(assistant) && !isUnsignedThinkingOnlyAssistantTurn(assistant)) {
673694
return null;
674695
}
675696

0 commit comments

Comments
 (0)