Skip to content

Commit da92887

Browse files
fix(interactive-proxy): harden sub-agent classifier (layered, multi-signal)
Replace the single-`Agent`-tool heuristic with an exported, unit-tested classifyRequest. Layered, first-decisive-wins: positive primary detection via a configurable spawner-tool set plus a conservative structural matcher (so a renamed/disguised Task spawner is still recognized as the primary); keep the web-search sub-agent catch; gate the by-absence rule on a spawner having been seen this run, so a deny-listed-Agent run can't be mis-suppressed into a hang. Add request-classifier.test.ts (12 cases) covering each layer.
1 parent 24c278d commit da92887

2 files changed

Lines changed: 350 additions & 128 deletions

File tree

extensions/anthropic/interactive-proxy/mitm-server.ts

Lines changed: 200 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,194 @@ export type MitmProxyHandle = {
2727

2828
const UPSTREAM_HOST = "api.anthropic.com";
2929

30+
// ---------------------------------------------------------------------------
31+
// Request classification (primary turn vs. sub-agent).
32+
//
33+
// The wrapper needs to know whether a given /v1/messages stream's `end_turn`
34+
// should end the USER-facing turn (the primary) or be neutralized (a
35+
// sub-agent: Task/research/Explore, web search, or any disguised agent).
36+
// Exported as a pure function so it can be unit-tested independently of the
37+
// proxy. Layered, first-decisive-layer-wins; biased to never mis-suppress the
38+
// primary (which would hang) while still catching every sub-agent form.
39+
// ---------------------------------------------------------------------------
40+
41+
/**
 * Category assigned to an outbound /v1/messages request by classifyRequest.
 *
 * - "normal"        — tool-bearing request that matched no other category.
 * - "compaction"    — last user message carries the summarize-prompt markers.
 * - "tool_followup" — last message has role "tool" or contains a `*_result`
 *                     content block.
 * - "auxiliary"     — tool-less request that is neither a follow-up nor a
 *                     compaction (claude-code internal side-call).
 * - "subagent"      — request attributed to a sub-agent rather than the
 *                     primary, user-facing turn.
 */
export type InteractiveRequestType =
  | "normal"
  | "compaction"
  | "tool_followup"
  | "auxiliary"
  | "subagent";
47+
48+
/** Per-run mutable state threaded through classifyRequest. */
export type ClassifyState = {
  /**
   * True once any request this run advertised a primary "spawner" tool (the
   * Task/Agent tool that launches sub-agents). Gates the by-absence sub-agent
   * layer: an Agent-less request only becomes a sub-agent once a spawner has
   * actually been seen, so a deny-listed-Agent run keeps its primary turn-end
   * instead of hanging.
   */
  primarySpawnerSeen: boolean;
};
56+
57+
/** Optional tuning knobs for classifyRequest. Every field may be omitted. */
export type ClassifyOptions = {
  /**
   * Tool names that mark the PRIMARY turn (matched case-insensitively, exact).
   * A conservative structural matcher additionally catches renamed/"disguised"
   * spawner tools by shape. Defaults to DEFAULT_SPAWNER_TOOL_NAMES.
   */
  spawnerToolNames?: readonly string[];
  /**
   * System-prompt substrings that POSITIVELY mark a sub-agent request. OFF by
   * default (empty) — enable only once a live capture confirms a stable
   * marker. Correctness never depends on this layer.
   */
  subagentSystemMarkers?: readonly string[];
};
67+
68+
/**
 * Tool names treated as sub-agent spawners when the caller supplies no
 * `spawnerToolNames` override. Frozen so the shared default cannot be mutated
 * at runtime (the `readonly` annotation alone is erased at compile time).
 */
export const DEFAULT_SPAWNER_TOOL_NAMES: readonly string[] = Object.freeze([
  "Agent",
  "Task",
  "TaskCreate",
]);
69+
70+
// Conservative shape patterns for renamed spawners on the primary turn. Kept
// tight so a sub-agent's ordinary tool can't be mistaken for a spawner (which
// would wrongly keep that sub-agent's turn-end live). None carry the `g`
// flag, so `.test` is stateless and the list is safe to share across calls.
const SPAWNER_NAME_PATTERNS: readonly RegExp[] = [
  /^(agent|task)$/i,
  /^(dispatch|launch|spawn|create|run)_?(sub_?)?agent$/i,
  /^task(create|run|spawn|launch|dispatch)$/i,
];

/**
 * Decide whether one entry of a request's `tools` array is a sub-agent
 * "spawner" tool.
 *
 * @param tool  A tool entry taken from parsed request JSON. Treated as
 *              untrusted: a null entry or a missing / non-string `name` is
 *              tolerated and yields `false`.
 * @param names Configured spawner names, compared case-insensitively and
 *              exactly. Non-string entries are ignored rather than throwing.
 * @returns `true` when the tool's name equals a configured name or matches
 *          one of the structural spawner patterns; `false` otherwise.
 */
function isSpawnerTool(tool: Record<string, unknown>, names: readonly string[]): boolean {
  const rawName: unknown = tool?.name;
  const name = typeof rawName === "string" ? rawName : "";
  if (!name) {
    return false; // server tools (type-only, e.g. web_search) never spawn agents
  }
  const lower = name.toLowerCase();
  if (names.some((n) => typeof n === "string" && n.toLowerCase() === lower)) {
    return true;
  }
  return SPAWNER_NAME_PATTERNS.some((pattern) => pattern.test(name));
}
93+
94+
/**
 * Decide whether one entry of a request's `tools` array is a web-search tool.
 *
 * @param tool A tool entry taken from parsed request JSON; null entries and
 *             non-string fields are tolerated.
 * @returns `true` when the entry's `type` string contains "web_search", or
 *          when its `name` equals "web_search" case-insensitively.
 */
function isWebSearchTool(tool: Record<string, unknown>): boolean {
  const rawType: unknown = tool?.type;
  if (typeof rawType === "string" && rawType.includes("web_search")) {
    return true;
  }
  const rawName: unknown = tool?.name;
  return typeof rawName === "string" && rawName.toLowerCase() === "web_search";
}
102+
103+
/**
 * Flatten a request body's `system` field into a single string.
 *
 * @param parsed Parsed request body.
 * @returns The `system` value itself when it is a string; when it is an
 *          array, the concatenation (no separator) of every element's string
 *          `text` field, with elements lacking a string `text` contributing
 *          nothing; otherwise the empty string.
 */
function systemPromptText(parsed: Record<string, unknown>): string {
  const sys: unknown = parsed.system;
  if (typeof sys === "string") {
    return sys;
  }
  if (!Array.isArray(sys)) {
    return "";
  }
  return sys
    .map((block: unknown) => {
      // Elements come from untrusted JSON: may be null or a primitive.
      const text: unknown = (block as Record<string, unknown> | null | undefined)?.text;
      return typeof text === "string" ? text : "";
    })
    .join("");
}
115+
116+
/**
 * Classify a /v1/messages request body.
 *
 * Steps, in order:
 *   1. Unparseable or non-object JSON -> "normal".
 *   2. Last message has role "tool", or its content array holds a block whose
 *      `type` ends in "_result" -> "tool_followup".
 *   3. Otherwise, a last user message containing the summarize-prompt markers
 *      -> "compaction".
 *   4. Still "normal" and the request carries no tools -> "auxiliary".
 *   5. For "normal" / "tool_followup" only, primary-vs-sub-agent layers, first
 *      decisive layer wins (5a spawner present, 5b system-prompt marker,
 *      5c web-search stream, 5d by-absence once a spawner has been seen).
 *
 * @param body  Raw request body text.
 * @param state Per-run state, mutated in place: `primarySpawnerSeen` is set to
 *              true when layer 5a fires and is never reset here. The same
 *              (body, state, opts) always yields the same result and the same
 *              mutation.
 * @param opts  Optional overrides for the spawner-name list and the
 *              sub-agent system-prompt markers.
 * @returns The request category.
 */
export function classifyRequest(
  body: string,
  state: ClassifyState,
  opts?: ClassifyOptions,
): InteractiveRequestType {
  let parsed: Record<string, unknown>;
  try {
    parsed = JSON.parse(body) as Record<string, unknown>;
  } catch {
    // Body isn't JSON (shouldn't happen on /v1/messages). Default to "normal";
    // the wrapper's response-content fingerprint backstops compaction.
    return "normal";
  }
  if (!parsed || typeof parsed !== "object") {
    return "normal";
  }

  const spawnerNames = opts?.spawnerToolNames ?? DEFAULT_SPAWNER_TOOL_NAMES;
  const toolList: Record<string, unknown>[] = Array.isArray(parsed.tools)
    ? (parsed.tools as Record<string, unknown>[])
    : [];
  const hasTools = toolList.length > 0;
  const msgs: unknown[] = Array.isArray(parsed.messages) ? parsed.messages : [];
  const lastMsg = msgs[msgs.length - 1] as Record<string, unknown> | undefined;

  let requestType: InteractiveRequestType = "normal";

  if (lastMsg) {
    if (lastMsg.role === "tool") {
      requestType = "tool_followup";
    } else if (Array.isArray(lastMsg.content)) {
      const hasToolResult = (lastMsg.content as Record<string, unknown>[]).some((b) => {
        const blockType: unknown = b?.type;
        return typeof blockType === "string" && blockType.endsWith("_result");
      });
      if (hasToolResult) {
        requestType = "tool_followup";
      }
    }
    if (requestType === "normal" && lastMsg.role === "user") {
      const lastContent =
        typeof lastMsg.content === "string"
          ? lastMsg.content
          : Array.isArray(lastMsg.content)
            ? (lastMsg.content as Record<string, unknown>[])
                .map((b) => {
                  const text: unknown = b?.text;
                  return typeof text === "string" ? text : "";
                })
                .join("")
            : "";
      if (
        lastContent.includes("summary should include the following sections") &&
        (lastContent.includes("continuation summary") || lastContent.includes("detailed summary"))
      ) {
        requestType = "compaction";
      }
    }
  }

  // Tool-less, non-followup, non-compaction request -> claude-code internal
  // side-call (title-gen, classifier, skill-search).
  if (requestType === "normal" && !hasTools) {
    requestType = "auxiliary";
  }

  // Primary-vs-subagent discrimination, layered (first decisive layer wins),
  // only for tool-bearing user-facing turns.
  if (requestType === "normal" || requestType === "tool_followup") {
    // 5a — positive PRIMARY signal: the turn carries a spawner tool. Record it
    // and keep the turn-end. A max_tokens retry of the primary still carries
    // the spawner, so it is never mis-suppressed.
    if (toolList.some((t) => isSpawnerTool(t, spawnerNames))) {
      state.primarySpawnerSeen = true;
      return requestType;
    }
    // 5b — positive SUB-AGENT fingerprint (guarded; only if markers supplied).
    // Empty or non-string markers are dropped: `"abc".includes("")` is always
    // true, so a stray "" would tag every spawner-less request with a system
    // prompt as a sub-agent and suppress the primary's turn-end.
    const markers = (opts?.subagentSystemMarkers ?? []).filter(
      (m) => typeof m === "string" && m.length > 0,
    );
    if (markers.length > 0) {
      const sys = systemPromptText(parsed);
      if (sys && markers.some((m) => sys.includes(m))) {
        return "subagent";
      }
    }
    // 5c — web-search sub-agent: a tiny dedicated stream (server web_search,
    // no spawner, narrow toolset). Independent of state, so it is caught even
    // when the spawner is deny-listed.
    if (toolList.some((t) => isWebSearchTool(t)) && toolList.length <= 3) {
      return "subagent";
    }
    // 5d — by-absence Task sub-agent: no spawner here, but a spawner HAS been
    // seen this run, so sub-agents are possible. Gated on primarySpawnerSeen so
    // a no-spawner run keeps its primary turn-end.
    if (state.primarySpawnerSeen) {
      return "subagent";
    }
  }

  return requestType;
}
217+
30218
export async function startMitmProxy(certs: CertPaths): Promise<MitmProxyHandle> {
31219
const eventHandlers: Array<(evt: Record<string, unknown>) => void> = [];
32220
// Monotonic per-request identifier. claude-code can hold multiple
@@ -37,13 +225,11 @@ export async function startMitmProxy(certs: CertPaths): Promise<MitmProxyHandle>
37225
// accumulator. Reset is unnecessary — the counter only needs to be
38226
// unique within a single wrapper invocation's lifetime.
39227
let nextReqId = 1;
40-
// True once any request this run advertised the `Agent` (Task) tool. The
41-
// Agent tool is what spawns Task/research sub-agents, so a tool-bearing
42-
// request that LACKS Agent is only a sub-agent once Agent has been seen (i.e.
43-
// it's enabled). If Agent never appears (operator deny-listed it), no Task
44-
// sub-agent can exist and an Agent-less request is the primary — which must
45-
// keep its turn-end rather than be suppressed into a hang.
46-
let agentToolSeenThisRun = false;
228+
// Per-run classifier state: whether a primary spawner (Task/Agent-style)
229+
// tool has been advertised yet this run. Gates the by-absence sub-agent
230+
// layer so an Agent-less request stays primary until a spawner is seen.
231+
// See classifyRequest above.
232+
const classifyState: ClassifyState = { primarySpawnerSeen: false };
47233

48234
function emitEvent(evt: Record<string, unknown>): void {
49235
for (const h of eventHandlers) {
@@ -83,130 +269,16 @@ export async function startMitmProxy(certs: CertPaths): Promise<MitmProxyHandle>
83269
headers.delete(hop);
84270
}
85271

86-
// Classify the outbound /v1/messages request from its body shape, so
87-
// the wrapper can route the resulting SSE stream without inspecting
88-
// the request itself. Four categories, applied in order:
89-
//
90-
// "tool_followup" — last message has role "tool" OR its content
91-
// array contains a `tool_result` block. Means
92-
// we're inside Claude's tool-use loop; the
93-
// stream will be tool_use deltas or interim
94-
// reasoning, then the final user-facing turn.
95-
// "compaction" — last user message contains compact.ts's
96-
// summarize prompt markers ("summary should
97-
// include the following sections" plus either
98-
// "continuation summary" or "detailed summary").
99-
// The summary content gets re-streamed as
100-
// thinking_delta downstream.
101-
// "auxiliary" — request carries NO tools. OpenClaw always
102-
// injects `mcp__openclaw__*` tools into the
103-
// claude invocation, so the user's real turn
104-
// always has tools. Internal claude-code
105-
// side-requests (title-gen, classifier,
106-
// skill-search) call /v1/messages without
107-
// tools — that's the structural signal. Model
108-
// family is NOT used because Haiku is a
109-
// legitimate user-facing model on this backend
110-
// (defaultModelRef includes claude-haiku-4-5).
111-
// "normal" — everything else: the real user-facing turn
112-
// that should produce a `result` record.
113-
//
114-
// Content markers are the only definitive compaction signal. A prior
115-
// `max_tokens` stop_reason is tempting as a structural hint, but Claude
116-
// Code's max_output_tokens_recovery flow ALSO follows max_tokens with
117-
// the same last user message — using stop_reason as a classifier would
118-
// misclassify those retries as compaction and drop them.
272+
// Classify the outbound /v1/messages request from its body shape so the
273+
// wrapper can route the resulting SSE stream (see classifyRequest above
274+
// for the layered primary-vs-subagent logic). The only downstream effect
275+
// is which streams are tagged "subagent" (turn-end suppressed) vs the
276+
// user-facing "normal"/"tool_followup" turn.
119277
let reqBody: string | undefined;
120-
let requestType: "normal" | "compaction" | "tool_followup" | "auxiliary" | "subagent" =
121-
"normal";
278+
let requestType: InteractiveRequestType = "normal";
122279
if (req.method === "POST") {
123280
reqBody = await req.text();
124-
try {
125-
const parsed = JSON.parse(reqBody);
126-
const hasTools = Array.isArray(parsed.tools) && parsed.tools.length > 0;
127-
const msgs: unknown[] = Array.isArray(parsed.messages) ? parsed.messages : [];
128-
const lastMsg = msgs[msgs.length - 1] as Record<string, unknown> | undefined;
129-
if (lastMsg) {
130-
if (lastMsg.role === "tool") {
131-
requestType = "tool_followup";
132-
} else if (Array.isArray(lastMsg.content)) {
133-
const hasToolResult = (lastMsg.content as Record<string, unknown>[]).some(
134-
(b) => b.type === "tool_result",
135-
);
136-
if (hasToolResult) {
137-
requestType = "tool_followup";
138-
}
139-
}
140-
if (requestType === "normal" && lastMsg.role === "user") {
141-
const lastContent =
142-
typeof lastMsg.content === "string"
143-
? lastMsg.content
144-
: Array.isArray(lastMsg.content)
145-
? (lastMsg.content as Record<string, unknown>[])
146-
.map((b) => (typeof b.text === "string" ? b.text : ""))
147-
.join("")
148-
: "";
149-
if (
150-
lastContent.includes("summary should include the following sections") &&
151-
(lastContent.includes("continuation summary") ||
152-
lastContent.includes("detailed summary"))
153-
) {
154-
requestType = "compaction";
155-
}
156-
}
157-
}
158-
// Tool-less requests that don't match tool_followup or compaction
159-
// are claude-code's internal side-calls (title-gen, classifier,
160-
// skill-search). Classify last so a legitimate user turn that
161-
// happens to be the FIRST message in a session (lastMsg.role
162-
// === "user", content is plain text, no tools array yet) still
163-
// gets "normal" iff hasTools — which OpenClaw guarantees by
164-
// injecting MCP tools into every interactive claude invocation.
165-
if (requestType === "normal" && !hasTools) {
166-
requestType = "auxiliary";
167-
}
168-
// Sub-agent detection. claude-code's primary turn carries the `Agent`
169-
// (Task) tool; sub-agents (research/Explore Tasks, web search) are
170-
// spawned WITHOUT it (no recursion), so their end_turn must NOT end
171-
// the primary turn (the wrapper handles "subagent" like
172-
// compaction-plus). The discriminator is the Agent tool — but absence
173-
// of Agent only implies a sub-agent once we've actually SEEN Agent
174-
// this run, because the Agent tool is also what *spawns* Task
175-
// sub-agents: if an operator deny-lists Agent, the primary itself has
176-
// no Agent and no Task sub-agent can exist, so it must NOT be
177-
// suppressed. A max_tokens retry of the primary still carries `Agent`.
178-
if (requestType === "normal" || requestType === "tool_followup") {
179-
const toolList = Array.isArray(parsed.tools) ? parsed.tools : [];
180-
const hasAgentTool = toolList.some((t) => t?.name === "Agent");
181-
if (hasAgentTool) {
182-
agentToolSeenThisRun = true;
183-
}
184-
const usesServerWebSearch = toolList.some(
185-
(t) => typeof t?.type === "string" && t.type.includes("web_search"),
186-
);
187-
// Task/research sub-agents only appear AFTER the Agent-bearing
188-
// primary, so a tool-bearing no-Agent request is one only once Agent
189-
// has been seen this run. If Agent never appears, this IS the
190-
// primary — keep its turn-end, else it rewrites to thinking, emits
191-
// no result, and hangs until the watchdog kills it.
192-
const isTaskSubagent = !hasAgentTool && agentToolSeenThisRun;
193-
// A web_search sub-agent is a tiny dedicated stream (server-side
194-
// web_search, no Agent, no broad toolset); it is NOT gated by the
195-
// Agent tool, so catch it even when Agent is off. The bounded tool
196-
// count keeps a full primary — which always carries many tools —
197-
// from being misread when it requests web_search itself.
198-
const isWebSearchSubagent =
199-
usesServerWebSearch && !hasAgentTool && toolList.length <= 3;
200-
if (isTaskSubagent || isWebSearchSubagent) {
201-
requestType = "subagent";
202-
}
203-
}
204-
} catch {
205-
// Body isn't JSON (shouldn't happen on /v1/messages). Leave the
206-
// default "normal" classification — if it turns out to be
207-
// compaction-shaped, the wrapper's response-content fingerprint
208-
// backup catches it.
209-
}
281+
requestType = classifyRequest(reqBody, classifyState);
210282
}
211283
const reqId = nextReqId++;
212284

0 commit comments

Comments
 (0)