@@ -27,6 +27,194 @@ export type MitmProxyHandle = {
2727
2828const UPSTREAM_HOST = "api.anthropic.com" ;
2929
30+ // ---------------------------------------------------------------------------
31+ // Request classification (primary turn vs. sub-agent).
32+ //
33+ // The wrapper needs to know whether a given /v1/messages stream's `end_turn`
34+ // should end the USER-facing turn (the primary) or be neutralized (a
35+ // sub-agent: Task/research/Explore, web search, or any disguised agent).
36+ // Exported as a pure function so it can be unit-tested independently of the
37+ // proxy. Layered, first-decisive-layer-wins; biased to never mis-suppress the
38+ // primary (which would hang) while still catching every sub-agent form.
39+ // ---------------------------------------------------------------------------
40+
/**
 * Classification labels that `classifyRequest` can assign to an outbound
 * /v1/messages request.
 */
export type InteractiveRequestType =
  // Default label: nothing more specific matched.
  | "normal"
  // Last user message carries the summarize-prompt markers.
  | "compaction"
  // Last message has role "tool" or contains a `*_result` content block.
  | "tool_followup"
  // Request advertises no tools and is neither a follow-up nor a compaction.
  | "auxiliary"
  // Tool-bearing request attributed to a sub-agent rather than the primary.
  | "subagent";
47+
/**
 * Mutable per-run state threaded through `classifyRequest`.
 */
export type ClassifyState = {
  /**
   * Flips to true the first time a request in this run advertises a primary
   * "spawner" tool (the Task/Agent tool that launches sub-agents). The
   * by-absence sub-agent layer is gated on it: a request without a spawner is
   * only treated as a sub-agent after a spawner has actually been observed,
   * so a run whose Agent tool is deny-listed keeps its primary turn-end
   * instead of hanging.
   */
  primarySpawnerSeen: boolean;
};
56+
/**
 * Optional tuning knobs for `classifyRequest`.
 */
export type ClassifyOptions = {
  /**
   * Names of tools that identify the PRIMARY turn. Compared exactly but
   * case-insensitively. A conservative structural matcher additionally
   * recognises renamed ("disguised") spawner tools by shape. When omitted,
   * DEFAULT_SPAWNER_TOOL_NAMES is used.
   */
  spawnerToolNames?: ReadonlyArray<string>;
  /**
   * Substrings of the system prompt that POSITIVELY identify a sub-agent
   * request. Disabled by default (empty); turn on only after a live capture
   * confirms a stable marker. Correctness never relies on this layer.
   */
  subagentSystemMarkers?: ReadonlyArray<string>;
};
67+
/** Spawner tool names used when `ClassifyOptions.spawnerToolNames` is not supplied. */
export const DEFAULT_SPAWNER_TOOL_NAMES: ReadonlyArray<string> = [
  "Agent",
  "Task",
  "TaskCreate",
];
69+
// Shape patterns for renamed ("disguised") spawner tools on the primary turn.
// Deliberately tight (fully anchored, fixed vocabulary) so that a sub-agent's
// ordinary tool cannot be mistaken for a spawner, which would wrongly keep
// that sub-agent's turn-end live. No `g`/`y` flag is used, so `test()` keeps
// no state between calls and the instances can be shared safely.
const SPAWNER_SHAPE_PATTERNS: readonly RegExp[] = [
  // Bare "agent" / "task" in any casing.
  /^(agent|task)$/i,
  // verb + optional "_" + optional "sub" / "sub_" + "agent",
  // e.g. "spawn_agent", "launchSubagent", "create_sub_agent".
  /^(dispatch|launch|spawn|create|run)_?(sub_?)?agent$/i,
  // "task" + verb, e.g. "TaskCreate", "taskrun".
  /^task(create|run|spawn|launch|dispatch)$/i,
];

/**
 * Decide whether a tool definition is a "spawner" (a tool that launches
 * sub-agents).
 *
 * @param tool  One entry of a request's `tools` array. Only `tool.name` is read.
 * @param names Configured spawner names; compared exactly, case-insensitively.
 * @returns `true` when the tool's name equals one of `names` (ignoring case)
 *          or matches one of the conservative shape patterns; `false`
 *          otherwise, including when the tool has no string `name`.
 */
function isSpawnerTool(tool: Record<string, unknown>, names: readonly string[]): boolean {
  const name = typeof tool?.name === "string" ? tool.name : "";
  if (!name) {
    return false; // server tools (type-only, e.g. web_search) never spawn agents
  }
  const lower = name.toLowerCase();
  if (names.some((n) => n.toLowerCase() === lower)) {
    return true;
  }
  return SPAWNER_SHAPE_PATTERNS.some((pattern) => pattern.test(name));
}
93+
/**
 * Report whether a tool definition is a web-search tool: either its `type`
 * string contains "web_search", or its `name` equals "web_search" when
 * compared case-insensitively. Non-string `type`/`name` values never match.
 */
function isWebSearchTool(tool: Record<string, unknown>): boolean {
  const rawType = tool?.type;
  if (typeof rawType === "string" && rawType.includes("web_search")) {
    return true;
  }
  const rawName = tool?.name;
  if (typeof rawName !== "string") {
    return false;
  }
  return rawName.toLowerCase() === "web_search";
}
102+
/**
 * Flatten the `system` field of a parsed request body into one string.
 *
 * - string  -> returned unchanged
 * - array   -> the string `text` of every element, concatenated with no
 *              separator; elements without a string `text` contribute nothing
 * - other   -> ""
 */
function systemPromptText(parsed: Record<string, unknown>): string {
  const system = parsed.system;
  if (typeof system === "string") {
    return system;
  }
  if (!Array.isArray(system)) {
    return "";
  }
  let combined = "";
  for (const block of system as Record<string, unknown>[]) {
    if (typeof block?.text === "string") {
      combined += block.text;
    }
  }
  return combined;
}
115+
/**
 * Classify a /v1/messages request body.
 *
 * The base type is derived from the body shape, in this order:
 *   1. "tool_followup" — the last message has role "tool", or its content
 *      array contains a block whose `type` ends in "_result".
 *   2. "compaction"    — otherwise, the last message is a user message whose
 *      text contains the summarize-prompt markers.
 *   3. "auxiliary"     — otherwise, the request advertises no tools.
 *   4. "normal"        — everything else.
 *
 * For "normal" and "tool_followup" requests a layered primary-vs-sub-agent
 * check then runs; the first decisive layer wins:
 *   5a. any tool is a spawner      -> set `state.primarySpawnerSeen`, keep type
 *   5b. system prompt has a marker -> "subagent" (only if markers were supplied)
 *   5c. a web-search tool and at most 3 tools in total -> "subagent"
 *   5d. a spawner was seen earlier this run            -> "subagent"
 *
 * @param body  Raw request body text.
 * @param state Per-run state, mutated in place (the spawner-seen flag). The
 *              same (body, state, opts) always produces the same result and
 *              the same state mutation.
 * @param opts  Optional spawner names and sub-agent system-prompt markers.
 * @returns The classification. Bodies that are not JSON, or whose top-level
 *          JSON value is not a plain object (primitive, null, array), yield
 *          "normal".
 */
export function classifyRequest(
  body: string,
  state: ClassifyState,
  opts?: ClassifyOptions,
): InteractiveRequestType {
  let decoded: unknown;
  try {
    decoded = JSON.parse(body);
  } catch {
    // Body isn't JSON (shouldn't happen on /v1/messages). Default to "normal";
    // the wrapper's response-content fingerprint backstops compaction.
    return "normal";
  }
  // Arrays are rejected explicitly: `typeof [] === "object"`, so without this
  // check a JSON array body would fall through and be labelled "auxiliary"
  // instead of getting the malformed-body default.
  if (!decoded || typeof decoded !== "object" || Array.isArray(decoded)) {
    return "normal";
  }
  const parsed = decoded as Record<string, unknown>;

  const spawnerNames = opts?.spawnerToolNames ?? DEFAULT_SPAWNER_TOOL_NAMES;
  const toolList: Record<string, unknown>[] = Array.isArray(parsed.tools)
    ? (parsed.tools as Record<string, unknown>[])
    : [];
  const hasTools = toolList.length > 0;
  const msgs: unknown[] = Array.isArray(parsed.messages) ? parsed.messages : [];
  const lastMsg = msgs[msgs.length - 1] as Record<string, unknown> | undefined;

  let requestType: InteractiveRequestType = "normal";

  if (lastMsg) {
    if (lastMsg.role === "tool") {
      requestType = "tool_followup";
    } else if (Array.isArray(lastMsg.content)) {
      // Any "*_result" block type counts, not only "tool_result".
      const hasToolResult = (lastMsg.content as Record<string, unknown>[]).some(
        (b) => typeof b?.type === "string" && b.type.endsWith("_result"),
      );
      if (hasToolResult) {
        requestType = "tool_followup";
      }
    }
    if (requestType === "normal" && lastMsg.role === "user") {
      // Flatten the last user message to plain text (string content as-is,
      // array content as the concatenation of its string `text` fields).
      const lastContent =
        typeof lastMsg.content === "string"
          ? lastMsg.content
          : Array.isArray(lastMsg.content)
            ? (lastMsg.content as Record<string, unknown>[])
                .map((b) => (typeof b?.text === "string" ? b.text : ""))
                .join("")
            : "";
      if (
        lastContent.includes("summary should include the following sections") &&
        (lastContent.includes("continuation summary") || lastContent.includes("detailed summary"))
      ) {
        requestType = "compaction";
      }
    }
  }

  // Tool-less, non-followup, non-compaction request -> claude-code internal
  // side-call (title-gen, classifier, skill-search).
  if (requestType === "normal" && !hasTools) {
    requestType = "auxiliary";
  }

  // Primary-vs-subagent discrimination, layered (first decisive layer wins),
  // only for tool-bearing user-facing turns.
  if (requestType === "normal" || requestType === "tool_followup") {
    // 5a — positive PRIMARY signal: the turn carries a spawner tool. Record it
    // and keep the turn-end. A max_tokens retry of the primary still carries
    // the spawner, so it is never mis-suppressed.
    if (toolList.some((t) => isSpawnerTool(t, spawnerNames))) {
      state.primarySpawnerSeen = true;
      return requestType;
    }
    // 5b — positive SUB-AGENT fingerprint (guarded; only if markers supplied).
    const markers = opts?.subagentSystemMarkers ?? [];
    if (markers.length > 0) {
      const sys = systemPromptText(parsed);
      if (sys && markers.some((m) => sys.includes(m))) {
        return "subagent";
      }
    }
    // 5c — web-search sub-agent: a tiny dedicated stream (server web_search,
    // no spawner, narrow toolset). Independent of state, so it is caught even
    // when the spawner is deny-listed.
    if (toolList.some((t) => isWebSearchTool(t)) && toolList.length <= 3) {
      return "subagent";
    }
    // 5d — by-absence Task sub-agent: no spawner here, but a spawner HAS been
    // seen this run, so sub-agents are possible. Gated on primarySpawnerSeen so
    // a no-spawner run keeps its primary turn-end.
    if (state.primarySpawnerSeen) {
      return "subagent";
    }
  }

  return requestType;
}
217+
30218export async function startMitmProxy ( certs : CertPaths ) : Promise < MitmProxyHandle > {
31219 const eventHandlers : Array < ( evt : Record < string , unknown > ) => void > = [ ] ;
32220 // Monotonic per-request identifier. claude-code can hold multiple
@@ -37,13 +225,11 @@ export async function startMitmProxy(certs: CertPaths): Promise<MitmProxyHandle>
37225 // accumulator. Reset is unnecessary — the counter only needs to be
38226 // unique within a single wrapper invocation's lifetime.
39227 let nextReqId = 1 ;
40- // True once any request this run advertised the `Agent` (Task) tool. The
41- // Agent tool is what spawns Task/research sub-agents, so a tool-bearing
42- // request that LACKS Agent is only a sub-agent once Agent has been seen (i.e.
43- // it's enabled). If Agent never appears (operator deny-listed it), no Task
44- // sub-agent can exist and an Agent-less request is the primary — which must
45- // keep its turn-end rather than be suppressed into a hang.
46- let agentToolSeenThisRun = false ;
228+ // Per-run classifier state: whether a primary spawner (Task/Agent-style)
229+ // tool has been advertised yet this run. Gates the by-absence sub-agent
230+ // layer so an Agent-less request stays primary until a spawner is seen.
231+ // See classifyRequest above.
232+ const classifyState : ClassifyState = { primarySpawnerSeen : false } ;
47233
48234 function emitEvent ( evt : Record < string , unknown > ) : void {
49235 for ( const h of eventHandlers ) {
@@ -83,130 +269,16 @@ export async function startMitmProxy(certs: CertPaths): Promise<MitmProxyHandle>
83269 headers . delete ( hop ) ;
84270 }
85271
86- // Classify the outbound /v1/messages request from its body shape, so
87- // the wrapper can route the resulting SSE stream without inspecting
88- // the request itself. Four categories, applied in order:
89- //
90- // "tool_followup" — last message has role "tool" OR its content
91- // array contains a `tool_result` block. Means
92- // we're inside Claude's tool-use loop; the
93- // stream will be tool_use deltas or interim
94- // reasoning, then the final user-facing turn.
95- // "compaction" — last user message contains compact.ts's
96- // summarize prompt markers ("summary should
97- // include the following sections" plus either
98- // "continuation summary" or "detailed summary").
99- // The summary content gets re-streamed as
100- // thinking_delta downstream.
101- // "auxiliary" — request carries NO tools. OpenClaw always
102- // injects `mcp__openclaw__*` tools into the
103- // claude invocation, so the user's real turn
104- // always has tools. Internal claude-code
105- // side-requests (title-gen, classifier,
106- // skill-search) call /v1/messages without
107- // tools — that's the structural signal. Model
108- // family is NOT used because Haiku is a
109- // legitimate user-facing model on this backend
110- // (defaultModelRef includes claude-haiku-4-5).
111- // "normal" — everything else: the real user-facing turn
112- // that should produce a `result` record.
113- //
114- // Content markers are the only definitive compaction signal. A prior
115- // `max_tokens` stop_reason is tempting as a structural hint, but Claude
116- // Code's max_output_tokens_recovery flow ALSO follows max_tokens with
117- // the same last user message — using stop_reason as a classifier would
118- // misclassify those retries as compaction and drop them.
272+ // Classify the outbound /v1/messages request from its body shape so the
273+ // wrapper can route the resulting SSE stream (see classifyRequest above
274+ // for the layered primary-vs-subagent logic). The only downstream effect
275+ // is which streams are tagged "subagent" (turn-end suppressed) vs the
276+ // user-facing "normal"/"tool_followup" turn.
119277 let reqBody : string | undefined ;
120- let requestType : "normal" | "compaction" | "tool_followup" | "auxiliary" | "subagent" =
121- "normal" ;
278+ let requestType : InteractiveRequestType = "normal" ;
122279 if ( req . method === "POST" ) {
123280 reqBody = await req . text ( ) ;
124- try {
125- const parsed = JSON . parse ( reqBody ) ;
126- const hasTools = Array . isArray ( parsed . tools ) && parsed . tools . length > 0 ;
127- const msgs : unknown [ ] = Array . isArray ( parsed . messages ) ? parsed . messages : [ ] ;
128- const lastMsg = msgs [ msgs . length - 1 ] as Record < string , unknown > | undefined ;
129- if ( lastMsg ) {
130- if ( lastMsg . role === "tool" ) {
131- requestType = "tool_followup" ;
132- } else if ( Array . isArray ( lastMsg . content ) ) {
133- const hasToolResult = ( lastMsg . content as Record < string , unknown > [ ] ) . some (
134- ( b ) => b . type === "tool_result" ,
135- ) ;
136- if ( hasToolResult ) {
137- requestType = "tool_followup" ;
138- }
139- }
140- if ( requestType === "normal" && lastMsg . role === "user" ) {
141- const lastContent =
142- typeof lastMsg . content === "string"
143- ? lastMsg . content
144- : Array . isArray ( lastMsg . content )
145- ? ( lastMsg . content as Record < string , unknown > [ ] )
146- . map ( ( b ) => ( typeof b . text === "string" ? b . text : "" ) )
147- . join ( "" )
148- : "" ;
149- if (
150- lastContent . includes ( "summary should include the following sections" ) &&
151- ( lastContent . includes ( "continuation summary" ) ||
152- lastContent . includes ( "detailed summary" ) )
153- ) {
154- requestType = "compaction" ;
155- }
156- }
157- }
158- // Tool-less requests that don't match tool_followup or compaction
159- // are claude-code's internal side-calls (title-gen, classifier,
160- // skill-search). Classify last so a legitimate user turn that
161- // happens to be the FIRST message in a session (lastMsg.role
162- // === "user", content is plain text, no tools array yet) still
163- // gets "normal" iff hasTools — which OpenClaw guarantees by
164- // injecting MCP tools into every interactive claude invocation.
165- if ( requestType === "normal" && ! hasTools ) {
166- requestType = "auxiliary" ;
167- }
168- // Sub-agent detection. claude-code's primary turn carries the `Agent`
169- // (Task) tool; sub-agents (research/Explore Tasks, web search) are
170- // spawned WITHOUT it (no recursion), so their end_turn must NOT end
171- // the primary turn (the wrapper handles "subagent" like
172- // compaction-plus). The discriminator is the Agent tool — but absence
173- // of Agent only implies a sub-agent once we've actually SEEN Agent
174- // this run, because the Agent tool is also what *spawns* Task
175- // sub-agents: if an operator deny-lists Agent, the primary itself has
176- // no Agent and no Task sub-agent can exist, so it must NOT be
177- // suppressed. A max_tokens retry of the primary still carries `Agent`.
178- if ( requestType === "normal" || requestType === "tool_followup" ) {
179- const toolList = Array . isArray ( parsed . tools ) ? parsed . tools : [ ] ;
180- const hasAgentTool = toolList . some ( ( t ) => t ?. name === "Agent" ) ;
181- if ( hasAgentTool ) {
182- agentToolSeenThisRun = true ;
183- }
184- const usesServerWebSearch = toolList . some (
185- ( t ) => typeof t ?. type === "string" && t . type . includes ( "web_search" ) ,
186- ) ;
187- // Task/research sub-agents only appear AFTER the Agent-bearing
188- // primary, so a tool-bearing no-Agent request is one only once Agent
189- // has been seen this run. If Agent never appears, this IS the
190- // primary — keep its turn-end, else it rewrites to thinking, emits
191- // no result, and hangs until the watchdog kills it.
192- const isTaskSubagent = ! hasAgentTool && agentToolSeenThisRun ;
193- // A web_search sub-agent is a tiny dedicated stream (server-side
194- // web_search, no Agent, no broad toolset); it is NOT gated by the
195- // Agent tool, so catch it even when Agent is off. The bounded tool
196- // count keeps a full primary — which always carries many tools —
197- // from being misread when it requests web_search itself.
198- const isWebSearchSubagent =
199- usesServerWebSearch && ! hasAgentTool && toolList . length <= 3 ;
200- if ( isTaskSubagent || isWebSearchSubagent ) {
201- requestType = "subagent" ;
202- }
203- }
204- } catch {
205- // Body isn't JSON (shouldn't happen on /v1/messages). Leave the
206- // default "normal" classification — if it turns out to be
207- // compaction-shaped, the wrapper's response-content fingerprint
208- // backup catches it.
209- }
281+ requestType = classifyRequest ( reqBody , classifyState ) ;
210282 }
211283 const reqId = nextReqId ++ ;
212284
0 commit comments