fix(context_compressor): keep tool-call arguments JSON valid when shrinking

honghua · claude · teknium1 · commit 3128d9fcd24f · 2026-04-18T12:40:56.000-07:00
Pass 3 of `_prune_old_tool_results` previously shrunk long `function.arguments`
blobs by slicing the raw JSON string at byte 200 and appending the literal
text `...[truncated]`. That routinely produced payloads like::

    {"path": "/foo.md", "content": "# Long markdown
    ...[truncated]

— an unterminated string with no closing brace. Strict providers (observed
on MiniMax) reject this as `invalid function arguments json string` with a
non-retryable 400. Because the broken call survives in the session history,
every subsequent turn re-sends the same malformed payload and gets the same
400, locking the session into a re-send loop until the call falls out of
the window.

Fix: parse the arguments first, shrink long string leaves inside the parsed
structure, and re-serialise. Non-string values (paths, ints, booleans, lists)
pass through intact. Arguments that are not valid JSON to begin with (rare,
some backends use non-JSON tool args) are returned unchanged rather than
replaced with something neither we nor the provider can parse.

Observed in the wild: a `write_file` with ~800 chars of markdown `content`
triggered this on a real session against MiniMax-M2.7; every turn after
compression got rejected until the session was manually reset.

Tests:
- 7 direct tests of `_truncate_tool_call_args_json` covering valid-JSON
  output, non-JSON pass-through, nested structures, non-string leaves,
  scalar JSON, and Unicode preservation
- 1 end-to-end test through `_prune_old_tool_results` Pass 3 that
  reproduces the exact failure payload shape from the incident

Co-Authored-By: Claude Opus 4.7 (1M context) &lt;noreply@anthropic.com&gt;
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
@@ -63,6 +63,52 @@
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
 
 
+def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
+    """Shrink long string values inside a tool-call arguments JSON blob while
+    preserving JSON validity.
+
+    The ``function.arguments`` field on a tool call is a JSON-encoded string
+    passed through to the LLM provider; downstream providers strictly
+    validate it and return a non-retryable 400 when it is not well-formed.
+    An earlier implementation sliced the raw JSON at a fixed byte offset and
+    appended ``...[truncated]`` — which routinely produced strings like::
+
+        {"path": "/foo/bar", "content": "# long markdown
+        ...[truncated]
+
+    i.e. an unterminated string and a missing closing brace. MiniMax, for
+    example, rejects this with ``invalid function arguments json string``
+    and the session gets stuck re-sending the same broken history on every
+    turn. See issue #11762 for the observed loop.
+
+    This helper parses the arguments, shrinks long string leaves inside the
+    parsed structure, and re-serialises. Non-string values (paths, ints,
+    booleans) are preserved intact. If the arguments are not valid JSON
+    to begin with — some model backends use non-JSON tool arguments — the
+    original string is returned unchanged rather than replaced with
+    something neither we nor the backend can parse.
+    """
+    try:
+        parsed = json.loads(args)
+    except (ValueError, TypeError):
+        return args
+
+    def _shrink(obj: Any) -> Any:
+        if isinstance(obj, str):
+            if len(obj) > head_chars:
+                return obj[:head_chars] + "...[truncated]"
+            return obj
+        if isinstance(obj, dict):
+            return {k: _shrink(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [_shrink(v) for v in obj]
+        return obj
+
+    shrunken = _shrink(parsed)
+    # ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX
+    return json.dumps(shrunken, ensure_ascii=False)
+
+
 def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
     """Create an informative 1-line summary of a tool call + result.
 
@@ -449,6 +495,11 @@ def _prune_old_tool_results(
         # Pass 3: Truncate large tool_call arguments in assistant messages
         # outside the protected tail. write_file with 50KB content, for
         # example, survives pruning entirely without this.
+        #
+        # The shrinking is done inside the parsed JSON structure so the
+        # result remains valid JSON — otherwise downstream providers 400
+        # on every subsequent turn until the broken call falls out of
+        # the window. See ``_truncate_tool_call_args_json`` docstring.
         for i in range(prune_boundary):
             msg = result[i]
             if msg.get("role") != "assistant" or not msg.get("tool_calls"):
@@ -459,8 +510,10 @@ def _prune_old_tool_results(
                 if isinstance(tc, dict):
                     args = tc.get("function", {}).get("arguments", "")
                     if len(args) > 500:
-                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
-                        modified = True
+                        new_args = _truncate_tool_call_args_json(args)
+                        if new_args != args:
+                            tc = {**tc, "function": {**tc["function"], "arguments": new_args}}
+                            modified = True
                 new_tcs.append(tc)
             if modified:
                 result[i] = {**msg, "tool_calls": new_tcs}
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
@@ -781,3 +781,127 @@ def test_prune_without_token_budget_uses_message_count(self, budget_compressor):
         # Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
         # so it might or might not be pruned depending on boundary
         assert isinstance(pruned, int)
+
+
+class TestTruncateToolCallArgsJson:
+    """Regression tests for #11762.
+
+    The previous implementation produced invalid JSON by slicing
+    ``function.arguments`` mid-string, which caused non-retryable 400s from
+    strict providers (observed on MiniMax) and stuck long sessions in a
+    re-send loop. The helper here must always emit parseable JSON whose
+    shape matches the original — shrunken, not corrupted.
+    """
+
+    def _helper(self):
+        from agent.context_compressor import _truncate_tool_call_args_json
+        return _truncate_tool_call_args_json
+
+    def test_shrunken_args_remain_valid_json(self):
+        import json as _json
+        shrink = self._helper()
+        original = _json.dumps({
+            "path": "~/.hermes/skills/shopping/browser-setup-notes.md",
+            "content": "# Shopping Browser Setup Notes\n\n" + "abc " * 400,
+        })
+        assert len(original) > 500
+        shrunk = shrink(original)
+        parsed = _json.loads(shrunk)  # must not raise
+        assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
+        assert parsed["content"].endswith("...[truncated]")
+        assert len(shrunk) < len(original)
+
+    def test_non_json_arguments_pass_through(self):
+        shrink = self._helper()
+        not_json = "this is not json at all, " * 50
+        assert shrink(not_json) == not_json
+
+    def test_short_string_leaves_unchanged(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({"command": "ls -la", "cwd": "/tmp"})
+        assert _json.loads(shrink(payload)) == {"command": "ls -la", "cwd": "/tmp"}
+
+    def test_nested_structures_are_walked(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({
+            "messages": [
+                {"role": "user", "content": "x" * 500},
+                {"role": "assistant", "content": "ok"},
+            ],
+            "meta": {"note": "y" * 500},
+        })
+        parsed = _json.loads(shrink(payload))
+        assert parsed["messages"][0]["content"].endswith("...[truncated]")
+        assert parsed["messages"][1]["content"] == "ok"
+        assert parsed["meta"]["note"].endswith("...[truncated]")
+
+    def test_non_string_leaves_preserved(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({
+            "retries": 3,
+            "enabled": True,
+            "timeout": None,
+            "items": [1, 2, 3],
+            "note": "z" * 500,
+        })
+        parsed = _json.loads(shrink(payload))
+        assert parsed["retries"] == 3
+        assert parsed["enabled"] is True
+        assert parsed["timeout"] is None
+        assert parsed["items"] == [1, 2, 3]
+        assert parsed["note"].endswith("...[truncated]")
+
+    def test_scalar_json_string_gets_shrunk(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps("q" * 500)
+        parsed = _json.loads(shrink(payload))
+        assert isinstance(parsed, str)
+        assert parsed.endswith("...[truncated]")
+
+    def test_unicode_preserved(self):
+        import json as _json
+        shrink = self._helper()
+        payload = _json.dumps({"content": "非德满" + ("a" * 500)})
+        out = shrink(payload)
+        # ensure_ascii=False keeps CJK intact rather than emitting \uXXXX
+        assert "非德满" in out
+
+    def test_pass3_emits_valid_json_for_downstream_provider(self):
+        """End-to-end: Pass 3 must never produce the exact failure payload
+        that caused the 400 loop (unterminated string, missing brace)."""
+        import json as _json
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="test/model",
+                threshold_percent=0.85,
+                protect_first_n=1,
+                protect_last_n=1,
+                quiet_mode=True,
+            )
+        huge_content = "# Shopping Browser Setup Notes\n\n## Overview\n" + "x " * 400
+        args_payload = _json.dumps({
+            "path": "~/.hermes/skills/shopping/browser-setup-notes.md",
+            "content": huge_content,
+        })
+        assert len(args_payload) > 500  # triggers the Pass-3 shrink
+        messages = [
+            {"role": "user", "content": "please write two files"},
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "call_1", "type": "function",
+                 "function": {"name": "write_file", "arguments": args_payload}},
+            ]},
+            {"role": "tool", "tool_call_id": "call_1",
+             "content": '{"bytes_written": 727}'},
+            {"role": "user", "content": "ok"},
+            {"role": "assistant", "content": "done"},
+        ]
+        result, _ = c._prune_old_tool_results(messages, protect_tail_count=2)
+        shrunk = result[1]["tool_calls"][0]["function"]["arguments"]
+        # Must parse — otherwise downstream provider returns 400
+        parsed = _json.loads(shrunk)
+        assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
+        assert parsed["content"].endswith("...[truncated]")