Skip to content

Commit c3f8dc3

Browse files
authored
Merge pull request #2491 from mpfo0106/feature/claude-code-safe-alignment-sentinels
test(claude): add compatibility sentinels and centralize builtin fallback handling
2 parents b851208 + 9b5ce8c commit c3f8dc3

9 files changed

Lines changed: 243 additions & 6 deletions

internal/runtime/executor/claude_executor.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -951,12 +951,9 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {
951951
return body
952952
}
953953

954-
// Collect built-in tool names (those with a non-empty "type" field) so we can
955-
// skip them consistently in both tools and message history.
956-
builtinTools := map[string]bool{}
957-
for _, name := range []string{"web_search", "code_execution", "text_editor", "computer"} {
958-
builtinTools[name] = true
959-
}
954+
// Collect built-in tool names from the authoritative fallback seed list and
955+
// augment it with any typed built-ins present in the current request body.
956+
builtinTools := helps.AugmentClaudeBuiltinToolRegistry(body, nil)
960957

961958
if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() {
962959
tools.ForEach(func(index, tool gjson.Result) bool {

internal/runtime/executor/claude_executor_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,35 @@ func TestApplyClaudeToolPrefix_ToolChoiceBuiltin(t *testing.T) {
739739
}
740740
}
741741

742+
func TestApplyClaudeToolPrefix_KnownFallbackBuiltinsRemainUnprefixed(t *testing.T) {
743+
for _, builtin := range []string{"web_search", "code_execution", "text_editor", "computer"} {
744+
t.Run(builtin, func(t *testing.T) {
745+
input := []byte(fmt.Sprintf(`{
746+
"tools":[{"name":"Read"}],
747+
"tool_choice":{"type":"tool","name":%q},
748+
"messages":[{"role":"assistant","content":[{"type":"tool_use","name":%q,"id":"toolu_1","input":{}},{"type":"tool_reference","tool_name":%q},{"type":"tool_result","tool_use_id":"toolu_1","content":[{"type":"tool_reference","tool_name":%q}]}]}]
749+
}`, builtin, builtin, builtin, builtin))
750+
out := applyClaudeToolPrefix(input, "proxy_")
751+
752+
if got := gjson.GetBytes(out, "tool_choice.name").String(); got != builtin {
753+
t.Fatalf("tool_choice.name = %q, want %q", got, builtin)
754+
}
755+
if got := gjson.GetBytes(out, "messages.0.content.0.name").String(); got != builtin {
756+
t.Fatalf("messages.0.content.0.name = %q, want %q", got, builtin)
757+
}
758+
if got := gjson.GetBytes(out, "messages.0.content.1.tool_name").String(); got != builtin {
759+
t.Fatalf("messages.0.content.1.tool_name = %q, want %q", got, builtin)
760+
}
761+
if got := gjson.GetBytes(out, "messages.0.content.2.content.0.tool_name").String(); got != builtin {
762+
t.Fatalf("messages.0.content.2.content.0.tool_name = %q, want %q", got, builtin)
763+
}
764+
if got := gjson.GetBytes(out, "tools.0.name").String(); got != "proxy_Read" {
765+
t.Fatalf("tools.0.name = %q, want %q", got, "proxy_Read")
766+
}
767+
})
768+
}
769+
}
770+
742771
func TestStripClaudeToolPrefixFromResponse(t *testing.T) {
743772
input := []byte(`{"content":[{"type":"tool_use","name":"proxy_alpha","id":"t1","input":{}},{"type":"tool_use","name":"bravo","id":"t2","input":{}}]}`)
744773
out := stripClaudeToolPrefixFromResponse(input, "proxy_")
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package helps
2+
3+
import "github.com/tidwall/gjson"
4+
5+
// defaultClaudeBuiltinToolNames is the seed list of tool names that every
// registry built by newClaudeBuiltinToolRegistry marks as built-in.
var defaultClaudeBuiltinToolNames = []string{
	"web_search",
	"code_execution",
	"text_editor",
	"computer",
}

// newClaudeBuiltinToolRegistry returns a fresh map in which every name from
// defaultClaudeBuiltinToolNames is set to true. Each call allocates a new map,
// so callers may mutate the result freely.
func newClaudeBuiltinToolRegistry() map[string]bool {
	seedCount := len(defaultClaudeBuiltinToolNames)
	seeded := make(map[string]bool, seedCount)
	for i := 0; i < seedCount; i++ {
		seeded[defaultClaudeBuiltinToolNames[i]] = true
	}
	return seeded
}
19+
20+
func AugmentClaudeBuiltinToolRegistry(body []byte, registry map[string]bool) map[string]bool {
21+
if registry == nil {
22+
registry = newClaudeBuiltinToolRegistry()
23+
}
24+
tools := gjson.GetBytes(body, "tools")
25+
if !tools.Exists() || !tools.IsArray() {
26+
return registry
27+
}
28+
tools.ForEach(func(_, tool gjson.Result) bool {
29+
if tool.Get("type").String() == "" {
30+
return true
31+
}
32+
if name := tool.Get("name").String(); name != "" {
33+
registry[name] = true
34+
}
35+
return true
36+
})
37+
return registry
38+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package helps
2+
3+
import "testing"
4+
5+
func TestClaudeBuiltinToolRegistry_DefaultSeedFallback(t *testing.T) {
6+
registry := AugmentClaudeBuiltinToolRegistry(nil, nil)
7+
for _, name := range defaultClaudeBuiltinToolNames {
8+
if !registry[name] {
9+
t.Fatalf("default builtin %q missing from fallback registry", name)
10+
}
11+
}
12+
}
13+
14+
func TestClaudeBuiltinToolRegistry_AugmentsTypedBuiltinsFromBody(t *testing.T) {
15+
registry := AugmentClaudeBuiltinToolRegistry([]byte(`{
16+
"tools": [
17+
{"type": "web_search_20250305", "name": "web_search"},
18+
{"type": "custom_builtin_20250401", "name": "special_builtin"},
19+
{"name": "Read"}
20+
]
21+
}`), nil)
22+
23+
if !registry["web_search"] {
24+
t.Fatal("expected default typed builtin web_search in registry")
25+
}
26+
if !registry["special_builtin"] {
27+
t.Fatal("expected typed builtin from body to be added to registry")
28+
}
29+
if registry["Read"] {
30+
t.Fatal("expected untyped custom tool to stay out of builtin registry")
31+
}
32+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package test
2+
3+
import (
4+
"encoding/json"
5+
"os"
6+
"path/filepath"
7+
"testing"
8+
)
9+
10+
type jsonObject = map[string]any
11+
12+
func loadClaudeCodeSentinelFixture(t *testing.T, name string) jsonObject {
13+
t.Helper()
14+
path := filepath.Join("testdata", "claude_code_sentinels", name)
15+
data := mustReadFile(t, path)
16+
var payload jsonObject
17+
if err := json.Unmarshal(data, &payload); err != nil {
18+
t.Fatalf("unmarshal %s: %v", name, err)
19+
}
20+
return payload
21+
}
22+
23+
// mustReadFile returns the full contents of the file at path, aborting the
// calling test with a fatal error if the file cannot be read.
func mustReadFile(t *testing.T, path string) []byte {
	t.Helper()

	var (
		contents []byte
		err      error
	)
	if contents, err = os.ReadFile(path); err != nil {
		t.Fatalf("read %s: %v", path, err)
	}
	return contents
}
31+
32+
func requireStringField(t *testing.T, obj jsonObject, key string) string {
33+
t.Helper()
34+
value, ok := obj[key].(string)
35+
if !ok || value == "" {
36+
t.Fatalf("field %q missing or empty: %#v", key, obj[key])
37+
}
38+
return value
39+
}
40+
41+
func TestClaudeCodeSentinel_ToolProgressShape(t *testing.T) {
42+
payload := loadClaudeCodeSentinelFixture(t, "tool_progress.json")
43+
if got := requireStringField(t, payload, "type"); got != "tool_progress" {
44+
t.Fatalf("type = %q, want tool_progress", got)
45+
}
46+
requireStringField(t, payload, "tool_use_id")
47+
requireStringField(t, payload, "tool_name")
48+
requireStringField(t, payload, "session_id")
49+
if _, ok := payload["elapsed_time_seconds"].(float64); !ok {
50+
t.Fatalf("elapsed_time_seconds missing or non-number: %#v", payload["elapsed_time_seconds"])
51+
}
52+
}
53+
54+
func TestClaudeCodeSentinel_SessionStateShape(t *testing.T) {
55+
payload := loadClaudeCodeSentinelFixture(t, "session_state_changed.json")
56+
if got := requireStringField(t, payload, "type"); got != "system" {
57+
t.Fatalf("type = %q, want system", got)
58+
}
59+
if got := requireStringField(t, payload, "subtype"); got != "session_state_changed" {
60+
t.Fatalf("subtype = %q, want session_state_changed", got)
61+
}
62+
state := requireStringField(t, payload, "state")
63+
switch state {
64+
case "idle", "running", "requires_action":
65+
default:
66+
t.Fatalf("unexpected session state %q", state)
67+
}
68+
requireStringField(t, payload, "session_id")
69+
}
70+
71+
func TestClaudeCodeSentinel_ToolUseSummaryShape(t *testing.T) {
72+
payload := loadClaudeCodeSentinelFixture(t, "tool_use_summary.json")
73+
if got := requireStringField(t, payload, "type"); got != "tool_use_summary" {
74+
t.Fatalf("type = %q, want tool_use_summary", got)
75+
}
76+
requireStringField(t, payload, "summary")
77+
rawIDs, ok := payload["preceding_tool_use_ids"].([]any)
78+
if !ok || len(rawIDs) == 0 {
79+
t.Fatalf("preceding_tool_use_ids missing or empty: %#v", payload["preceding_tool_use_ids"])
80+
}
81+
for i, raw := range rawIDs {
82+
if id, ok := raw.(string); !ok || id == "" {
83+
t.Fatalf("preceding_tool_use_ids[%d] invalid: %#v", i, raw)
84+
}
85+
}
86+
}
87+
88+
func TestClaudeCodeSentinel_ControlRequestCanUseToolShape(t *testing.T) {
89+
payload := loadClaudeCodeSentinelFixture(t, "control_request_can_use_tool.json")
90+
if got := requireStringField(t, payload, "type"); got != "control_request" {
91+
t.Fatalf("type = %q, want control_request", got)
92+
}
93+
requireStringField(t, payload, "request_id")
94+
request, ok := payload["request"].(map[string]any)
95+
if !ok {
96+
t.Fatalf("request missing or invalid: %#v", payload["request"])
97+
}
98+
if got := requireStringField(t, request, "subtype"); got != "can_use_tool" {
99+
t.Fatalf("request.subtype = %q, want can_use_tool", got)
100+
}
101+
requireStringField(t, request, "tool_name")
102+
requireStringField(t, request, "tool_use_id")
103+
if input, ok := request["input"].(map[string]any); !ok || len(input) == 0 {
104+
t.Fatalf("request.input missing or empty: %#v", request["input"])
105+
}
106+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"type": "control_request",
3+
"request_id": "req_123",
4+
"request": {
5+
"subtype": "can_use_tool",
6+
"tool_name": "Bash",
7+
"input": {"command": "npm test"},
8+
"tool_use_id": "toolu_123",
9+
"description": "Running npm test"
10+
}
11+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"type": "system",
3+
"subtype": "session_state_changed",
4+
"state": "requires_action",
5+
"uuid": "22222222-2222-4222-8222-222222222222",
6+
"session_id": "sess_123"
7+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"type": "tool_progress",
3+
"tool_use_id": "toolu_123",
4+
"tool_name": "Bash",
5+
"parent_tool_use_id": null,
6+
"elapsed_time_seconds": 2.5,
7+
"task_id": "task_123",
8+
"uuid": "11111111-1111-4111-8111-111111111111",
9+
"session_id": "sess_123"
10+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"type": "tool_use_summary",
3+
"summary": "Searched in auth/",
4+
"preceding_tool_use_ids": ["toolu_1", "toolu_2"],
5+
"uuid": "33333333-3333-4333-8333-333333333333",
6+
"session_id": "sess_123"
7+
}

0 commit comments

Comments
 (0)