Defer low-value native tools by default#2102
Conversation
|
Warning You have reached your daily quota limit. Please wait up to 24 hours and I will start processing your requests again! |
6c9e6f3 to
6e64881
Compare
| if project.provider != ProviderKind::Deepseek || has_api_key { | ||
| self.provider = project.provider; | ||
| } |
There was a problem hiding this comment.
Undefined variable
has_api_key — compile error
has_api_key is not defined anywhere in this crate; it doesn't exist as a local variable, a module-level function, or an imported symbol in crates/config/src/lib.rs. This will fail to compile. The closest candidates are self.api_key.is_some() (user-level credential already present) or project.api_key.is_some() (project config has an explicit key), but neither is what's written here. Additionally, the docstring for merge_project_overrides explicitly states the function "intentionally ignores … provider selection", so any change to that contract should be deliberate and clearly explained.
| pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[ | ||
| "agent_open", | ||
| "apply_patch", | ||
| "edit_file", | ||
| "exec_shell", | ||
| "fetch_url", | ||
| "file_search", | ||
| "git_diff", | ||
| "git_status", | ||
| "grep_files", | ||
| "list_dir", | ||
| "read_file", | ||
| "run_tests", | ||
| "web_search", | ||
| "write_file", | ||
| ]; | ||
|
|
||
| pub(super) fn should_default_defer_tool( | ||
| name: &str, | ||
| _mode: AppMode, | ||
| always_load: &HashSet<String>, | ||
| ) -> bool { | ||
| if always_load.contains(name) { | ||
| return false; | ||
| } | ||
|
|
||
| // Shell exec tools are kept active in Agent so the model can run | ||
| // verification commands (build/test/git/cargo) without first having to | ||
| // discover them through ToolSearch. Plan mode does not register shell | ||
| // execution tools. | ||
| let always_loaded_in_action_modes = matches!(mode, AppMode::Agent) | ||
| && matches!( | ||
| name, | ||
| "exec_shell" | ||
| | "exec_shell_wait" | ||
| | "exec_shell_interact" | ||
| | "exec_wait" | ||
| | "exec_interact" | ||
| ); | ||
| if always_loaded_in_action_modes { | ||
| if is_tool_search_tool(name) { | ||
| return false; | ||
| } | ||
|
|
||
| !matches!( | ||
| name, | ||
| "read_file" | ||
| | "write_file" | ||
| | "list_dir" | ||
| | "grep_files" | ||
| | "file_search" | ||
| | "diagnostics" | ||
| | "rlm_open" | ||
| | "rlm_eval" | ||
| | "rlm_configure" | ||
| | "rlm_close" | ||
| | "rlm_session_objects" | ||
| | "handle_read" | ||
| | "recall_archive" | ||
| | "notify" | ||
| | MULTI_TOOL_PARALLEL_NAME | ||
| | "update_plan" | ||
| | "checklist_write" | ||
| | "todo_write" | ||
| | "task_create" | ||
| | "task_list" | ||
| | "task_read" | ||
| | "task_gate_run" | ||
| | "task_shell_start" | ||
| | "task_shell_wait" | ||
| | "github_issue_context" | ||
| | "github_pr_context" | ||
| | REQUEST_USER_INPUT_NAME | ||
| ) | ||
| !DEFAULT_ACTIVE_NATIVE_TOOLS | ||
| .iter() | ||
| .any(|core_tool| core_tool == &name) | ||
| } |
There was a problem hiding this comment.
update_plan deferral breaks the force_update_plan first-step optimization
update_plan is not in DEFAULT_ACTIVE_NATIVE_TOOLS, so apply_native_tool_deferral marks it with defer_loading = Some(true). When should_force_update_plan_first returns true (Plan mode + "quick plan" phrases), active_tools_for_step returns only the update_plan tool straight from the catalog — still with defer_loading = Some(true). The model's first call to update_plan hits maybe_hydrate_requested_deferred_tool, which intercepts it (the tool is not yet in active_tools_at_batch_start) and returns a schema-hydration response instead of executing. This uses an extra LLM round-trip before update_plan can actually run, and force_update_plan_this_step is false for all subsequent steps, so the forced-first-step guarantee is gone entirely.
| pub(super) fn should_default_defer_tool( | ||
| name: &str, | ||
| _mode: AppMode, | ||
| always_load: &HashSet<String>, | ||
| ) -> bool { | ||
| if always_load.contains(name) { | ||
| return false; | ||
| } | ||
|
|
||
| // Shell exec tools are kept active in Agent so the model can run | ||
| // verification commands (build/test/git/cargo) without first having to | ||
| // discover them through ToolSearch. Plan mode does not register shell | ||
| // execution tools. | ||
| let always_loaded_in_action_modes = matches!(mode, AppMode::Agent) | ||
| && matches!( | ||
| name, | ||
| "exec_shell" | ||
| | "exec_shell_wait" | ||
| | "exec_shell_interact" | ||
| | "exec_wait" | ||
| | "exec_interact" | ||
| ); | ||
| if always_loaded_in_action_modes { | ||
| if is_tool_search_tool(name) { | ||
| return false; | ||
| } | ||
|
|
||
| !matches!( | ||
| name, | ||
| "read_file" | ||
| | "write_file" | ||
| | "list_dir" | ||
| | "grep_files" | ||
| | "file_search" | ||
| | "diagnostics" | ||
| | "rlm_open" | ||
| | "rlm_eval" | ||
| | "rlm_configure" | ||
| | "rlm_close" | ||
| | "rlm_session_objects" | ||
| | "handle_read" | ||
| | "recall_archive" | ||
| | "notify" | ||
| | MULTI_TOOL_PARALLEL_NAME | ||
| | "update_plan" | ||
| | "checklist_write" | ||
| | "todo_write" | ||
| | "task_create" | ||
| | "task_list" | ||
| | "task_read" | ||
| | "task_gate_run" | ||
| | "task_shell_start" | ||
| | "task_shell_wait" | ||
| | "github_issue_context" | ||
| | "github_pr_context" | ||
| | REQUEST_USER_INPUT_NAME | ||
| ) | ||
| !DEFAULT_ACTIVE_NATIVE_TOOLS | ||
| .iter() | ||
| .any(|core_tool| core_tool == &name) | ||
| } |
There was a problem hiding this comment.
_mode parameter is now completely unused — Yolo mode and Plan mode get identical native-tool deferral
Previously, Yolo mode loaded all native tools and Agent mode kept exec tools always loaded. Now _mode is silently ignored. Meanwhile apply_mcp_tool_deferral still uses mode to give Yolo mode special treatment — MCP tools are not deferred in Yolo mode while non-core native tools are. This asymmetry means a user in Yolo mode gets full access to MCP tools but must still discover git_show, notify, diagnostics, etc. via ToolSearch, contradicting the "unrestricted" semantics of Yolo mode.
| pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[ | ||
| "agent_open", | ||
| "apply_patch", | ||
| "edit_file", | ||
| "exec_shell", | ||
| "fetch_url", | ||
| "file_search", | ||
| "git_diff", | ||
| "git_status", | ||
| "grep_files", | ||
| "list_dir", | ||
| "read_file", | ||
| "run_tests", | ||
| "web_search", | ||
| "write_file", | ||
| ]; |
There was a problem hiding this comment.
Several previously always-loaded workflow tools are silently deferred with no documentation
request_user_input, diagnostics, recall_archive, todo_write, checklist_write, and the task_* family were all in the old always-loaded list and are absent from DEFAULT_ACTIVE_NATIVE_TOOLS. They now require a ToolSearch call before first use. CONFIGURATION.md documents always_load but doesn't mention which tools moved to deferred, so users relying on these tools will silently hit the schema-hydration detour without any config change on their part.
Summary
[tools] always_load = ["..."]config support to opt native tools back into the active catalog.scripts/measure-tool-catalog.pyfor deterministic serialized catalog byte/token estimates.Closes #2076.
Partially addresses #1253.
Partially addresses #1177.
Partially addresses #1747.
Partially addresses #1120.
Measurement
CARGO_TARGET_DIR=/Volumes/VIXinSSD/whalebro/target-codewhale-issue-2076 scripts/measure-tool-catalog.pyTests
CARGO_TARGET_DIR=/Volumes/VIXinSSD/whalebro/target-codewhale-issue-2076 cargo test -p codewhale-tui tool_catalog -- --test-threads=1CARGO_TARGET_DIR=/Volumes/VIXinSSD/whalebro/target-codewhale-issue-2076 cargo test -p codewhale-tui non_yolo_mode_retains_default_defer_policy -- --test-threads=1CARGO_TARGET_DIR=/Volumes/VIXinSSD/whalebro/target-codewhale-issue-2076 cargo test -p codewhale-tui --bin codewhale-tui tools_always_load_parses_and_trims_names -- --test-threads=1CARGO_TARGET_DIR=/Volumes/VIXinSSD/whalebro/target-codewhale-issue-2076 cargo test -p codewhale-config tools -- --test-threads=1Greptile Summary
This PR reduces the model-visible native tool catalog from ~79 tools to ~16 by deferring low-frequency tools behind a ToolSearch gateway, achieving a ~73% token-count reduction. It also adds a
[tools] always_loadconfig option to opt specific tools back into the active catalog and ships a Python measurement script.DEFAULT_ACTIVE_NATIVE_TOOLSlist replaces the previous per-mode, per-name allowlist;apply_native_tool_deferralandensure_advanced_toolingnow accept and respect analways_loadset threaded fromEngineConfig.crates/config/src/lib.rsintroduces a compile error at line 391: a new provider-override guard referenceshas_api_key, a variable that does not exist anywhere in the config crate.update_planis no longer in the active catalog; whenforce_update_plan_firsttriggers on "quick plan" phrases in Plan mode, the forced first step now sends a deferred tool, causing a schema-hydration round-trip beforeupdate_plancan actually execute.Confidence Score: 1/5
Not safe to merge — the config crate will not compile, and Plan mode quick-plan optimization silently degrades to an extra round-trip.
The provider-override block added to merge_project_overrides references has_api_key, a name that does not exist anywhere in the config crate. Every crate that depends on codewhale-config will fail to build. Separately, update_plan was moved out of the active catalog without accounting for the force_update_plan first-step path, so quick-plan requests in Plan mode now pay an extra LLM round-trip before update_plan can execute.
crates/config/src/lib.rs (undefined variable at line 391) and crates/tui/src/core/engine/tool_catalog.rs (DEFAULT_ACTIVE_NATIVE_TOOLS missing update_plan)
Important Files Changed
has_api_keyin the new provider-override guard.update_planand several workflow tools are now deferred, breaking the force_update_plan first-step optimization and removing special Yolo-mode native-tool handling.Flowchart
%%{init: {'theme': 'neutral'}}%% flowchart TD A[Config loaded] --> B[tools_always_load set] B --> C[build_model_tool_catalog] C --> D[apply_native_tool_deferral] C --> E[apply_mcp_tool_deferral] D --> D1{tool in always_load?} D1 -- Yes --> D2[defer_loading = false] D1 -- No --> D3{is ToolSearch?} D3 -- Yes --> D2 D3 -- No --> D4{in DEFAULT_ACTIVE_NATIVE_TOOLS?} D4 -- Yes --> D2 D4 -- No --> D5[defer_loading = true] E --> E1{mode == Yolo?} E1 -- Yes --> E2[defer_loading = false ALL MCP] E1 -- No --> E3{in keep_mcp_loaded list?} E3 -- Yes --> E2 E3 -- No --> E4[defer_loading = true] D2 & D5 --> F[initial_active_tools] E2 & E4 --> F F --> G[active_tool_names HashSet] G --> H[active_tools_for_step] H --> I{force_update_plan?} I -- Yes --> J[return update_plan from catalog] I -- No --> K[active_tool_list_from_catalog] J --> L[Model calls update_plan] L --> M{in active_tools_at_batch_start?} M -- No --> N[Schema hydration - extra round-trip] M -- Yes --> O[Execute update_plan]Reviews (1): Last reviewed commit: "Merge main into codex/issue-2076-defer-n..." | Re-trigger Greptile