fix(llm-idle-timeout): honor models.providers.<id>.timeoutSeconds for cloud providers

yujiawei · yujiawei · commit bad0aa789675 · 2026-05-19T04:08:46.000Z
The schema.help text for `models.providers.*.timeoutSeconds` documents the key as the user-facing knob for "slow local or self-hosted model servers". In practice the option is also the only configurable lever for the LLM idle/first-token watchdog. However, `resolveLlmIdleTimeoutMs` was still running the explicit provider timeout through `clampImplicitTimeoutMs`, which clamped it back down to the implicit ~120s `DEFAULT_LLM_IDLE_TIMEOUT_MS` ceiling for any non-cron, non-local provider.

Consequence (matches #77744 and #78361):
- User sets `models.providers.llamacpp.timeoutSeconds: 14400` (or 600 for a slow Gemini/Opus turn with a large tool payload).
- Hot reload accepts the value, and the runtime resolves `modelRequestTimeoutMs = 14_400_000`.
- The idle watchdog still trips at ~120s with "LLM idle timeout (120s): no response from model", aborting an otherwise-healthy upstream that is mid-prefill or buffering thinking tokens.

Fix: when the caller passes an explicit `modelRequestTimeoutMs` (sourced from `models.providers.<id>.timeoutSeconds` / `model.requestTimeoutMs`), treat it as a deliberate ceiling for cloud providers too. The run-timeout / agent-timeout bounds still apply via `timeoutBounds`, so a shorter explicit run timeout always wins. The implicit default watchdog still kicks in when the user has not set a provider timeout, preserving the network-silence-as-hang guard for default configs.

Updated the test case that asserted the old clamp-on-cloud behavior and renamed its short-timeout sibling, whose assertion is unchanged; all 71 tests in `llm-idle-timeout.test.ts` and the wider 430-test `src/agents/pi-embedded-runner/run/` lane pass. Schema help text refreshed to call out that the same knob raises the idle watchdog ceiling.

Refs: #77744, #78361
diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts
@@ -43,13 +43,14 @@ describe("resolveLlmIdleTimeoutMs", () => {
     expect(resolveLlmIdleTimeoutMs({ runTimeoutMs: 2_147_000_000 })).toBe(0);
   });
 
-  it("caps remote provider request timeouts at the default idle watchdog", () => {
-    expect(resolveLlmIdleTimeoutMs({ modelRequestTimeoutMs: 300_000 })).toBe(
-      DEFAULT_LLM_IDLE_TIMEOUT_MS,
-    );
+  it("honors an explicit models.providers.<id>.timeoutSeconds for cloud providers (#77744, #78361)", () => {
+    // models.providers.<id>.timeoutSeconds is documented as the user-facing
+    // knob to extend slow model responses. The idle watchdog must respect it
+    // instead of clamping back to DEFAULT_LLM_IDLE_TIMEOUT_MS.
+    expect(resolveLlmIdleTimeoutMs({ modelRequestTimeoutMs: 300_000 })).toBe(300_000);
   });
 
-  it("uses remote provider request timeouts when shorter than the default idle watchdog", () => {
+  it("honors short explicit provider request timeouts", () => {
     expect(resolveLlmIdleTimeoutMs({ modelRequestTimeoutMs: 30_000 })).toBe(30_000);
   });
 
diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts
@@ -154,11 +154,18 @@ export function resolveLlmIdleTimeoutMs(params?: {
     Number.isFinite(modelRequestTimeoutMs) &&
     modelRequestTimeoutMs > 0
   ) {
+    // `modelRequestTimeoutMs` is wired from `models.providers.<id>.timeoutSeconds`,
+    // which is an explicit per-provider opt-in. The schema help presents it as
+    // the knob to reach for instead of changing global agent timeouts, so we
+    // honor it as a deliberate ceiling rather than clamping it back down to the
+    // implicit `DEFAULT_LLM_IDLE_TIMEOUT_MS` network-silence-as-hang guard.
+    // Without this, users hitting #77744 / #78361 set provider timeoutSeconds
+    // to e.g. 600s, observe the value is accepted and hot-reloaded, yet the
+    // idle watchdog still aborts at 120s. The agent/run `timeoutBounds` are
+    // still folded in by the `Math.min` below, so an explicit shorter run
+    // timeout always wins.
     const boundedTimeoutMs = Math.min(modelRequestTimeoutMs, ...timeoutBounds);
-    if (params?.trigger === "cron" || isLocalProvider) {
-      return clampTimeoutMs(boundedTimeoutMs);
-    }
-    return clampImplicitTimeoutMs(boundedTimeoutMs);
+    return clampTimeoutMs(boundedTimeoutMs);
   }
 
   if (typeof runTimeoutMs === "number" && Number.isFinite(runTimeoutMs) && runTimeoutMs > 0) {
diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts
@@ -948,7 +948,7 @@ export const FIELD_HELP: Record<string, string> = {
   "models.providers.*.maxTokens":
     "Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.",
   "models.providers.*.timeoutSeconds":
-    "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
+    "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling, and also raises the LLM idle/stream watchdog ceiling for this provider above the implicit ~120s default. Use this for slow local or self-hosted model servers, or for cloud providers that buffer reasoning tokens silently on the wire (Gemini preview, large-tool-payload Claude/Opus), instead of changing global agent timeouts.",
   "models.providers.*.injectNumCtxForOpenAICompat":
     "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
   "models.providers.*.params":