Skip to content

Commit 5d4a8b0

Browse files
authored
fix(agents): make trajectory cleanup timeout configurable
Refs #75839.

Adds OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS and OPENCLAW_AGENT_CLEANUP_TIMEOUT_MS for agent cleanup steps while preserving the 10s default. Includes focused timeout precedence tests, trajectory docs, and changelog coverage.

Verification:
- pnpm test src/agents/run-cleanup-timeout.test.ts
- pnpm exec oxfmt --check --threads=1 src/agents/run-cleanup-timeout.ts src/agents/run-cleanup-timeout.test.ts
- pnpm format:docs:check docs/tools/trajectory.md
- git diff --check
- pnpm check:changed
- GitHub PR checks: 88 passing, CodeQL neutral, 21 skipped
1 parent 5496c0d commit 5d4a8b0

4 files changed

Lines changed: 154 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
1515
- Maintainer tooling: clarify which pnpm test/check commands are safe locally versus inside Codex worktrees, routing linked-worktree gates through node wrappers and Crabbox/Testbox.
1616
- iOS/chat: resize PhotosPicker image attachments to capped JPEGs before staging and sending, stripping source metadata and keeping oversized camera photos under the chat upload budget. Fixes #68524. Thanks @BunsDev.
1717
- Codex harness: classify native app-server token-refresh logout and relogin failures as authentication refresh errors, so users get re-authentication guidance instead of a raw runtime failure.
18+
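- Agents/trajectory: make the trajectory flush cleanup timeout configurable with `OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS`, and the timeout for other agent cleanup steps configurable with `OPENCLAW_AGENT_CLEANUP_TIMEOUT_MS`, so slower stores have time to drain; the default stays at 10s. Refs #75839. Thanks @BunsDev.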
1819
- Codex startup: treat selectable configured OpenAI agent models as Codex runtime requirements during plugin auto-enable, startup planning, and doctor install repair, so Anthropic-primary configs can still switch to OpenAI/Codex cleanly.
1920
- Agents: preserve source-reply delivery metadata when merging tool-returned media into the final reply, keeping message-tool-only replies deliverable and mirrored. Thanks @pashpashpash and @vincentkoc.
2021
- macOS/companion: require system TLS trust before pinning a first-use direct `wss://` gateway certificate and honor `gateway.remote.tlsFingerprint` as the explicit pin for remote node-mode sessions, so fresh endpoints fail closed when macOS cannot trust the certificate unless configured out of band. Fixes #50642. Thanks @BunsDev.

docs/tools/trajectory.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,20 @@ This disables runtime trajectory capture. `/export-trajectory` can still export
168168
the transcript branch, but runtime-only files such as compiled context,
169169
provider artifacts, and prompt metadata may be missing.
170170

171+
## Tune flush timeout
172+
173+
OpenClaw flushes runtime trajectory sidecars during agent cleanup. The default
174+
cleanup timeout is 10,000 ms. On slow disks or large stores, set
175+
`OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS` before starting OpenClaw:
176+
177+
```bash
178+
export OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS=30000
179+
```
180+
181+
This controls when OpenClaw logs a `pi-trajectory-flush` timeout and continues.
182+
It does not change the trajectory size caps. To tune all agent cleanup steps
183+
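that do not pass an explicit timeout, set `OPENCLAW_AGENT_CLEANUP_TIMEOUT_MS`. For the trajectory flush step, `OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS` takes precedence when both are set. Each value must be a positive number of milliseconds; empty, zero, negative, or non-numeric values are ignored.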
184+
171185
## Privacy and limits
172186

173187
Trajectory bundles are designed for support and debugging, not public posting.

src/agents/run-cleanup-timeout.test.ts

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2-
import { AGENT_CLEANUP_STEP_TIMEOUT_MS, runAgentCleanupStep } from "./run-cleanup-timeout.js";
2+
import {
3+
AGENT_CLEANUP_STEP_TIMEOUT_MS,
4+
resolveAgentCleanupStepTimeoutMs,
5+
runAgentCleanupStep,
6+
} from "./run-cleanup-timeout.js";
37

48
describe("agent cleanup timeout", () => {
59
const log = {
@@ -35,6 +39,91 @@ describe("agent cleanup timeout", () => {
3539
);
3640
});
3741

42+
it("uses the trajectory flush timeout environment override for trajectory cleanup", async () => {
43+
const cleanup = vi.fn(async () => new Promise<never>(() => {}));
44+
45+
const result = runAgentCleanupStep({
46+
runId: "run-trajectory",
47+
sessionId: "session-trajectory",
48+
step: "pi-trajectory-flush",
49+
cleanup,
50+
log,
51+
env: {
52+
OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS: "25000",
53+
},
54+
});
55+
56+
await vi.advanceTimersByTimeAsync(24_999);
57+
expect(log.warn).not.toHaveBeenCalled();
58+
59+
await vi.advanceTimersByTimeAsync(1);
60+
await expect(result).resolves.toBeUndefined();
61+
62+
expect(cleanup).toHaveBeenCalledTimes(1);
63+
expect(log.warn).toHaveBeenCalledWith(
64+
"agent cleanup timed out: runId=run-trajectory sessionId=session-trajectory step=pi-trajectory-flush timeoutMs=25000",
65+
);
66+
});
67+
68+
it("uses the general cleanup timeout environment override for other cleanup steps", async () => {
69+
const cleanup = vi.fn(async () => new Promise<never>(() => {}));
70+
71+
const result = runAgentCleanupStep({
72+
runId: "run-general",
73+
sessionId: "session-general",
74+
step: "bundle-mcp-retire",
75+
cleanup,
76+
log,
77+
env: {
78+
OPENCLAW_AGENT_CLEANUP_TIMEOUT_MS: "1500",
79+
},
80+
});
81+
82+
await vi.advanceTimersByTimeAsync(1_500);
83+
await expect(result).resolves.toBeUndefined();
84+
85+
expect(log.warn).toHaveBeenCalledWith(
86+
"agent cleanup timed out: runId=run-general sessionId=session-general step=bundle-mcp-retire timeoutMs=1500",
87+
);
88+
});
89+
90+
it("prefers explicit cleanup timeout values over environment overrides", () => {
91+
expect(
92+
resolveAgentCleanupStepTimeoutMs({
93+
step: "pi-trajectory-flush",
94+
timeoutMs: 2_000,
95+
env: {
96+
OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS: "25000",
97+
OPENCLAW_AGENT_CLEANUP_TIMEOUT_MS: "15000",
98+
},
99+
}),
100+
).toBe(2_000);
101+
});
102+
103+
it("keeps explicit zero cleanup timeouts as a one millisecond timeout", () => {
104+
expect(
105+
resolveAgentCleanupStepTimeoutMs({
106+
step: "pi-trajectory-flush",
107+
timeoutMs: 0,
108+
env: {
109+
OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS: "25000",
110+
},
111+
}),
112+
).toBe(1);
113+
});
114+
115+
it("ignores invalid cleanup timeout environment values", () => {
116+
expect(
117+
resolveAgentCleanupStepTimeoutMs({
118+
step: "pi-trajectory-flush",
119+
env: {
120+
OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS: "0",
121+
OPENCLAW_AGENT_CLEANUP_TIMEOUT_MS: "not-a-number",
122+
},
123+
}),
124+
).toBe(AGENT_CLEANUP_STEP_TIMEOUT_MS);
125+
});
126+
38127
it("logs cleanup rejection without throwing", async () => {
39128
await expect(
40129
runAgentCleanupStep({

src/agents/run-cleanup-timeout.ts

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,68 @@
11
import { formatErrorMessage } from "../infra/errors.js";
22

33
export const AGENT_CLEANUP_STEP_TIMEOUT_MS = 10_000;
4+
export const AGENT_CLEANUP_STEP_TIMEOUT_ENV = "OPENCLAW_AGENT_CLEANUP_TIMEOUT_MS";
5+
export const TRAJECTORY_FLUSH_TIMEOUT_ENV = "OPENCLAW_TRAJECTORY_FLUSH_TIMEOUT_MS";
46

57
type AgentCleanupLogger = {
68
warn: (message: string) => void;
79
};
810

11+
/**
 * Normalizes a caller-supplied cleanup timeout into a whole-millisecond delay.
 *
 * @param value - Candidate timeout in milliseconds. Anything that is not a
 *   finite number (including `undefined`, `NaN`, and `Infinity`) is rejected.
 * @returns The timeout floored to an integer and clamped to
 *   `[1, 2_147_483_647]`, or `undefined` when no usable explicit timeout was
 *   supplied, so the caller can fall back to another source.
 */
function normalizeExplicitTimeoutMs(value: unknown): number | undefined {
  // Node.js timers store the delay as a signed 32-bit integer; a larger delay
  // is coerced to 1 ms, which would turn "wait very long" into "time out
  // immediately". Clamp instead so oversized values keep their intent.
  const maxTimerDelayMs = 2_147_483_647;
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  // Zero and negative values are kept as the minimum 1 ms timeout.
  return Math.min(maxTimerDelayMs, Math.max(1, Math.floor(value)));
}
17+
18+
/**
 * Parses a timeout override read from an environment variable.
 *
 * @param value - Raw environment value; may be `undefined` when the variable
 *   is not set.
 * @returns The timeout in whole milliseconds, clamped to at most
 *   `2_147_483_647`, or `undefined` when the value is missing, blank,
 *   non-numeric, non-finite, or floors to zero or less.
 */
function parseTimeoutEnvValue(value: string | undefined): number | undefined {
  // Node.js timers coerce any delay above this to 1 ms, so an oversized
  // override would otherwise make the cleanup step time out almost instantly.
  const maxTimerDelayMs = 2_147_483_647;

  const trimmed = value?.trim();
  if (!trimmed) {
    return undefined;
  }

  const timeoutMs = Number(trimmed);
  if (!Number.isFinite(timeoutMs)) {
    return undefined;
  }

  // Unlike explicit timeouts, non-positive env values are treated as invalid
  // rather than being raised to 1 ms.
  const normalized = Math.floor(timeoutMs);
  if (normalized <= 0) {
    return undefined;
  }
  return Math.min(normalized, maxTimerDelayMs);
}
30+
31+
/**
 * Picks the timeout, in milliseconds, to apply to one agent cleanup step.
 *
 * Sources are consulted in this order, and the first usable one wins:
 * 1. `params.timeoutMs`, as accepted by `normalizeExplicitTimeoutMs`.
 * 2. For the `"pi-trajectory-flush"` step only, the environment variable
 *    named by `TRAJECTORY_FLUSH_TIMEOUT_ENV`.
 * 3. The environment variable named by `AGENT_CLEANUP_STEP_TIMEOUT_ENV`.
 * 4. `AGENT_CLEANUP_STEP_TIMEOUT_MS`.
 *
 * @param params.step - Name of the cleanup step being run.
 * @param params.timeoutMs - Optional explicit timeout supplied by the caller.
 * @param params.env - Environment to read overrides from; `process.env` is
 *   used when omitted.
 * @returns The resolved timeout in milliseconds.
 */
export function resolveAgentCleanupStepTimeoutMs(params: {
  step: string;
  timeoutMs?: number;
  env?: NodeJS.ProcessEnv;
}): number {
  const fromCaller = normalizeExplicitTimeoutMs(params.timeoutMs);
  if (fromCaller !== undefined) {
    return fromCaller;
  }

  const source = params.env ?? process.env;

  // Only the trajectory flush step has a dedicated override variable.
  const stepOverride =
    params.step === "pi-trajectory-flush"
      ? parseTimeoutEnvValue(source[TRAJECTORY_FLUSH_TIMEOUT_ENV])
      : undefined;
  if (stepOverride !== undefined) {
    return stepOverride;
  }

  const generalOverride = parseTimeoutEnvValue(source[AGENT_CLEANUP_STEP_TIMEOUT_ENV]);
  return generalOverride ?? AGENT_CLEANUP_STEP_TIMEOUT_MS;
}
51+
952
export async function runAgentCleanupStep(params: {
1053
runId: string;
1154
sessionId: string;
1255
step: string;
1356
cleanup: () => Promise<void>;
1457
log: AgentCleanupLogger;
58+
env?: NodeJS.ProcessEnv;
1559
timeoutMs?: number;
1660
}): Promise<void> {
17-
const timeoutMs = Math.max(1, Math.floor(params.timeoutMs ?? AGENT_CLEANUP_STEP_TIMEOUT_MS));
61+
const timeoutMs = resolveAgentCleanupStepTimeoutMs({
62+
step: params.step,
63+
timeoutMs: params.timeoutMs,
64+
env: params.env,
65+
});
1866
let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
1967
let timedOut = false;
2068
const cleanupPromise = Promise.resolve().then(params.cleanup);

0 commit comments

Comments
 (0)