Skip to content

Commit d8d8f75

Browse files
committed
fix: harden code mode runtime
1 parent d001d15 commit d8d8f75

8 files changed

Lines changed: 258 additions & 43 deletions

File tree

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
41fe35b1d577bdc2495842effde4158657b7ebfbfe5bd20aed4e507f52a6ac20 config-baseline.json
2-
d2307659e5496b97096b8e5af6b6f8fefeee861f7d6a3b29d250887be5c937cf config-baseline.core.json
1+
932d0df0d277aded125d4843a55f3acc2b90d39a5867d09d76bdf1a59d469e5f config-baseline.json
2+
fe5e2eecee8c354eac3e10b801c27c20a16f9432377a19fcb2849221e62c62bf config-baseline.core.json
33
2aa997d48549bd321a478485126a4bd5065ba47333a80e7eb07a0ef6ad75b0a6 config-baseline.channel.json
44
1ab5b65a94d84f59bae5e6bbe310057fae0a645f2538ab00f1f37b7f8b371e6f config-baseline.plugin.json

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1786,7 +1786,7 @@
17861786
"pdfjs-dist": "5.7.284",
17871787
"playwright-core": "1.60.0",
17881788
"qrcode": "1.5.4",
1789-
"quickjs-wasi": "^2.2.0",
1789+
"quickjs-wasi": "2.2.0",
17901790
"tar": "7.5.15",
17911791
"tokenjuice": "0.7.0",
17921792
"tree-sitter-bash": "0.25.1",

pnpm-lock.yaml

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/agents/code-mode.test.ts

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ async function runUntilCompleted(params: {
9595
describe("Code Mode", () => {
9696
afterEach(() => {
9797
__testing.activeRuns.clear();
98+
__testing.resumingRunIds.clear();
9899
});
99100

100101
it("resolves object config defaults", () => {
@@ -299,6 +300,60 @@ describe("Code Mode", () => {
299300
).rejects.toThrow("different session");
300301
});
301302

303+
it("rejects concurrent waits for the same suspended run", async () => {
304+
const catalogRef = createToolSearchCatalogRef();
305+
const config = {
306+
tools: {
307+
codeMode: {
308+
enabled: true,
309+
timeoutMs: 100,
310+
},
311+
},
312+
} as never;
313+
const ctx = {
314+
config,
315+
runtimeConfig: config,
316+
sessionId: "session-code-mode",
317+
sessionKey: "agent:main:main",
318+
runId: "run-code-mode",
319+
catalogRef,
320+
};
321+
const codeModeTools = createCodeModeTools(ctx);
322+
applyCodeModeCatalog({
323+
tools: [
324+
...codeModeTools,
325+
pluginToolWithExecute(
326+
"fake_slow",
327+
"Slow helper",
328+
async () => await new Promise<never>(() => undefined),
329+
),
330+
],
331+
config,
332+
sessionId: "session-code-mode",
333+
sessionKey: "agent:main:main",
334+
runId: "run-code-mode",
335+
catalogRef,
336+
});
337+
338+
const first = resultDetails(
339+
await codeModeTools[0].execute("code-call-concurrent-wait", {
340+
code: "await tools.fake_slow({}); return 'done';",
341+
}),
342+
);
343+
expect(first.status).toBe("waiting");
344+
345+
const firstWait = codeModeTools[1].execute("code-wait-concurrent-a", {
346+
runId: first.runId,
347+
});
348+
await expect(
349+
codeModeTools[1].execute("code-wait-concurrent-b", { runId: first.runId }),
350+
).rejects.toThrow("already being resumed");
351+
const stillWaiting = resultDetails(await firstWait);
352+
353+
expect(stillWaiting.status).toBe("waiting");
354+
expect(stillWaiting.runId).toBe(first.runId);
355+
});
356+
302357
it("reports only unsettled pending tool calls when wait times out", async () => {
303358
const catalogRef = createToolSearchCatalogRef();
304359
const config = {
@@ -379,6 +434,54 @@ describe("Code Mode", () => {
379434
expect(__testing.getTypescriptRuntimePromise()).toBeNull();
380435
});
381436

437+
it("allows identifiers and strings that contain import without module access", async () => {
438+
const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
439+
applyCodeModeCatalog({
440+
tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
441+
config,
442+
sessionId: "session-code-mode",
443+
sessionKey: "agent:main:main",
444+
runId: "run-code-mode",
445+
catalogRef,
446+
});
447+
448+
const details = await runUntilCompleted({
449+
execTool: codeModeTools[0],
450+
waitTool: codeModeTools[1],
451+
code: `
452+
const important = 41;
453+
const message = "import docs later";
454+
return important + (message.includes("import") ? 1 : 0);
455+
`,
456+
});
457+
458+
expect(details.status).toBe("completed");
459+
expect(details.value).toBe(42);
460+
});
461+
462+
it("fails pending promises that have no host bridge work", async () => {
463+
const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
464+
applyCodeModeCatalog({
465+
tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
466+
config,
467+
sessionId: "session-code-mode",
468+
sessionKey: "agent:main:main",
469+
runId: "run-code-mode",
470+
catalogRef,
471+
});
472+
473+
const beforeRunCount = __testing.activeRuns.size;
474+
const details = resultDetails(
475+
await codeModeTools[0].execute("code-call-empty-wait", {
476+
code: "await new Promise(() => undefined); return 'never';",
477+
}),
478+
);
479+
480+
expect(details.status).toBe("failed");
481+
expect(String(details.error)).toContain("pending without host work");
482+
expect(__testing.activeRuns.size).toBe(beforeRunCount);
483+
});
484+
382485
it("clamps omitted code-mode catalog search limits to maxSearchLimit", async () => {
383486
const catalogRef = createToolSearchCatalogRef();
384487
const config = {
@@ -449,7 +552,12 @@ describe("Code Mode", () => {
449552
expect(details.value).toEqual({ value: 42 });
450553
});
451554

452-
it("rejects module access", async () => {
555+
it.each([
556+
"const fs = require('node:fs'); return fs;",
557+
"return import('node:fs');",
558+
"return import.meta.url;",
559+
"return `${import('node:fs')}`;",
560+
])("rejects module access: %s", async (code) => {
453561
const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness();
454562
applyCodeModeCatalog({
455563
tools: [...codeModeTools, pluginTool("fake_noop", "Noop")],
@@ -462,7 +570,7 @@ describe("Code Mode", () => {
462570

463571
const details = resultDetails(
464572
await codeModeTools[0].execute("code-call-import", {
465-
code: "const fs = require('node:fs'); return fs;",
573+
code,
466574
}),
467575
);
468576

@@ -585,5 +693,6 @@ describe("Code Mode", () => {
585693

586694
await expect(heartbeat).resolves.toBe("main-event-loop-alive");
587695
expect(details.status).toBe("failed");
696+
expect(String(details.error)).toContain("timeout exceeded");
588697
});
589698
});

src/agents/code-mode.ts

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const DEFAULT_MAX_PENDING_TOOL_CALLS = 16;
4040
const DEFAULT_SNAPSHOT_TTL_SECONDS = 900;
4141
const DEFAULT_SEARCH_LIMIT = 8;
4242
const DEFAULT_MAX_SEARCH_LIMIT = 50;
43+
const MAX_ACTIVE_CODE_MODE_RUNS = 64;
4344

4445
type CodeModeLanguage = "javascript" | "typescript";
4546

@@ -113,6 +114,7 @@ type CodeModeWorkerResult =
113114
};
114115

115116
const activeRuns = new Map<string, CodeModeRunState>();
117+
const resumingRunIds = new Set<string>();
116118
let typescriptRuntimePromise: Promise<typeof import("typescript")> | null = null;
117119

118120
function isRecord(value: unknown): value is Record<string, unknown> {
@@ -223,10 +225,18 @@ function removeExpiredRuns(now = Date.now()): void {
223225
for (const [runId, state] of activeRuns) {
224226
if (state.expiresAt <= now) {
225227
activeRuns.delete(runId);
228+
resumingRunIds.delete(runId);
226229
}
227230
}
228231
}
229232

233+
function enforceActiveRunLimit(): void {
234+
removeExpiredRuns();
235+
if (activeRuns.size >= MAX_ACTIVE_CODE_MODE_RUNS) {
236+
throw new ToolInputError("too many suspended code mode runs.");
237+
}
238+
}
239+
230240
function toJsonSafe(value: unknown): unknown {
231241
if (value === undefined) {
232242
return null;
@@ -298,8 +308,65 @@ function readRunId(args: unknown): string {
298308
return runId.trim();
299309
}
300310

311+
function maskCodeLiteralsAndComments(code: string): string {
312+
let masked = "";
313+
let index = 0;
314+
while (index < code.length) {
315+
const char = code[index];
316+
const next = code[index + 1];
317+
if (char === "/" && next === "/") {
318+
masked += " ";
319+
index += 2;
320+
while (index < code.length && code[index] !== "\n") {
321+
masked += " ";
322+
index += 1;
323+
}
324+
continue;
325+
}
326+
if (char === "/" && next === "*") {
327+
masked += " ";
328+
index += 2;
329+
while (index < code.length) {
330+
if (code[index] === "*" && code[index + 1] === "/") {
331+
masked += " ";
332+
index += 2;
333+
break;
334+
}
335+
masked += code[index] === "\n" ? "\n" : " ";
336+
index += 1;
337+
}
338+
continue;
339+
}
340+
if (char === "'" || char === '"') {
341+
const quote = char;
342+
masked += " ";
343+
index += 1;
344+
while (index < code.length) {
345+
const current = code[index];
346+
masked += current === "\n" ? "\n" : " ";
347+
index += 1;
348+
if (current === "\\") {
349+
if (index < code.length) {
350+
masked += code[index] === "\n" ? "\n" : " ";
351+
index += 1;
352+
}
353+
continue;
354+
}
355+
if (current === quote) {
356+
break;
357+
}
358+
}
359+
continue;
360+
}
361+
masked += char;
362+
index += 1;
363+
}
364+
return masked;
365+
}
366+
301367
function rejectsModuleAccess(code: string): boolean {
302-
return /(^|[^\w$])import\s*(?:\(|[\s{*]|\w)|(^|[^\w$])require\s*\(/u.test(code);
368+
const source = maskCodeLiteralsAndComments(code);
369+
return /\bimport\b\s*(?:\.|\(|["'`{*]|\w)|\brequire\b\s*\(/u.test(source);
303370
}
304371

305372
async function loadTypeScriptRuntime(): Promise<typeof import("typescript")> {
@@ -498,6 +565,7 @@ function snapshotState(params: {
498565
signal?: AbortSignal;
499566
onUpdate?: AgentToolUpdateCallback<unknown>;
500567
}) {
568+
enforceActiveRunLimit();
501569
if (params.snapshotBytes.byteLength > params.config.maxSnapshotBytes) {
502570
throw new ToolInputError("code mode snapshot limit exceeded");
503571
}
@@ -670,21 +738,25 @@ async function runWait(params: {
670738
) {
671739
throw new ToolInputError("code mode run belongs to a different session.");
672740
}
673-
const ready = await waitForPending(state.pending, state.config.timeoutMs);
674-
if (!ready) {
675-
const pending = state.pending.filter((entry) => !entry.settled);
676-
return {
677-
status: "waiting" as const,
678-
runId: state.runId,
679-
reason: codeModeWaitingReason(pending.length > 0 ? pending : state.pending),
680-
pendingToolCalls: pendingToolCalls(pending.length > 0 ? pending : state.pending),
681-
output: state.output,
682-
telemetry: telemetry(state.runtime),
683-
};
741+
if (resumingRunIds.has(state.runId)) {
742+
throw new ToolInputError("code mode run is already being resumed.");
684743
}
685-
686-
activeRuns.delete(state.runId);
744+
resumingRunIds.add(state.runId);
687745
try {
746+
const ready = await waitForPending(state.pending, state.config.timeoutMs);
747+
if (!ready) {
748+
const pending = state.pending.filter((entry) => !entry.settled);
749+
return {
750+
status: "waiting" as const,
751+
runId: state.runId,
752+
reason: codeModeWaitingReason(pending.length > 0 ? pending : state.pending),
753+
pendingToolCalls: pendingToolCalls(pending.length > 0 ? pending : state.pending),
754+
output: state.output,
755+
telemetry: telemetry(state.runtime),
756+
};
757+
}
758+
759+
activeRuns.delete(state.runId);
688760
const settledRequests: SettledBridgeRequest[] = [];
689761
for (const entry of state.pending) {
690762
settledRequests.push(entry.settled ?? (await entry.promise));
@@ -731,6 +803,8 @@ async function runWait(params: {
731803
output: state.output,
732804
telemetry: telemetry(state.runtime),
733805
};
806+
} finally {
807+
resumingRunIds.delete(state.runId);
734808
}
735809
}
736810

@@ -843,6 +917,7 @@ export function addClientToolsToCodeModeCatalog(params: {
843917

844918
export const __testing = {
845919
activeRuns,
920+
resumingRunIds,
846921
codeModeWorkerUrl,
847922
resolveCodeModeWorkerUrl,
848923
resolveCodeModeConfig,

0 commit comments

Comments
 (0)