Skip to content

Commit 21410d1

Browse files
authored
fix(codex): guard sandbox http requests (#91752)
* fix(codex): guard sandbox http requests * fix(codex): align sandbox http policy
1 parent a4e02cd commit 21410d1

2 files changed

Lines changed: 264 additions & 7 deletions

File tree

extensions/codex/src/app-server/sandbox-exec-server.http.test.ts

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Codex tests cover sandbox exec server.http plugin behavior.
2+
import { spawn } from "node:child_process";
23
import { afterEach, describe, expect, it, vi } from "vitest";
34
import {
45
closeCodexSandboxExecServersForTests,
@@ -13,7 +14,10 @@ import {
1314
rpc,
1415
waitForHttpBodyDeltas,
1516
} from "./sandbox-exec-server.test-helpers.js";
16-
import { SANDBOX_HTTP_STREAM_LINE_MAX_CHARS } from "./sandbox-exec-server/http.js";
17+
import {
18+
SANDBOX_HTTP_REQUEST_SCRIPT,
19+
SANDBOX_HTTP_STREAM_LINE_MAX_CHARS,
20+
} from "./sandbox-exec-server/http.js";
1721

1822
afterEach(async () => {
1923
vi.unstubAllEnvs();
@@ -26,6 +30,32 @@ function testExecEnv(): NodeJS.ProcessEnv {
2630
};
2731
}
2832

33+
/**
 * Runs the sandbox HTTP helper script under `bash -lc`, feeding `input` to it as
 * JSON on stdin, and collects everything the script writes.
 *
 * @param input - Request description; serialized with `JSON.stringify`.
 * @returns The exit code (`null` when the process was killed by a signal) plus the
 *   full stdout and stderr text, resolved once the child's stdio has closed.
 * @throws Rejects when the process cannot be spawned, or when writing to its
 *   stdin fails for a reason other than the child having already gone away.
 */
function runSandboxHttpRequestScript(input: unknown): Promise<{
  code: number | null;
  stderr: string;
  stdout: string;
}> {
  return new Promise((resolve, reject) => {
    const child = spawn("bash", ["-lc", SANDBOX_HTTP_REQUEST_SCRIPT], {
      env: testExecEnv(),
      stdio: ["pipe", "pipe", "pipe"],
    });
    let stdout = "";
    let stderr = "";
    // Decode through the stream's own decoder so a multi-byte UTF-8 sequence that
    // straddles two chunks is not turned into replacement characters.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });
    child.once("error", reject);
    // A script that exits before reading stdin makes the write fail with EPIPE.
    // Without a listener that would surface as an uncaught exception; the exit
    // code and stderr reported on "close" already describe that outcome.
    child.stdin.on("error", (error: Error) => {
      const { code } = error as Error & { code?: string };
      if (code !== "EPIPE") {
        reject(error);
      }
    });
    child.once("close", (code) => {
      resolve({ code, stderr, stdout });
    });
    child.stdin.end(JSON.stringify(input));
  });
}
58+
2959
describe("OpenClaw Codex sandbox exec-server HTTP", () => {
3060
it("routes HTTP requests through the sandbox backend", async () => {
3161
const runShellCommand = vi.fn(async () => ({
@@ -71,6 +101,85 @@ describe("OpenClaw Codex sandbox exec-server HTTP", () => {
71101
socket.close();
72102
});
73103

104+
it("blocks private HTTP targets before starting the sandbox backend", async () => {
105+
const runShellCommand = vi.fn(async () => ({
106+
stdout: Buffer.alloc(0),
107+
stderr: Buffer.alloc(0),
108+
code: 0,
109+
}));
110+
const sandbox = createSandboxContext({ runShellCommand });
111+
const client = createClient();
112+
await ensureCodexSandboxExecServerEnvironment({
113+
client: client as never,
114+
sandbox,
115+
});
116+
const socket = await openSocket(execServerUrlFromClient(client));
117+
await rpc(socket, "initialize", { clientName: "test" });
118+
socket.send(JSON.stringify({ method: "initialized" }));
119+
120+
await expect(
121+
rpc(socket, "http/request", {
122+
requestId: "http-private",
123+
method: "GET",
124+
url: "http://127.0.0.1:6379/",
125+
}),
126+
).rejects.toThrow("Blocked hostname or private/internal IP");
127+
expect(runShellCommand).not.toHaveBeenCalled();
128+
socket.close();
129+
});
130+
131+
it("blocks metadata HTTP targets before starting the streaming sandbox backend", async () => {
132+
const buildExecSpec = vi.fn(async () => ({
133+
argv: [process.execPath, "-e", ""],
134+
env: testExecEnv(),
135+
stdinMode: "pipe-closed" as const,
136+
}));
137+
const sandbox = createSandboxContext({ buildExecSpec });
138+
const client = createClient();
139+
await ensureCodexSandboxExecServerEnvironment({
140+
client: client as never,
141+
sandbox,
142+
});
143+
const socket = await openSocket(execServerUrlFromClient(client));
144+
await rpc(socket, "initialize", { clientName: "test" });
145+
socket.send(JSON.stringify({ method: "initialized" }));
146+
147+
await expect(
148+
rpc(socket, "http/request", {
149+
requestId: "http-metadata",
150+
method: "GET",
151+
url: "http://metadata.google.internal/",
152+
streamResponse: true,
153+
}),
154+
).rejects.toThrow("Blocked hostname or private/internal IP");
155+
expect(buildExecSpec).not.toHaveBeenCalled();
156+
socket.close();
157+
});
158+
159+
it("blocks protected IP classes inside the sandbox Python helper", async () => {
160+
const blockedUrls = [
161+
"http://100.100.100.200/",
162+
"http://[fd00:ec2::254]/",
163+
"http://[fec0::1]/",
164+
"http://[64:ff9b::100.100.100.200]/",
165+
"http://[64:ff9b:1::6464:64c8]/",
166+
"http://[2002:6464:64c8::]/",
167+
"http://[2001::9b9b:9b37]/",
168+
"http://[2001:4860:1::5efe:6464:64c8]/",
169+
];
170+
171+
for (const url of blockedUrls) {
172+
const result = await runSandboxHttpRequestScript({
173+
method: "GET",
174+
url,
175+
timeoutMs: 1,
176+
});
177+
expect(result.code, url).not.toBe(0);
178+
expect(result.stdout, url).toBe("");
179+
expect(result.stderr, url).toContain("Blocked");
180+
}
181+
});
182+
74183
it("streams HTTP response body deltas from the sandbox backend", async () => {
75184
const headerLine = JSON.stringify({
76185
type: "headers",

extensions/codex/src/app-server/sandbox-exec-server/http.ts

Lines changed: 154 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
66
import { embeddedAgentLog } from "openclaw/plugin-sdk/agent-harness-runtime";
77
import type { SandboxContext } from "openclaw/plugin-sdk/sandbox";
8+
import { SsrFBlockedError, isBlockedHostnameOrIp } from "openclaw/plugin-sdk/ssrf-runtime";
89
import type { WebSocket } from "ws";
910
import type { JsonObject, JsonValue } from "../protocol.js";
1011
import { readHttpHeaders, requireNumber, requireObject, requireString } from "./json-rpc.js";
@@ -22,9 +23,11 @@ export async function httpRequest(
2223
): Promise<JsonObject> {
2324
const record = requireObject(params, "http/request params");
2425
const requestId = requireString(record.requestId, "requestId");
26+
const url = requireString(record.url, "url");
27+
assertSandboxHttpRequestTargetAllowed(url);
2528
const request = {
2629
method: requireString(record.method, "method"),
27-
url: requireString(record.url, "url"),
30+
url,
2831
headers: readHttpHeaders(record.headers),
2932
bodyBase64: typeof record.bodyBase64 === "string" ? record.bodyBase64 : undefined,
3033
timeoutMs:
@@ -52,6 +55,25 @@ type SandboxHttpRequest = {
5255
streamResponse: boolean;
5356
};
5457

58+
/**
 * Rejects an `http/request` target URL that must not be fetched.
 *
 * @param url - The raw URL string taken from the request params.
 * @throws SsrFBlockedError when the string is not a parseable URL, when its
 *   scheme is neither `http:` nor `https:`, or when `isBlockedHostnameOrIp`
 *   reports its hostname as blocked.
 */
function assertSandboxHttpRequestTargetAllowed(url: string): void {
  let target: URL | undefined;
  try {
    target = new URL(url);
  } catch {
    target = undefined;
  }
  if (target === undefined) {
    throw new SsrFBlockedError("Invalid URL supplied to sandbox http/request");
  }

  const { protocol, hostname } = target;
  const isHttpFamily = protocol === "http:" || protocol === "https:";
  if (!isHttpFamily) {
    throw new SsrFBlockedError(
      `Blocked non-HTTP(S) protocol in sandbox http/request: ${protocol}`,
    );
  }

  if (isBlockedHostnameOrIp(hostname)) {
    throw new SsrFBlockedError(
      `Blocked hostname or private/internal IP in sandbox http/request: ${hostname}`,
    );
  }
}
76+
5577
async function runSandboxHttpRequest(
5678
execServer: OpenClawExecServer,
5779
params: SandboxHttpRequest,
@@ -230,12 +252,14 @@ function readStreamingSandboxHttpResponse(params: {
230252
});
231253
}
232254

233-
const SANDBOX_HTTP_REQUEST_SCRIPT = String.raw`
255+
export const SANDBOX_HTTP_REQUEST_SCRIPT = String.raw`
234256
tmp=$(mktemp "$TMPDIR/openclaw-http.XXXXXX.py" 2>/dev/null || mktemp "/tmp/openclaw-http.XXXXXX.py") || exit 1
235257
trap 'rm -f "$tmp"' EXIT
236258
cat > "$tmp" <<'PY'
237259
import base64
238260
import json
261+
import ipaddress
262+
import socket
239263
import sys
240264
import urllib.error
241265
import urllib.parse
@@ -247,6 +271,127 @@ def emit(payload):
247271
def response_headers(response):
248272
return [{"name": name, "value": value} for name, value in response.headers.items()]
249273
274+
# Hostnames rejected by exact match (after normalization).
BLOCKED_HOSTNAMES = set(
    (
        "localhost",
        "localhost.localdomain",
        "metadata.google.internal",
    )
)

# Address literals rejected by exact match on their canonical string form.
CLOUD_METADATA_IP_ADDRESSES = set(
    (
        "100.100.100.200",
        "fd00:ec2::254",
    )
)

# Extra IPv4 ranges rejected on top of the ipaddress classification flags.
BLOCKED_IPV4_NETWORKS = tuple(
    map(
        ipaddress.ip_network,
        (
            "100.64.0.0/10",
            "198.18.0.0/15",
        ),
    )
)

# Extra IPv6 ranges rejected on top of the ipaddress classification flags.
BLOCKED_IPV6_NETWORKS = tuple(
    map(
        ipaddress.ip_network,
        (
            "100::/64",
            "2001:2::/48",
            "2001:20::/28",
            "2001:db8::/32",
            "fec0::/10",
        ),
    )
)

# Normalized hostname -> sorted list of addresses that passed the policy check.
PINNED_ADDRESSES = dict()
301+
302+
def normalize_hostname(hostname):
    # Canonical comparison form: no surrounding square brackets, no trailing
    # dots, lower case. A missing or empty hostname becomes the empty string.
    if not hostname:
        return ""
    unbracketed = hostname.strip("[]")
    return unbracketed.rstrip(".").lower()
304+
305+
def is_blocked_hostname(hostname):
    # True when the normalized name is on the exact-match blocklist or sits
    # under one of the reserved suffixes below.
    name = normalize_hostname(hostname)
    if name in BLOCKED_HOSTNAMES:
        return True
    return name.endswith((".localhost", ".local", ".internal"))
313+
314+
def is_blocked_ip(address):
    """Return True when address is an IP literal the helper must not connect to.

    Anything that does not parse as an IP address (for example a DNS name)
    yields False so the caller can go on to resolve it.
    """
    # Resolver results for scoped IPv6 addresses look like "fe80::1%eth0".
    # The ipaddress module only understands the zone suffix on Python 3.9+, so
    # on older interpreters such a literal would raise ValueError and be treated
    # as "not an IP" (allowed). Dropping the zone keeps the check
    # version-independent and lets the exact-match metadata lookup work too.
    literal = address.split("%", 1)[0] if isinstance(address, str) else address
    try:
        parsed = ipaddress.ip_address(literal)
    except ValueError:
        return False
    # IPv6 forms that carry an IPv4 address are judged by that inner address
    # first, so a tunnel or translation prefix cannot hide a blocked target.
    embedded_ipv4 = extract_embedded_ipv4(parsed)
    if embedded_ipv4 is not None and is_blocked_ip(str(embedded_ipv4)):
        return True
    if str(parsed).lower() in CLOUD_METADATA_IP_ADDRESSES:
        return True
    if isinstance(parsed, ipaddress.IPv4Address):
        extra_networks = BLOCKED_IPV4_NETWORKS
    else:
        extra_networks = BLOCKED_IPV6_NETWORKS
    if any(parsed in network for network in extra_networks):
        return True
    return (
        parsed.is_loopback
        or parsed.is_private
        or parsed.is_link_local
        or parsed.is_multicast
        or parsed.is_reserved
        or parsed.is_unspecified
    )
338+
339+
def ipv4_from_int(value):
    # Only the low 32 bits are kept, so wider integers can be passed directly.
    low32 = value & 0xffffffff
    return ipaddress.IPv4Address(low32)
341+
342+
def extract_embedded_ipv4(address):
    # Returns the IPv4 address carried inside an IPv6 address, or None when the
    # argument is not IPv6 or matches none of the layouts checked below.
    if not isinstance(address, ipaddress.IPv6Address):
        return None
    mapped = address.ipv4_mapped
    if mapped is not None:
        return mapped

    raw = int(address)
    # Eight 16-bit groups, most significant first.
    groups = tuple((raw >> (16 * (7 - index))) & 0xffff for index in range(8))
    leading_six = groups[:6]
    low32 = (groups[6] << 16) | groups[7]

    # Layouts whose first six groups are fixed and whose last 32 bits are the
    # IPv4 address: all-zero prefix, 64:ff9b::, and 64:ff9b:1::.
    trailing_ipv4_prefixes = (
        (0, 0, 0, 0, 0, 0),
        (0x64, 0xff9b, 0, 0, 0, 0),
        (0x64, 0xff9b, 1, 0, 0, 0),
    )
    if leading_six in trailing_ipv4_prefixes:
        return ipv4_from_int(low32)
    # 2002::/16: the IPv4 address is groups 1 and 2.
    if groups[0] == 0x2002:
        return ipv4_from_int((groups[1] << 16) | groups[2])
    # 2001:0::/32: the last 32 bits hold the bitwise-inverted IPv4 address.
    if groups[:2] == (0x2001, 0):
        return ipv4_from_int(low32 ^ 0xffffffff)
    # Interface identifier of the form 0000:5efe (two flag bits ignored): the
    # last 32 bits are the IPv4 address.
    if (groups[4] & 0xfcff) == 0 and groups[5] == 0x5efe:
        return ipv4_from_int(low32)
    return None
362+
363+
def assert_url_allowed(url):
    # Raises ValueError unless url is http(s), its host passes the literal
    # hostname/IP checks, and every address it resolves to passes is_blocked_ip.
    # On success the resolved addresses are recorded in PINNED_ADDRESSES.
    target = urllib.parse.urlparse(url)
    if target.scheme != "http" and target.scheme != "https":
        raise ValueError("http/request only supports http and https URLs")

    hostname = normalize_hostname(target.hostname)
    rejected_literal = (
        not hostname or is_blocked_hostname(hostname) or is_blocked_ip(hostname)
    )
    if rejected_literal:
        raise ValueError("Blocked hostname or private/internal/special-use IP address")

    try:
        resolved = socket.getaddrinfo(hostname, target.port, proto=socket.IPPROTO_TCP)
    except socket.gaierror as error:
        raise ValueError(f"Unable to resolve hostname: {hostname}") from error

    # Collect the distinct host strings from each result's sockaddr.
    addresses = set()
    for entry in resolved:
        sockaddr = entry[4]
        if sockaddr:
            addresses.add(sockaddr[0])

    if not addresses:
        raise ValueError("Blocked: resolves to private/internal/special-use IP address")
    for candidate in addresses:
        if is_blocked_ip(candidate):
            raise ValueError("Blocked: resolves to private/internal/special-use IP address")

    PINNED_ADDRESSES[hostname] = sorted(addresses)
378+
379+
class GuardedRedirectHandler(urllib.request.HTTPRedirectHandler):
    # Redirect handler that applies assert_url_allowed to every redirect target
    # before letting the stock handler build the follow-up request.
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        assert_url_allowed(newurl)
        stock_handler = urllib.request.HTTPRedirectHandler
        return stock_handler.redirect_request(self, req, fp, code, msg, headers, newurl)
383+
384+
def pinned_getaddrinfo(original_getaddrinfo):
    # Builds a getaddrinfo replacement. For hosts with an entry in
    # PINNED_ADDRESSES it looks up only the recorded addresses; every other
    # host is passed through to original_getaddrinfo unchanged.
    def getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
        vetted = PINNED_ADDRESSES.get(normalize_hostname(host))
        if vetted:
            return [
                entry
                for address in vetted
                for entry in original_getaddrinfo(address, port, family, type, proto, flags)
            ]
        return original_getaddrinfo(host, port, family, type, proto, flags)

    return getaddrinfo
394+
250395
def handle_response(input_data, response):
251396
headers = response_headers(response)
252397
status = int(getattr(response, "status", getattr(response, "code", 0)))
@@ -276,9 +421,7 @@ def handle_response(input_data, response):
276421
def main():
277422
input_data = json.load(sys.stdin)
278423
url = str(input_data.get("url", ""))
279-
parsed = urllib.parse.urlparse(url)
280-
if parsed.scheme not in ("http", "https"):
281-
raise ValueError("http/request only supports http and https URLs")
424+
assert_url_allowed(url)
282425
body_base64 = input_data.get("bodyBase64")
283426
data = base64.b64decode(body_base64) if isinstance(body_base64, str) else None
284427
request = urllib.request.Request(
@@ -292,11 +435,16 @@ def main():
292435
timeout = None
293436
if isinstance(timeout_ms, (int, float)) and timeout_ms > 0:
294437
timeout = timeout_ms / 1000
438+
opener = urllib.request.build_opener(urllib.request.ProxyHandler({}), GuardedRedirectHandler)
439+
original_getaddrinfo = socket.getaddrinfo
440+
socket.getaddrinfo = pinned_getaddrinfo(original_getaddrinfo)
295441
try:
296-
with urllib.request.urlopen(request, timeout=timeout) as response:
442+
with opener.open(request, timeout=timeout) as response:
297443
handle_response(input_data, response)
298444
except urllib.error.HTTPError as response:
299445
handle_response(input_data, response)
446+
finally:
447+
socket.getaddrinfo = original_getaddrinfo
300448
301449
if __name__ == "__main__":
302450
main()

0 commit comments

Comments
 (0)