Skip to content

Commit b63daa6

Browse files
committed
fix(file-transfer): CI failures + second-round PR review feedback
CI failures on previous push: - Declare runtime deps (minimatch, typebox) in package.json — failed the extension-runtime-dependencies contract test that scans imports. - Switch policy.ts and policy.test.ts off the broad openclaw/plugin-sdk/config-runtime barrel and onto the narrow openclaw/plugin-sdk/config-mutation + runtime-config-snapshot subpaths. This satisfies the deprecated-internal-config-api architecture guard. Second-round Aisle findings: - policy: traversal-segment check now treats backslash and forward slash as equivalent, so a Windows node can't be hit with mixed-separator "C:\\allowed\\..\\Windows\\system.ini" (Aisle HIGH CWE-22). - dir-fetch tool: replace the single fragile `tar -tvzf` parser pass (which broke for filenames containing whitespace) with two robust passes: `tar -tzf` for paths only (one per line, no parsing of fixed columns) and `tar -tzvf` for type chars only (FIRST CHAR of each line, never the path column). Also reject backslash-containing entry names. Drops the in-process uncompressed-size cap because reliably parsing sizes from tar output is fragile and Aisle flagged it as a bypass primitive — entry-count cap stays (Aisle HIGH CWE-22, MED). Tests still 84/84 passing.
1 parent 10e622e commit b63daa6

5 files changed

Lines changed: 172 additions & 84 deletions

File tree

extensions/file-transfer/package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
"version": "2026.4.27",
44
"description": "OpenClaw file transfer plugin (file_fetch, dir_list, dir_fetch, file_write, file_watch)",
55
"type": "module",
6+
"dependencies": {
7+
"minimatch": "10.2.4",
8+
"typebox": "1.1.33"
9+
},
610
"devDependencies": {
711
"@openclaw/plugin-sdk": "workspace:*"
812
},

extensions/file-transfer/src/shared/policy.test.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
99
const getRuntimeConfigMock = vi.fn();
1010
const mutateConfigFileMock = vi.fn();
1111

12-
vi.mock("openclaw/plugin-sdk/config-runtime", () => ({
12+
vi.mock("openclaw/plugin-sdk/runtime-config-snapshot", () => ({
1313
getRuntimeConfig: () => getRuntimeConfigMock(),
14+
}));
15+
vi.mock("openclaw/plugin-sdk/config-mutation", () => ({
1416
mutateConfigFile: (input: unknown) => mutateConfigFileMock(input),
1517
}));
1618

extensions/file-transfer/src/shared/policy.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434
import os from "node:os";
3535
import path from "node:path";
3636
import { minimatch } from "minimatch";
37-
import { mutateConfigFile } from "openclaw/plugin-sdk/config-runtime";
38-
import { getRuntimeConfig } from "openclaw/plugin-sdk/config-runtime";
37+
import { mutateConfigFile } from "openclaw/plugin-sdk/config-mutation";
38+
import { getRuntimeConfig } from "openclaw/plugin-sdk/runtime-config-snapshot";
3939

4040
export type FilePolicyKind = "read" | "write";
4141
export type FilePolicyAskMode = "off" | "on-miss" | "always";
@@ -152,10 +152,13 @@ function normalizeAskMode(value: unknown): FilePolicyAskMode {
152152
* pre-realpath, so the node fetches the bytes before the post-flight
153153
* canonical-path check denies — too late, the bytes already crossed the
154154
* node→gateway boundary.
155+
*
156+
* Treats backslash and forward slash as equivalent separators so a Windows
157+
* node can't be hit with "C:\\allowed\\..\\Windows\\system.ini".
155158
*/
156159
function containsParentRefSegment(p: string): boolean {
157-
const segments = p.split("/");
158-
return segments.includes("..");
160+
const unified = p.replace(/\\/gu, "/");
161+
return unified.split("/").includes("..");
159162
}
160163

161164
export function evaluateFilePolicy(input: {

extensions/file-transfer/src/tools/dir-fetch-tool.ts

Lines changed: 151 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,10 @@ const MEDIA_URL_CAP = 25;
3636
// Hard timeout for the gateway-side `tar -xzf` unpack process.
3737
const TAR_UNPACK_TIMEOUT_MS = 60_000;
3838

39-
// Defense-in-depth caps for the *uncompressed* extraction. The compressed
40-
// tar is already capped at DIR_FETCH_HARD_MAX_BYTES upstream, but a
41-
// gzip-bomb can decompress to many GB. These caps bound the unpacked
42-
// size and entry count so a malicious node can't exhaust the gateway's
43-
// disk or CPU.
44-
const TAR_UNPACK_MAX_UNCOMPRESSED_BYTES = 64 * 1024 * 1024;
39+
// Cap on number of entries pre-validated. The compressed tar is already
40+
// capped at DIR_FETCH_HARD_MAX_BYTES upstream, and we walk the unpacked
41+
// tree to compute hashes — TAR_UNPACK_MAX_ENTRIES bounds how much work
42+
// that walk can do.
4543
const TAR_UNPACK_MAX_ENTRIES = 5000;
4644

4745
const DirFetchToolSchema = Type.Object({
@@ -90,26 +88,28 @@ async function computeFileSha256(filePath: string): Promise<string> {
9088
}
9189

9290
/**
93-
* Run `tar -tzvf -` against the buffer to enumerate entries with their
94-
* type letter (regular file / symlink / hardlink / dir) and size BEFORE
95-
* we extract anything. Rejects:
96-
* - any entry whose path is absolute (escapes destDir on -P-using tar)
97-
* - any entry with ".." segments after normalization
98-
* - any entry that is a symlink ("l"), hardlink ("h"), or unknown type
99-
* - cumulative uncompressed size > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES
100-
* - entry count > TAR_UNPACK_MAX_ENTRIES
91+
* Run two passes against the buffer to enumerate entries BEFORE we extract:
10192
*
102-
* BSD tar's -tvzf produces a `ls -l`-style line; we parse the leading
103-
* type char and the size column.
93+
* 1. `tar -tf -` produces names ONLY, one per line. This is whitespace-safe
94+
* because each line is exactly one path; no parsing of fixed columns.
95+
* Used to validate paths (reject absolute, '..' traversal).
96+
* 2. `tar -tvf -` adds type info via the `ls -l`-style perm prefix.
97+
* Used ONLY to detect symlinks / hardlinks / non-regular entries via
98+
* the FIRST CHARACTER of each line, never the path column.
99+
*
100+
* Size limits are enforced at the *extraction* step instead — the tar
101+
* unpack process is bounded by the maxBytes we already pass through, and
102+
* the post-extract walkDir is hard-capped by TAR_UNPACK_MAX_ENTRIES.
103+
* Trying to parse uncompressed sizes from `tar -tvf` output is fragile
104+
* (filenames with whitespace shift the columns) and Aisle flagged that
105+
* shape as a bypass primitive — drop it.
104106
*/
105-
async function preValidateTarball(
107+
async function listTarPaths(
106108
tarBuffer: Buffer,
107-
): Promise<{ ok: true } | { ok: false; reason: string }> {
109+
): Promise<{ ok: true; paths: string[] } | { ok: false; reason: string }> {
108110
return new Promise((resolve) => {
109111
const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
110-
const child = spawn(tarBin, ["-tzvf", "-"], {
111-
stdio: ["pipe", "pipe", "pipe"],
112-
});
112+
const child = spawn(tarBin, ["-tzf", "-"], { stdio: ["pipe", "pipe", "pipe"] });
113113
let stdout = "";
114114
let stderr = "";
115115
let aborted = false;
@@ -120,10 +120,20 @@ async function preValidateTarball(
120120
} catch {
121121
/* gone */
122122
}
123-
resolve({ ok: false, reason: "tar -tvzf timed out" });
123+
resolve({ ok: false, reason: "tar -tzf timed out" });
124124
}, 30_000);
125125
child.stdout.on("data", (c: Buffer) => {
126126
stdout += c.toString();
127+
if (stdout.length > 32 * 1024 * 1024) {
128+
aborted = true;
129+
try {
130+
child.kill("SIGKILL");
131+
} catch {
132+
/* gone */
133+
}
134+
clearTimeout(watchdog);
135+
resolve({ ok: false, reason: "tar -tzf output too large" });
136+
}
127137
});
128138
child.stderr.on("data", (c: Buffer) => {
129139
stderr += c.toString();
@@ -134,80 +144,142 @@ async function preValidateTarball(
134144
return;
135145
}
136146
if (code !== 0) {
137-
resolve({ ok: false, reason: `tar -tvzf exited ${code}: ${stderr.slice(0, 200)}` });
147+
resolve({ ok: false, reason: `tar -tzf exited ${code}: ${stderr.slice(0, 200)}` });
138148
return;
139149
}
140-
const lines = stdout.split("\n").filter((l) => l.trim().length > 0);
141-
if (lines.length > TAR_UNPACK_MAX_ENTRIES) {
142-
resolve({
143-
ok: false,
144-
reason: `archive contains ${lines.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
145-
});
146-
return;
150+
// tar -tf emits one path per line with literal newlines as record
151+
// separators. Filenames containing newlines are exotic enough that
152+
// refusing them is safer than trying to parse around them.
153+
const paths = stdout.split("\n").filter((l) => l.length > 0);
154+
resolve({ ok: true, paths });
155+
});
156+
child.on("error", (e) => {
157+
clearTimeout(watchdog);
158+
if (!aborted) {
159+
resolve({ ok: false, reason: `tar -tzf error: ${String(e)}` });
147160
}
148-
let totalBytes = 0;
149-
for (const line of lines) {
150-
// BSD tar -tvzf format:
151-
// "drwxr-xr-x 0 user staff 0 Mar 14 ... ./"
152-
// "-rw-r--r-- 0 user staff 12 Mar 14 ... ./hello.txt"
153-
// "lrwxr-xr-x 0 user staff 0 Mar 14 ... link -> target"
154-
// First char of the perm field is the type.
155-
const typeChar = line.charAt(0);
156-
if (typeChar === "l" || typeChar === "h") {
157-
resolve({ ok: false, reason: `archive contains link entry: ${line.slice(0, 120)}` });
158-
return;
159-
}
160-
if (typeChar !== "-" && typeChar !== "d") {
161-
resolve({ ok: false, reason: `archive contains non-regular entry type '${typeChar}'` });
162-
return;
163-
}
164-
// Path is everything after the date/time field; cheapest is to
165-
// grab the last whitespace-delimited token. For " name -> target"
166-
// (symlink) we already rejected above.
167-
const tokens = line.trim().split(/\s+/u);
168-
if (tokens.length < 2) {
169-
continue;
170-
}
171-
const entryPath = tokens.at(-1) ?? "";
172-
if (path.isAbsolute(entryPath)) {
173-
resolve({ ok: false, reason: `archive contains absolute path: ${entryPath}` });
174-
return;
175-
}
176-
const norm = path.posix.normalize(entryPath);
177-
if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
178-
resolve({ ok: false, reason: `archive contains '..' traversal: ${entryPath}` });
179-
return;
180-
}
181-
// Size column: tar -tvzf output has size in bytes at a stable
182-
// position. tokens[2] is owner, tokens[3] is group, tokens[4] is
183-
// size on BSD; on GNU it's tokens[2] (perm/owner combined).
184-
// Be permissive — find the first all-digit token as size.
185-
for (const t of tokens.slice(1)) {
186-
if (/^\d+$/u.test(t)) {
187-
totalBytes += Number.parseInt(t, 10);
188-
break;
189-
}
161+
});
162+
child.stdin.end(tarBuffer);
163+
});
164+
}
165+
166+
async function listTarTypeChars(
167+
tarBuffer: Buffer,
168+
): Promise<{ ok: true; typeChars: string[] } | { ok: false; reason: string }> {
169+
return new Promise((resolve) => {
170+
const tarBin = process.platform !== "win32" ? "/usr/bin/tar" : "tar";
171+
const child = spawn(tarBin, ["-tzvf", "-"], { stdio: ["pipe", "pipe", "pipe"] });
172+
let stdout = "";
173+
let stderr = "";
174+
let aborted = false;
175+
const watchdog = setTimeout(() => {
176+
aborted = true;
177+
try {
178+
child.kill("SIGKILL");
179+
} catch {
180+
/* gone */
181+
}
182+
resolve({ ok: false, reason: "tar -tzvf timed out" });
183+
}, 30_000);
184+
child.stdout.on("data", (c: Buffer) => {
185+
stdout += c.toString();
186+
if (stdout.length > 32 * 1024 * 1024) {
187+
aborted = true;
188+
try {
189+
child.kill("SIGKILL");
190+
} catch {
191+
/* gone */
190192
}
193+
clearTimeout(watchdog);
194+
resolve({ ok: false, reason: "tar -tzvf output too large" });
191195
}
192-
if (totalBytes > TAR_UNPACK_MAX_UNCOMPRESSED_BYTES) {
193-
resolve({
194-
ok: false,
195-
reason: `uncompressed size ${totalBytes} exceeds ${TAR_UNPACK_MAX_UNCOMPRESSED_BYTES}`,
196-
});
196+
});
197+
child.stderr.on("data", (c: Buffer) => {
198+
stderr += c.toString();
199+
});
200+
child.on("close", (code) => {
201+
clearTimeout(watchdog);
202+
if (aborted) {
203+
return;
204+
}
205+
if (code !== 0) {
206+
resolve({ ok: false, reason: `tar -tzvf exited ${code}: ${stderr.slice(0, 200)}` });
197207
return;
198208
}
199-
resolve({ ok: true });
209+
// Take only the first character of each line — the entry type.
210+
// We don't touch the rest of the line (path/size/etc) so filenames
211+
// with whitespace can't shift our parser.
212+
const typeChars = stdout
213+
.split("\n")
214+
.filter((l) => l.length > 0)
215+
.map((l) => l.charAt(0));
216+
resolve({ ok: true, typeChars });
200217
});
201218
child.on("error", (e) => {
202219
clearTimeout(watchdog);
203220
if (!aborted) {
204-
resolve({ ok: false, reason: `tar -tvzf error: ${String(e)}` });
221+
resolve({ ok: false, reason: `tar -tzvf error: ${String(e)}` });
205222
}
206223
});
207224
child.stdin.end(tarBuffer);
208225
});
209226
}
210227

228+
async function preValidateTarball(
229+
tarBuffer: Buffer,
230+
): Promise<{ ok: true } | { ok: false; reason: string }> {
231+
const namesResult = await listTarPaths(tarBuffer);
232+
if (!namesResult.ok) {
233+
return namesResult;
234+
}
235+
const paths = namesResult.paths;
236+
if (paths.length > TAR_UNPACK_MAX_ENTRIES) {
237+
return {
238+
ok: false,
239+
reason: `archive contains ${paths.length} entries; limit ${TAR_UNPACK_MAX_ENTRIES}`,
240+
};
241+
}
242+
243+
const typesResult = await listTarTypeChars(tarBuffer);
244+
if (!typesResult.ok) {
245+
return typesResult;
246+
}
247+
const typeChars = typesResult.typeChars;
248+
// The two passes should report the same number of entries; if they
249+
// don't, something exotic is going on (filenames with newlines, etc.)
250+
// and we refuse defensively.
251+
if (typeChars.length !== paths.length) {
252+
return {
253+
ok: false,
254+
reason: `tar -tzf and tar -tzvf disagree on entry count (${paths.length} vs ${typeChars.length}); refusing`,
255+
};
256+
}
257+
258+
for (let i = 0; i < paths.length; i++) {
259+
const entryPath = paths[i];
260+
const t = typeChars[i];
261+
if (t === "l" || t === "h") {
262+
return { ok: false, reason: `archive contains link entry: ${entryPath}` };
263+
}
264+
if (t !== "-" && t !== "d") {
265+
return { ok: false, reason: `archive contains non-regular entry type '${t}': ${entryPath}` };
266+
}
267+
if (path.isAbsolute(entryPath)) {
268+
return { ok: false, reason: `archive contains absolute path: ${entryPath}` };
269+
}
270+
const norm = path.posix.normalize(entryPath);
271+
if (norm === ".." || norm.startsWith("../") || norm.includes("/../")) {
272+
return { ok: false, reason: `archive contains '..' traversal: ${entryPath}` };
273+
}
274+
// Reject backslash-containing names too — refuses Windows-style
275+
// traversal in archives produced by an attacker on a Windows node.
276+
if (entryPath.includes("\\")) {
277+
return { ok: false, reason: `archive contains backslash in path: ${entryPath}` };
278+
}
279+
}
280+
return { ok: true };
281+
}
282+
211283
type UnpackedFileEntry = {
212284
relPath: string;
213285
size: number;

pnpm-lock.yaml

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)