Skip to content

Commit 48818dc

Browse files
committed
fix(process): handle Windows CJK encoding in child process output [AI-assisted]
1 parent bb543f7 commit 48818dc

7 files changed

Lines changed: 429 additions & 88 deletions

File tree

src/infra/windows-encoding.test.ts

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import { describe, expect, it, vi } from "vitest";
2+
import {
3+
decodeCapturedOutputBuffer,
4+
parseWindowsCodePage,
5+
resolveWindowsConsoleEncoding,
6+
} from "./windows-encoding.js";
7+
8+
// Table-driven coverage for parseWindowsCodePage: localized `chcp` banners that
// must yield a code page, followed by inputs that must be rejected with null.
describe("parseWindowsCodePage", () => {
  const scenarios: ReadonlyArray<{ title: string; input: string; expected: number | null }> = [
    { title: "parses English chcp output", input: "Active code page: 936", expected: 936 },
    { title: "parses Chinese chcp output", input: "活动代码页: 65001", expected: 65001 },
    { title: "returns null for empty string", input: "", expected: null },
    { title: "returns null when no code page number found", input: "no code page", expected: null },
    { title: "parses Japanese chcp output", input: "アクティブ コード ページ: 932", expected: 932 },
    { title: "parses Korean chcp output", input: "활성 코드 페이지: 949", expected: 949 },
    {
      title: "returns null for non-numeric code page",
      input: "Active code page: abc",
      expected: null,
    },
    { title: "returns null for zero code page", input: "Active code page: 0", expected: null },
  ];

  for (const { title, input, expected } of scenarios) {
    it(title, () => {
      const parsed = parseWindowsCodePage(input);
      if (expected === null) {
        expect(parsed).toBeNull();
      } else {
        expect(parsed).toBe(expected);
      }
    });
  }
});
41+
42+
// Only the non-Windows early exit is covered here; the Windows branch shells out
// to cmd.exe and is therefore not exercised by this suite.
describe("resolveWindowsConsoleEncoding", () => {
  it("returns null on non-Windows platforms", () => {
    // Report macOS as the current platform for the duration of this test.
    const fakePlatform = vi.spyOn(process, "platform", "get");
    fakePlatform.mockReturnValue("darwin");
    try {
      const resolved = resolveWindowsConsoleEncoding();
      expect(resolved).toBeNull();
    } finally {
      // Undo the spy even when the expectation fails so later tests see the real platform.
      fakePlatform.mockRestore();
    }
  });
});
52+
53+
// Exercises decodeCapturedOutputBuffer across platforms and encodings. Tests that
// rely on a legacy code page first check whether this runtime's TextDecoder knows
// the label; when it does not, the function falls back to UTF-8 and the raw bytes
// surface as U+FFFD replacement characters, which is what the fallback branch asserts.
describe("decodeCapturedOutputBuffer", () => {
  /** Returns true when `new TextDecoder(label)` succeeds in the current runtime. */
  const runtimeSupportsEncoding = (label: string): boolean => {
    try {
      void new TextDecoder(label);
      return true;
    } catch {
      return false;
    }
  };

  it("returns UTF-8 string on non-Windows platforms", () => {
    const raw = Buffer.from("hello world");
    const decoded = decodeCapturedOutputBuffer({ buffer: raw, platform: "darwin" });
    expect(decoded).toBe("hello world");
  });

  it("returns UTF-8 string when encoding is utf-8", () => {
    const raw = Buffer.from("hello world");
    const decoded = decodeCapturedOutputBuffer({
      buffer: raw,
      platform: "win32",
      windowsEncoding: "utf-8",
    });
    expect(decoded).toBe("hello world");
  });

  it("returns UTF-8 string when encoding is null", () => {
    const raw = Buffer.from("hello world");
    const decoded = decodeCapturedOutputBuffer({
      buffer: raw,
      platform: "win32",
      windowsEncoding: null,
    });
    expect(decoded).toBe("hello world");
  });

  it("decodes GBK output on Windows when code page is known", () => {
    const raw = Buffer.from([0xb2, 0xe2, 0xca, 0xd4, 0xa1, 0xab, 0xa3, 0xbb]);
    const decoded = decodeCapturedOutputBuffer({
      buffer: raw,
      platform: "win32",
      windowsEncoding: "gbk",
    });

    if (!runtimeSupportsEncoding("gbk")) {
      expect(decoded).toContain("\uFFFD");
      return;
    }
    expect(decoded).toBe("测试~;");
  });

  it("decodes Shift_JIS output on Windows", () => {
    const raw = Buffer.from([0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd]);
    const decoded = decodeCapturedOutputBuffer({
      buffer: raw,
      platform: "win32",
      windowsEncoding: "shift_jis",
    });

    if (!runtimeSupportsEncoding("shift_jis")) {
      expect(decoded).toContain("\uFFFD");
      return;
    }
    expect(decoded).toBe("こんにちは");
  });

  it("falls back to UTF-8 on unsupported encoding", () => {
    const raw = Buffer.from([0xb2, 0xe2, 0xca, 0xd4]);
    const decoded = decodeCapturedOutputBuffer({
      buffer: raw,
      platform: "win32",
      windowsEncoding: "nonexistent-encoding",
    });
    expect(decoded).toBe(raw.toString("utf8"));
  });

  it("handles empty buffer", () => {
    const raw = Buffer.alloc(0);
    const decoded = decodeCapturedOutputBuffer({ buffer: raw, platform: "win32" });
    expect(decoded).toBe("");
  });

  it("handles pure ASCII on Windows with GBK encoding", () => {
    const raw = Buffer.from("ASCII text");
    const decoded = decodeCapturedOutputBuffer({
      buffer: raw,
      platform: "win32",
      windowsEncoding: "gbk",
    });
    expect(decoded).toBe("ASCII text");
  });

  it("handles mixed CJK and ASCII in GBK", () => {
    const gbkBytes = Buffer.from([0xc4, 0xe3, 0xba, 0xc3]);
    const asciiBytes = Buffer.from(" hello");
    const raw = Buffer.concat([gbkBytes, asciiBytes]);
    const decoded = decodeCapturedOutputBuffer({
      buffer: raw,
      platform: "win32",
      windowsEncoding: "gbk",
    });

    if (!runtimeSupportsEncoding("gbk")) {
      // Previously this branch returned without asserting anything; check the
      // UTF-8 fallback the same way the other legacy-encoding tests do.
      expect(decoded).toContain("\uFFFD");
      return;
    }
    expect(decoded).toBe("你好 hello");
  });
});

src/infra/windows-encoding.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { spawnSync } from "node:child_process";
2+
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
3+
4+
/**
 * Windows code page numbers mapped to the encoding label used for decoding.
 * `resolveWindowsConsoleEncoding` looks up the parsed `chcp` code page here and
 * yields `null` for any code page that is not listed.
 */
const WINDOWS_CODEPAGE_ENCODING_MAP: Record<number, string> = {
  932: "shift_jis",
  936: "gbk",
  949: "euc-kr",
  950: "big5",
  1252: "windows-1252",
  54936: "gb18030",
  65001: "utf-8",
};

/**
 * Memoized result of the console encoding probe.
 * `undefined` means "not probed yet"; `null` means "probed, but no usable encoding".
 */
let cachedWindowsConsoleEncoding: string | null | undefined;
15+
16+
/**
 * Pulls a code page number out of text such as the output of `chcp`.
 *
 * The first standalone run of three to five digits is taken as the code page,
 * so the surrounding (possibly localized) wording does not matter.
 *
 * @param raw Text to scan.
 * @returns The positive code page number, or `null` when `raw` is empty, holds
 *   no such digit run, or the run parses to zero.
 */
export function parseWindowsCodePage(raw: string): number | null {
  const digits = raw ? /\b(\d{3,5})\b/.exec(raw)?.[1] : undefined;
  if (!digits) {
    return null;
  }
  const parsed = Number.parseInt(digits, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
30+
31+
/**
 * Determines the encoding label matching the active Windows console code page.
 *
 * On Windows this runs `chcp` through `cmd.exe` once, parses the reported code
 * page, maps it through `WINDOWS_CODEPAGE_ENCODING_MAP`, and memoizes the
 * outcome (including a `null` outcome) in `cachedWindowsConsoleEncoding`.
 *
 * @returns The encoding label, or `null` when not running on Windows, when the
 *   code page cannot be parsed or is not in the map, or when the probe throws.
 */
export function resolveWindowsConsoleEncoding(): string | null {
  if (process.platform !== "win32") {
    return null;
  }
  if (cachedWindowsConsoleEncoding !== undefined) {
    return cachedWindowsConsoleEncoding;
  }

  let resolved: string | null = null;
  try {
    const { stdout, stderr } = spawnSync("cmd.exe", ["/d", "/s", "/c", "chcp"], {
      windowsHide: true,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    // Scan both streams; either may be missing, hence the empty-string defaults.
    const codePage = parseWindowsCodePage(`${stdout ?? ""}\n${stderr ?? ""}`);
    if (codePage !== null) {
      resolved = WINDOWS_CODEPAGE_ENCODING_MAP[codePage] ?? null;
    }
  } catch {
    // Best effort: a failed probe is remembered as "unknown encoding".
    resolved = null;
  }
  cachedWindowsConsoleEncoding = resolved;
  return resolved;
}
53+
54+
/**
 * Decodes one captured child-process output buffer into a string.
 *
 * Off Windows, and on Windows when the effective encoding is missing or UTF-8,
 * the buffer is decoded as UTF-8. Otherwise it is decoded with a `TextDecoder`
 * for the given label, falling back to UTF-8 if constructing or running that
 * decoder throws.
 *
 * Each call decodes `buffer` on its own (no streaming state is kept), so a
 * multi-byte character split across two buffers is not reassembled here.
 *
 * @param params.buffer Raw bytes to decode.
 * @param params.platform Platform to assume; defaults to `process.platform`.
 * @param params.windowsEncoding Encoding label to use on Windows. `null` forces
 *   UTF-8. When omitted and the process really runs on Windows, the label comes
 *   from `resolveWindowsConsoleEncoding()`.
 * @returns The decoded text.
 */
export function decodeCapturedOutputBuffer(params: {
  buffer: Buffer;
  platform?: NodeJS.Platform;
  windowsEncoding?: string | null;
}): string {
  const { buffer } = params;
  const platform = params.platform ?? process.platform;
  if (platform !== "win32") {
    return buffer.toString("utf8");
  }
  let encoding = params.windowsEncoding;
  // Only probe the console when actually on Windows; a simulated "win32"
  // platform with no explicit encoding simply ends up on the UTF-8 path.
  if (encoding === undefined && process.platform === "win32") {
    encoding = resolveWindowsConsoleEncoding();
  }
  if (!encoding || normalizeLowercaseStringOrEmpty(encoding) === "utf-8") {
    return buffer.toString("utf8");
  }
  try {
    return new TextDecoder(encoding).decode(buffer);
  } catch {
    // Unknown label or decoder failure: UTF-8 is decoded only now, when needed,
    // instead of eagerly for every buffer.
    return buffer.toString("utf8");
  }
}
77+
78+
/**
 * Clears the memoized console encoding so the next
 * `resolveWindowsConsoleEncoding()` call on Windows probes `chcp` again.
 */
export function resetWindowsConsoleEncodingCache(): void {
  // `undefined` is the "not probed yet" state checked by the resolver.
  cachedWindowsConsoleEncoding = undefined;
}

src/memory-host-sdk/host/qmd-process.ts

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
import { spawn } from "node:child_process";
2+
import {
3+
decodeCapturedOutputBuffer,
4+
resolveWindowsConsoleEncoding,
5+
} from "../../infra/windows-encoding.js";
26
import {
37
materializeWindowsSpawnProgram,
48
resolveWindowsSpawnProgram,
@@ -124,16 +128,25 @@ export async function runCliCommand(params: {
124128
reject(new Error(`${params.commandSummary} timed out after ${params.timeoutMs}ms`));
125129
}, params.timeoutMs)
126130
: null;
127-
child.stdout.on("data", (data) => {
131+
const windowsEncoding = resolveWindowsConsoleEncoding();
132+
child.stdout.on("data", (data: Buffer) => {
128133
if (discardStdout) {
129134
return;
130135
}
131-
const next = appendOutputWithCap(stdout, data.toString("utf8"), params.maxOutputChars);
136+
const next = appendOutputWithCap(
137+
stdout,
138+
decodeCapturedOutputBuffer({ buffer: data, windowsEncoding }),
139+
params.maxOutputChars,
140+
);
132141
stdout = next.text;
133142
stdoutTruncated = stdoutTruncated || next.truncated;
134143
});
135-
child.stderr.on("data", (data) => {
136-
const next = appendOutputWithCap(stderr, data.toString("utf8"), params.maxOutputChars);
144+
child.stderr.on("data", (data: Buffer) => {
145+
const next = appendOutputWithCap(
146+
stderr,
147+
decodeCapturedOutputBuffer({ buffer: data, windowsEncoding }),
148+
params.maxOutputChars,
149+
);
137150
stderr = next.text;
138151
stderrTruncated = stderrTruncated || next.truncated;
139152
});

src/node-host/invoke.ts

Lines changed: 7 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { spawn, spawnSync } from "node:child_process";
1+
import { spawn } from "node:child_process";
22
import fs from "node:fs";
33
import path from "node:path";
44
import { GatewayClient } from "../gateway/client.js";
@@ -19,6 +19,11 @@ import {
1919
type ExecHostResponse,
2020
} from "../infra/exec-host.js";
2121
import { sanitizeHostExecEnv } from "../infra/host-env-security.js";
22+
import {
23+
decodeCapturedOutputBuffer,
24+
parseWindowsCodePage,
25+
resolveWindowsConsoleEncoding,
26+
} from "../infra/windows-encoding.js";
2227
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
2328
import { buildSystemRunApprovalPlan, handleSystemRunInvoke } from "./invoke-system-run.js";
2429
import type {
@@ -33,16 +38,6 @@ import { invokeRegisteredNodeHostCommand } from "./plugin-node-host.js";
3338
const OUTPUT_CAP = 200_000;
3439
const OUTPUT_EVENT_TAIL = 20_000;
3540
const DEFAULT_NODE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
36-
const WINDOWS_CODEPAGE_ENCODING_MAP: Record<number, string> = {
37-
65001: "utf-8",
38-
54936: "gb18030",
39-
936: "gbk",
40-
950: "big5",
41-
932: "shift_jis",
42-
949: "euc-kr",
43-
1252: "windows-1252",
44-
};
45-
let cachedWindowsConsoleEncoding: string | null | undefined;
4641

4742
const execHostEnforced =
4843
normalizeLowercaseStringOrEmpty(process.env.OPENCLAW_NODE_EXEC_HOST ?? "") === "app";
@@ -105,64 +100,7 @@ function truncateOutput(raw: string, maxChars: number): { text: string; truncate
105100
return { text: `... (truncated) ${raw.slice(raw.length - maxChars)}`, truncated: true };
106101
}
107102

108-
export function parseWindowsCodePage(raw: string): number | null {
109-
if (!raw) {
110-
return null;
111-
}
112-
const match = raw.match(/\b(\d{3,5})\b/);
113-
if (!match?.[1]) {
114-
return null;
115-
}
116-
const codePage = Number.parseInt(match[1], 10);
117-
if (!Number.isFinite(codePage) || codePage <= 0) {
118-
return null;
119-
}
120-
return codePage;
121-
}
122-
123-
function resolveWindowsConsoleEncoding(): string | null {
124-
if (process.platform !== "win32") {
125-
return null;
126-
}
127-
if (cachedWindowsConsoleEncoding !== undefined) {
128-
return cachedWindowsConsoleEncoding;
129-
}
130-
try {
131-
const result = spawnSync("cmd.exe", ["/d", "/s", "/c", "chcp"], {
132-
windowsHide: true,
133-
encoding: "utf8",
134-
stdio: ["ignore", "pipe", "pipe"],
135-
});
136-
const raw = `${result.stdout ?? ""}\n${result.stderr ?? ""}`;
137-
const codePage = parseWindowsCodePage(raw);
138-
cachedWindowsConsoleEncoding =
139-
codePage !== null ? (WINDOWS_CODEPAGE_ENCODING_MAP[codePage] ?? null) : null;
140-
} catch {
141-
cachedWindowsConsoleEncoding = null;
142-
}
143-
return cachedWindowsConsoleEncoding;
144-
}
145-
146-
export function decodeCapturedOutputBuffer(params: {
147-
buffer: Buffer;
148-
platform?: NodeJS.Platform;
149-
windowsEncoding?: string | null;
150-
}): string {
151-
const utf8 = params.buffer.toString("utf8");
152-
const platform = params.platform ?? process.platform;
153-
if (platform !== "win32") {
154-
return utf8;
155-
}
156-
const encoding = params.windowsEncoding ?? resolveWindowsConsoleEncoding();
157-
if (!encoding || normalizeLowercaseStringOrEmpty(encoding) === "utf-8") {
158-
return utf8;
159-
}
160-
try {
161-
return new TextDecoder(encoding).decode(params.buffer);
162-
} catch {
163-
return utf8;
164-
}
165-
}
103+
export { decodeCapturedOutputBuffer, parseWindowsCodePage } from "../infra/windows-encoding.js";
166104

167105
function redactExecApprovals(file: ExecApprovalsFile): ExecApprovalsFile {
168106
const socketPath = file.socket?.path?.trim();

0 commit comments

Comments
 (0)