Skip to content

Commit d1eedef

Browse files
committed
fix(clownfish): address review for ghcrawl-199248-agentic-merge (1)
1 parent a218574 commit d1eedef

2 files changed

Lines changed: 66 additions & 4 deletions

File tree

src/infra/windows-encoding.test.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ describe("windows output encoding", () => {
4848
expect(decoder.flush()).toBe("");
4949
});
5050

51+
// Regression test for a legacy-encoded character split across two chunks.
// The first chunk (0xC4) is on its own a plausible UTF-8 two-byte lead, so the
// test expects decode() to emit nothing for it. The second chunk (0xE3) is not
// a UTF-8 continuation byte, so the test expects the decoder to fall back to
// the configured "gbk" encoding and decode BOTH bytes together as "你" — i.e.
// the previously held-back byte must be replayed, not dropped.
it("replays buffered UTF-8 lead bytes when split GBK output falls back to the console code page", () => {
52+
const decoder = createWindowsOutputDecoder({
53+
platform: "win32",
54+
windowsEncoding: "gbk",
55+
});
56+
57+
expect(decoder.decode(Buffer.from([0xc4]))).toBe("");
58+
expect(decoder.decode(Buffer.from([0xe3]))).toBe("你");
59+
// Nothing may be left buffered once the pair has been emitted.
expect(decoder.flush()).toBe("");
60+
});
61+
5162
it("keeps split valid UTF-8 output on the UTF-8 path for streaming decode", () => {
5263
const decoder = createWindowsOutputDecoder({
5364
platform: "win32",

src/infra/windows-encoding.ts

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ export function createWindowsOutputDecoder(params?: {
9595
const utf8Decoder =
9696
platform === "win32" && legacyDecoder ? new TextDecoder("utf-8", { fatal: true }) : null;
9797
let useLegacyDecoder = false;
98+
let pendingUtf8Bytes = Buffer.alloc(0);
9899

99100
return {
100101
decode(chunk) {
@@ -105,11 +106,16 @@ export function createWindowsOutputDecoder(params?: {
105106
if (useLegacyDecoder) {
106107
return legacyDecoder.decode(buffer, { stream: true });
107108
}
109+
const replayBuffer =
110+
pendingUtf8Bytes.length > 0 ? Buffer.concat([pendingUtf8Bytes, buffer]) : buffer;
108111
try {
109-
return utf8Decoder.decode(buffer, { stream: true });
112+
const decoded = utf8Decoder.decode(buffer, { stream: true });
113+
pendingUtf8Bytes = Buffer.from(getTrailingIncompleteUtf8Bytes(replayBuffer));
114+
return decoded;
110115
} catch {
111116
useLegacyDecoder = true;
112-
return legacyDecoder.decode(buffer, { stream: true });
117+
pendingUtf8Bytes = Buffer.alloc(0);
118+
return legacyDecoder.decode(replayBuffer, { stream: true });
113119
}
114120
},
115121
flush() {
@@ -120,14 +126,59 @@ export function createWindowsOutputDecoder(params?: {
120126
return legacyDecoder.decode();
121127
}
122128
try {
123-
return utf8Decoder.decode();
129+
const decoded = utf8Decoder.decode();
130+
pendingUtf8Bytes = Buffer.alloc(0);
131+
return decoded;
124132
} catch {
125-
return "";
133+
useLegacyDecoder = true;
134+
const replayBuffer = pendingUtf8Bytes;
135+
pendingUtf8Bytes = Buffer.alloc(0);
136+
return replayBuffer.length > 0 ? legacyDecoder.decode(replayBuffer) : "";
126137
}
127138
},
128139
};
129140
}
130141

142+
/**
 * Returns the bytes at the end of `buffer` that look like the start of a
 * multi-byte UTF-8 sequence whose remaining bytes have not arrived yet.
 *
 * The scan walks backwards over at most three trailing continuation bytes
 * (0x80–0xBF) and then inspects the byte in front of them as a candidate lead
 * byte.
 *
 * @param buffer Raw bytes to inspect; it is not modified.
 * @returns
 *   - `buffer` itself when it is empty or consists only of (up to three)
 *     continuation bytes, i.e. the scan ran off the front of the buffer;
 *   - a view (`subarray`, sharing memory with `buffer`) starting at the lead
 *     byte when that lead byte announces more bytes than are present;
 *   - an empty buffer when the tail is a complete sequence or the candidate
 *     byte is not a multi-byte lead.
 */
function getTrailingIncompleteUtf8Bytes(buffer: Buffer): Buffer {
  // A UTF-8 sequence is at most four bytes: one lead plus three continuations.
  const maxContinuationBytes = 3;

  let index = buffer.length - 1;
  let continuationBytes = 0;
  while (index >= 0 && continuationBytes < maxContinuationBytes) {
    // Read the byte once so the undefined guard actually narrows the value
    // that the range comparison uses.
    const byte = buffer[index];
    if (byte === undefined || byte < 0x80 || byte > 0xbf) {
      break;
    }
    continuationBytes += 1;
    index -= 1;
  }

  // `index < 0` means no candidate lead byte exists (empty input, or nothing
  // but continuation bytes): hand the whole buffer back unchanged.
  const leadByte = index >= 0 ? buffer[index] : undefined;
  if (leadByte === undefined) {
    return buffer;
  }

  const sequenceLength = getUtf8SequenceLength(leadByte);
  if (sequenceLength <= 1) {
    // Not a multi-byte lead, so nothing is pending.
    return Buffer.alloc(0);
  }

  // Lead byte plus the continuation bytes that follow it.
  const availableBytes = continuationBytes + 1;
  return availableBytes < sequenceLength ? buffer.subarray(index) : Buffer.alloc(0);
}
168+
169+
/**
 * Maps a byte to the total length of the UTF-8 sequence it would start.
 *
 * @param byte A single byte value.
 * @returns `2` for 0xC2–0xDF, `3` for 0xE0–0xEF, `4` for 0xF0–0xF4, and `1`
 *   for every other value. The `1` bucket therefore covers ASCII as well as
 *   bytes that cannot start a multi-byte sequence here (continuation bytes
 *   0x80–0xBF, 0xC0/0xC1, and 0xF5 and above).
 */
function getUtf8SequenceLength(byte: number): number {
  if (byte >= 0xc2 && byte <= 0xdf) {
    return 2;
  }
  if (byte >= 0xe0 && byte <= 0xef) {
    return 3;
  }
  if (byte >= 0xf0 && byte <= 0xf4) {
    return 4;
  }
  return 1;
}
181+
131182
function decodeStrictUtf8(buffer: Buffer): string | null {
132183
try {
133184
return new TextDecoder("utf-8", { fatal: true }).decode(buffer);

0 commit comments

Comments
 (0)