Skip to content

Commit b5c98f1

Browse files
committed
test(generator-helper): add readline multibyte UTF-8 handling tests
1 parent 84fca2c commit b5c98f1

1 file changed

Lines changed: 82 additions & 0 deletions

File tree

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import readline from 'node:readline'
2+
import { Readable } from 'node:stream'
3+
4+
import { describe, expect, test } from 'vitest'
5+
6+
/**
 * Verify that node:readline correctly handles multibyte UTF-8 characters that are
 * split across stream chunks.
 *
 * The custom byline implementation we replaced used StringDecoder to handle this case.
 * These tests confirm that readline.createInterface() handles it correctly as well.
 *
 * Note: Integration tests can't reliably reproduce chunk splitting, so these unit tests
 * control the chunk boundaries directly.
 */
15+
16+
/**
 * Collect every line that readline produces from `stream`.
 *
 * @param stream - Readable source that is handed to `readline.createInterface()`.
 * @returns A promise that resolves with the lines, in emission order, once the
 *   interface emits `'close'`.
 * @throws Rejects with the emitted error if the stream or the interface emits
 *   `'error'`, instead of leaving the promise pending or crashing on an
 *   unhandled `'error'` event.
 */
async function collectLines(stream: Readable): Promise<string[]> {
  const rl = readline.createInterface({
    input: stream,
    crlfDelay: Infinity,
  })

  const lines: string[] = []

  // All listeners are attached synchronously, in the same tick as
  // createInterface(), so no early 'line' event can be missed.
  return new Promise<string[]>((resolve, reject) => {
    const fail = (error: Error): void => {
      // Reject first: rl.close() emits 'close', whose resolve() is then a no-op.
      reject(error)
      rl.close()
    }

    // A failure may surface on the input stream itself or be re-emitted by the
    // interface, so both are observed; a second reject() call is harmless.
    stream.once('error', fail)
    rl.once('error', fail)

    rl.on('line', (line) => lines.push(line))
    rl.once('close', () => resolve(lines))
  })
}
32+
33+
/**
 * Build a readable stream that pushes the given buffers one at a time.
 *
 * Each `read()` call pushes exactly one entry of `chunks`, so the tests decide
 * where the chunk boundaries fall. Once every entry has been pushed, the next
 * `read()` call pushes `null` to end the stream.
 *
 * @param chunks - Buffers to push, in order. The array is not modified.
 * @returns A `Readable` that yields the buffers and then ends.
 */
function createStreamFromChunks(chunks: readonly Buffer[]): Readable {
  let index = 0
  return new Readable({
    read() {
      if (index < chunks.length) {
        this.push(chunks[index])
        index++
      } else {
        // No chunks left: signal end-of-stream.
        this.push(null)
      }
    },
  })
}
49+
50+
// Each case feeds readline hand-built chunks whose boundary falls inside a
// multibyte UTF-8 sequence and expects the character to come out intact.
describe('readline multibyte UTF-8 handling', () => {
  test('handles single Japanese character (3 bytes) split across chunks', async () => {
    // "あ" is UTF-8 encoded as [0xE3, 0x81, 0x82]
    const chunk1 = Buffer.from([0xe3, 0x81]) // First 2 bytes
    const chunk2 = Buffer.from([0x82, 0x0a]) // Last byte + newline

    const stream = createStreamFromChunks([chunk1, chunk2])
    const lines = await collectLines(stream)

    expect(lines).toEqual(['あ'])
  })

  test('handles multiple lines with multibyte characters split across chunks', async () => {
    // Multiple lines: "日本語\n😀" (no trailing newline)
    // 日: [0xE6, 0x97, 0xA5]
    // 本: [0xE6, 0x9C, 0xAC]
    // 語: [0xE8, 0xAA, 0x9E]
    // 😀: [0xF0, 0x9F, 0x98, 0x80]
    const line1 = Buffer.from('日本語\n', 'utf8')
    const line2 = Buffer.from('😀', 'utf8') // No newline at end
    const fullBuffer = Buffer.concat([line1, line2])

    // Split in the middle of the second line, 2 bytes into the 4-byte emoji
    const splitPoint = line1.length + 2
    const chunk1 = fullBuffer.subarray(0, splitPoint)
    const chunk2 = fullBuffer.subarray(splitPoint)

    const stream = createStreamFromChunks([chunk1, chunk2])
    const lines = await collectLines(stream)

    expect(lines).toEqual(['日本語', '😀'])
  })
})

0 commit comments

Comments
 (0)