|
| 1 | +import readline from 'node:readline' |
| 2 | +import { Readable } from 'node:stream' |
| 3 | + |
| 4 | +import { describe, expect, test } from 'vitest' |
| 5 | + |
/**
 * Verify node:readline correctly handles multibyte UTF-8 characters split across chunks.
 *
 * The custom byline implementation we replaced had StringDecoder logic for this.
 * These tests confirm readline.createInterface() handles it correctly.
 *
 * Note: Integration tests can't reliably reproduce chunk splitting, so these unit tests
 * directly control chunk boundaries.
 */
| 15 | + |
| 16 | +/** |
| 17 | + * Helper function to collect lines from a readline interface |
| 18 | + */ |
| 19 | +async function collectLines(stream: Readable): Promise<string[]> { |
| 20 | + const rl = readline.createInterface({ |
| 21 | + input: stream, |
| 22 | + crlfDelay: Infinity, |
| 23 | + }) |
| 24 | + |
| 25 | + const lines: string[] = [] |
| 26 | + rl.on('line', (line) => lines.push(line)) |
| 27 | + |
| 28 | + return new Promise((resolve) => { |
| 29 | + rl.on('close', () => resolve(lines)) |
| 30 | + }) |
| 31 | +} |
| 32 | + |
| 33 | +/** |
| 34 | + * Helper function to create a readable stream from chunks |
| 35 | + */ |
| 36 | +function createStreamFromChunks(chunks: Buffer[]): Readable { |
| 37 | + let index = 0 |
| 38 | + return new Readable({ |
| 39 | + read() { |
| 40 | + if (index < chunks.length) { |
| 41 | + this.push(chunks[index]) |
| 42 | + index++ |
| 43 | + } else { |
| 44 | + this.push(null) |
| 45 | + } |
| 46 | + }, |
| 47 | + }) |
| 48 | +} |
| 49 | + |
| 50 | +describe('readline multibyte UTF-8 handling', () => { |
| 51 | + test('handles single Japanese character (3 bytes) split across chunks', async () => { |
| 52 | + // 「あ」is UTF-8 encoded as [0xE3, 0x81, 0x82] |
| 53 | + const chunk1 = Buffer.from([0xe3, 0x81]) // First 2 bytes |
| 54 | + const chunk2 = Buffer.from([0x82, 0x0a]) // Last byte + newline |
| 55 | + |
| 56 | + const stream = createStreamFromChunks([chunk1, chunk2]) |
| 57 | + const lines = await collectLines(stream) |
| 58 | + |
| 59 | + expect(lines).toEqual(['あ']) |
| 60 | + }) |
| 61 | + |
| 62 | + test('handles multiple lines with multibyte characters split across chunks', async () => { |
| 63 | + // Multiple lines: "日本語\n😀" (no trailing newline) |
| 64 | + // 日: [0xE6, 0x97, 0xA5] |
| 65 | + // 本: [0xE6, 0x9C, 0xAC] |
| 66 | + // 語: [0xE8, 0xAA, 0x9E] |
| 67 | + // 😀: [0xF0, 0x9F, 0x98, 0x80] |
| 68 | + const line1 = Buffer.from('日本語\n', 'utf8') |
| 69 | + const line2 = Buffer.from('😀', 'utf8') // No newline at end |
| 70 | + const fullBuffer = Buffer.concat([line1, line2]) |
| 71 | + |
| 72 | + // Split in the middle of the second line (within the emoji) |
| 73 | + const splitPoint = line1.length + 2 // Split after first 2 bytes of emoji |
| 74 | + const chunk1 = fullBuffer.subarray(0, splitPoint) |
| 75 | + const chunk2 = fullBuffer.subarray(splitPoint) |
| 76 | + |
| 77 | + const stream = createStreamFromChunks([chunk1, chunk2]) |
| 78 | + const lines = await collectLines(stream) |
| 79 | + |
| 80 | + expect(lines).toEqual(['日本語', '😀']) |
| 81 | + }) |
| 82 | +}) |