Skip to content

Commit c28eecb

Browse files
chenglou, mayrang, voidborne-d, and lttlin
committed
fix: align line walkers and Hangul jamo breaks
Normalize chunked batch line starts through the same segment-kind policy used by streaming, so that layoutWithLines(), walkLineRanges(), layoutNextLine(), and layout() stay aligned after zero-width break opportunities and collapsible spaces.

Classify Hangul Compatibility Jamo (U+3130..U+318F) as CJK so that common Korean compatibility jamo runs break like browser text.

Refs #129, #135, #141.
Closes #121
Closes #142

Co-authored-by: mayrang <pkss0626@naver.com>
Co-authored-by: voidborne-d <voidborne-d@users.noreply.github.com>
Co-authored-by: lttlin <lttlin@gmail.com>
1 parent 65f3a5c commit c28eecb

3 files changed

Lines changed: 60 additions & 17 deletions

File tree

src/analysis.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ function isCJKCodePoint(codePoint: number): boolean {
121121
(codePoint >= 0x3000 && codePoint <= 0x303F) ||
122122
(codePoint >= 0x3040 && codePoint <= 0x309F) ||
123123
(codePoint >= 0x30A0 && codePoint <= 0x30FF) ||
124+
(codePoint >= 0x3130 && codePoint <= 0x318F) ||
124125
(codePoint >= 0xAC00 && codePoint <= 0xD7AF) ||
125126
(codePoint >= 0xFF00 && codePoint <= 0xFFEF)
126127
)

src/layout.test.ts

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ function isWideCharacter(ch: string): boolean {
7979
(code >= 0x3000 && code <= 0x303F) ||
8080
(code >= 0x3040 && code <= 0x309F) ||
8181
(code >= 0x30A0 && code <= 0x30FF) ||
82+
(code >= 0x3130 && code <= 0x318F) ||
8283
(code >= 0xAC00 && code <= 0xD7AF) ||
8384
(code >= 0xFF00 && code <= 0xFFEF)
8485
)
@@ -552,6 +553,19 @@ describe('prepare invariants', () => {
552553
expect(prepareWithSegments('테스트입니다.', FONT).segments.at(-1)).toBe('다.')
553554
})
554555

556+
test('treats Hangul compatibility jamo as CJK break units', () => {
557+
const prepared = prepareWithSegments('ㅋㅋㅋ 진짜', FONT)
558+
expect(prepared.segments).toEqual(['ㅋ', 'ㅋ', 'ㅋ', ' ', '진', '짜'])
559+
560+
const width = measureWidth('ㅋㅋ', FONT) + 0.1
561+
const lines = layoutWithLines(prepared, width, LINE_HEIGHT)
562+
expect(lines.lines.map(line => line.text)).toEqual(['ㅋㅋ', 'ㅋ ', '진짜'])
563+
expect(layout(prepared, width, LINE_HEIGHT)).toEqual({
564+
lineCount: 3,
565+
height: LINE_HEIGHT * 3,
566+
})
567+
})
568+
555569
test('keeps non-CJK glue-connected runs intact before CJK text', () => {
556570
const prepared = prepareWithSegments('foo\u00A0世界', FONT)
557571
expect(prepared.segments).toEqual(['foo\u00A0', '世', '界'])
@@ -600,7 +614,8 @@ describe('prepare invariants', () => {
600614
}
601615
})
602616

603-
test('isCJK covers the newer CJK extension blocks', () => {
617+
test('isCJK covers Hangul compatibility jamo and the newer CJK extension blocks', () => {
618+
expect(isCJK('ㅋ')).toBe(true)
604619
expect(isCJK('\u{2EBF0}')).toBe(true)
605620
expect(isCJK('\u{31350}')).toBe(true)
606621
expect(isCJK('\u{323B0}')).toBe(true)
@@ -840,6 +855,16 @@ describe('layout invariants', () => {
840855
expect(layoutWithLines(prepared, width, LINE_HEIGHT).lines).toEqual(collectStreamedLines(prepared, width))
841856
})
842857

858+
test('chunked batch line walking normalizes spaces after zero-width breaks like streaming', () => {
859+
const prepared = prepareWithSegments('x\u00AD A\u200B B', FONT)
860+
const width = measureWidth('x A', FONT) + 0.1
861+
const batched = layoutWithLines(prepared, width, LINE_HEIGHT)
862+
863+
expect(batched.lines.map(line => line.text)).toEqual(['x A\u200B', 'B'])
864+
expect(collectStreamedLines(prepared, width)).toEqual(batched.lines)
865+
expect(layout(prepared, width, LINE_HEIGHT).lineCount).toBe(batched.lineCount)
866+
})
867+
843868
test('layoutNextLine can resume from any fixed-width line start without hidden state', () => {
844869
const prepared = prepareWithSegments('foo trans\u00ADatlantic said "hello" to 世界 and waved. alpha\u200Bbeta 🚀', FONT)
845870
const width = 90

src/line-break.ts

Lines changed: 33 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,28 @@ type InternalLineVisitor = (
3838
endGraphemeIndex: number,
3939
) => void
4040

41-
function normalizeSimpleLineStartSegmentIndex(
41+
function consumesAtLineStart(kind: SegmentBreakKind): boolean {
42+
return kind === 'space' || kind === 'zero-width-break' || kind === 'soft-hyphen'
43+
}
44+
45+
function breaksAfter(kind: SegmentBreakKind): boolean {
46+
return (
47+
kind === 'space' ||
48+
kind === 'preserved-space' ||
49+
kind === 'tab' ||
50+
kind === 'zero-width-break' ||
51+
kind === 'soft-hyphen'
52+
)
53+
}
54+
55+
function normalizeLineStartSegmentIndex(
4256
prepared: PreparedLineBreakData,
4357
segmentIndex: number,
58+
endSegmentIndex = prepared.widths.length,
4459
): number {
45-
while (segmentIndex < prepared.widths.length) {
60+
while (segmentIndex < endSegmentIndex) {
4661
const kind = prepared.kinds[segmentIndex]!
47-
if (kind !== 'space' && kind !== 'zero-width-break' && kind !== 'soft-hyphen') break
62+
if (!consumesAtLineStart(kind)) break
4863
segmentIndex++
4964
}
5065
return segmentIndex
@@ -113,14 +128,11 @@ function normalizeLineStartInChunk(
113128
}
114129

115130
if (segmentIndex < chunk.startSegmentIndex) segmentIndex = chunk.startSegmentIndex
116-
while (segmentIndex < chunk.endSegmentIndex) {
117-
const kind = prepared.kinds[segmentIndex]!
118-
if (kind !== 'space' && kind !== 'zero-width-break' && kind !== 'soft-hyphen') {
119-
cursor.segmentIndex = segmentIndex
120-
cursor.graphemeIndex = 0
121-
return chunkIndex
122-
}
123-
segmentIndex++
131+
segmentIndex = normalizeLineStartSegmentIndex(prepared, segmentIndex, chunk.endSegmentIndex)
132+
if (segmentIndex < chunk.endSegmentIndex) {
133+
cursor.segmentIndex = segmentIndex
134+
cursor.graphemeIndex = 0
135+
return chunkIndex
124136
}
125137

126138
if (chunk.consumedEndSegmentIndex >= prepared.widths.length) return -1
@@ -274,13 +286,13 @@ function walkPreparedLinesSimple(
274286
let i = 0
275287
while (i < widths.length) {
276288
if (!hasContent) {
277-
i = normalizeSimpleLineStartSegmentIndex(prepared, i)
289+
i = normalizeLineStartSegmentIndex(prepared, i)
278290
if (i >= widths.length) break
279291
}
280292

281293
const w = widths[i]!
282294
const kind = kinds[i]!
283-
const breakAfter = kind === 'space' || kind === 'preserved-space' || kind === 'tab' || kind === 'zero-width-break' || kind === 'soft-hyphen'
295+
const breakAfter = breaksAfter(kind)
284296

285297
if (!hasContent) {
286298
if (w > maxWidth && breakableFitAdvances[i] !== null) {
@@ -525,8 +537,13 @@ export function walkPreparedLinesRaw(
525537

526538
let i = chunk.startSegmentIndex
527539
while (i < chunk.endSegmentIndex) {
540+
if (!hasContent) {
541+
i = normalizeLineStartSegmentIndex(prepared, i, chunk.endSegmentIndex)
542+
if (i >= chunk.endSegmentIndex) break
543+
}
544+
528545
const kind = kinds[i]!
529-
const breakAfter = kind === 'space' || kind === 'preserved-space' || kind === 'tab' || kind === 'zero-width-break' || kind === 'soft-hyphen'
546+
const breakAfter = breaksAfter(kind)
530547
const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]!
531548

532549
if (kind === 'soft-hyphen') {
@@ -800,7 +817,7 @@ function stepPreparedChunkLineGeometry(
800817

801818
for (let i = cursor.segmentIndex; i < chunk.endSegmentIndex; i++) {
802819
const kind = kinds[i]!
803-
const breakAfter = kind === 'space' || kind === 'preserved-space' || kind === 'tab' || kind === 'zero-width-break' || kind === 'soft-hyphen'
820+
const breakAfter = breaksAfter(kind)
804821
const startGraphemeIndex = i === cursor.segmentIndex ? cursor.graphemeIndex : 0
805822
const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]!
806823

@@ -902,7 +919,7 @@ function stepPreparedSimpleLineGeometry(
902919
for (let i = cursor.segmentIndex; i < widths.length; i++) {
903920
const w = widths[i]!
904921
const kind = kinds[i]!
905-
const breakAfter = kind === 'space' || kind === 'preserved-space' || kind === 'tab' || kind === 'zero-width-break' || kind === 'soft-hyphen'
922+
const breakAfter = breaksAfter(kind)
906923
const startGraphemeIndex = i === cursor.segmentIndex ? cursor.graphemeIndex : 0
907924
const breakableFitAdvance = breakableFitAdvances[i]
908925

0 commit comments

Comments
 (0)