@@ -303,23 +303,17 @@ def test_iter_sequences_mixed(benchmark):
303303
304304
305305# UDHR-based benchmarks,
306- # Load combined text (500+ world languages), split into 10k-line chunks
306+ # Load combined text (500+ world languages)
307307UDHR_FILE = os .path .join (os .path .dirname (__file__ ), 'udhr_combined.txt' )
308- UDHR_CHUNK_SIZE = 10_000
309- UDHR_CHUNKS = []
308+ UDHR_TEXT = ''
309+ UDHR_LINES = []
310+ UDHR_WIDTHS = []
310311UDHR_FILLCHAR = '█'
311312if os .path .exists (UDHR_FILE ):
312313 with open (UDHR_FILE , encoding = 'utf-8' ) as f :
313- all_lines = [line .rstrip () for line in f ]
314- for start in range (0 , len (all_lines ), UDHR_CHUNK_SIZE ):
315- end = min (start + UDHR_CHUNK_SIZE , len (all_lines ))
316- chunk_lines = all_lines [start :end ]
317- chunk_text = '\n ' .join (chunk_lines )
318- chunk_widths = [wcwidth .width (line ) for line in chunk_lines ]
319- UDHR_CHUNKS .append (pytest .param (
320- chunk_text , chunk_lines , chunk_widths ,
321- id = f"udhr_data_lines_{ start } _{ end } " ,
322- ))
314+ UDHR_TEXT = f .read ()
315+ UDHR_LINES = [line .rstrip () for line in UDHR_TEXT .splitlines ()]
316+ UDHR_WIDTHS = [wcwidth .width (line ) for line in UDHR_LINES ]
323317
324318_udhr_skip = pytest .mark .skipif (
325319 not os .path .exists (UDHR_FILE ),
@@ -328,38 +322,34 @@ def test_iter_sequences_mixed(benchmark):
328322
329323
330324@_udhr_skip
331- @pytest .mark .parametrize ("text, lines, widths" , UDHR_CHUNKS )
332- def test_wrap_udhr (benchmark , text , lines , widths ):
325+ def test_wrap_udhr (benchmark ):
333326 """Benchmark wrap() with multilingual UDHR text."""
334- result = benchmark .pedantic (wcwidth .wrap , args = (text , 80 ), rounds = 1 , iterations = 1 )
327+ result = benchmark .pedantic (wcwidth .wrap , args = (UDHR_TEXT , 80 ), rounds = 1 , iterations = 1 )
335328 assert len (result )
336329 assert all (0 <= wcwidth .width (_l ) <= 80 for _l in result )
337330
338331
339332@_udhr_skip
340- @pytest .mark .parametrize ("text, lines, widths" , UDHR_CHUNKS )
341- def test_width_udhr (benchmark , text , lines , widths ):
333+ def test_width_udhr (benchmark ):
342334 """Benchmark width() with multilingual UDHR text."""
343- result = benchmark .pedantic (wcwidth .width , args = (text ,), rounds = 1 , iterations = 1 )
335+ result = benchmark .pedantic (wcwidth .width , args = (UDHR_TEXT ,), rounds = 1 , iterations = 1 )
344336 assert result > 0
345337
346338
347339@_udhr_skip
348- @pytest .mark .parametrize ("text, lines, widths" , UDHR_CHUNKS )
349- def test_width_udhr_lines (benchmark , text , lines , widths ):
340+ def test_width_udhr_lines (benchmark ):
350341 """Benchmark width() on individual UDHR lines."""
351- result = benchmark .pedantic (lambda : sum (wcwidth .width (line ) for line in lines ),
342+ result = benchmark .pedantic (lambda : sum (wcwidth .width (line ) for line in UDHR_LINES ),
352343 rounds = 1 , iterations = 1 )
353344 assert result > 0
354345
355346
356347@_udhr_skip
357- @pytest .mark .parametrize ("text, lines, widths" , UDHR_CHUNKS )
358- def test_width_wcswidth_consistency_udhr (benchmark , text , lines , widths ):
348+ def test_width_wcswidth_consistency_udhr (benchmark ):
359349 """Verify width() and wcswidth() agree for printable multilingual text."""
360350 def check ():
361351 failures = []
362- for line in lines :
352+ for line in UDHR_LINES :
363353 if not line or not line .isprintable ():
364354 continue
365355 w = wcwidth .width (line )
@@ -372,22 +362,15 @@ def check():
372362
373363
374364@_udhr_skip
375- @pytest .mark .parametrize ("text, lines, widths" , UDHR_CHUNKS )
376- def test_width_fastpath_integrity_udhr (benchmark , text , lines , widths ):
377- """
378- Verify width() produces identical results with and without the fast path.
379-
380- The fast path (for strings longer than _WIDTH_FAST_PATH_MIN_LEN) delegates to wcswidth(). The
381- parse path processes character-by-character. Both must produce the same total across all UDHR
382- lines.
383- """
365+ def test_width_fastpath_integrity_udhr (benchmark ):
366+ """Verify width() produces identical results with and without the fast path."""
384367 saved = _wcwidth_module ._WIDTH_FAST_PATH_MIN_LEN
385368
386369 def check ():
387370 _wcwidth_module ._WIDTH_FAST_PATH_MIN_LEN = 0
388- fast_total = sum (wcwidth .width (line ) for line in lines )
371+ fast_total = sum (wcwidth .width (line ) for line in UDHR_LINES )
389372 _wcwidth_module ._WIDTH_FAST_PATH_MIN_LEN = 999_999
390- parse_total = sum (wcwidth .width (line ) for line in lines )
373+ parse_total = sum (wcwidth .width (line ) for line in UDHR_LINES )
391374 return fast_total , parse_total
392375
393376 fast_total , parse_total = benchmark .pedantic (check , rounds = 1 , iterations = 1 )
@@ -396,9 +379,8 @@ def check():
396379
397380
398381@_udhr_skip
399- @pytest .mark .parametrize ("text, lines, widths" , UDHR_CHUNKS )
400- def test_ljust_udhr_lines (benchmark , text , lines , widths ):
382+ def test_ljust_udhr_lines (benchmark ):
401383 """Benchmark ljust() on UDHR lines."""
402384 benchmark .pedantic (lambda : [wcwidth .ljust (line , w + 1 , UDHR_FILLCHAR )
403- for line , w in zip (lines , widths )],
385+ for line , w in zip (UDHR_LINES , UDHR_WIDTHS )],
404386 rounds = 1 , iterations = 1 )
0 commit comments