Skip to content

Commit b8fd762

Browse files
committed
still trying to make the UDHR tests useful, but not so long!
1 parent 4160f78 commit b8fd762

1 file changed

Lines changed: 21 additions & 39 deletions

File tree

tests/test_benchmarks.py

Lines changed: 21 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -303,23 +303,17 @@ def test_iter_sequences_mixed(benchmark):
303303

304304

305305
# UDHR-based benchmarks,
306-
# Load combined text (500+ world languages), split into 10k-line chunks
306+
# Load combined text (500+ world languages)
307307
UDHR_FILE = os.path.join(os.path.dirname(__file__), 'udhr_combined.txt')
308-
UDHR_CHUNK_SIZE = 10_000
309-
UDHR_CHUNKS = []
308+
UDHR_TEXT = ''
309+
UDHR_LINES = []
310+
UDHR_WIDTHS = []
310311
UDHR_FILLCHAR = '█'
311312
if os.path.exists(UDHR_FILE):
312313
with open(UDHR_FILE, encoding='utf-8') as f:
313-
all_lines = [line.rstrip() for line in f]
314-
for start in range(0, len(all_lines), UDHR_CHUNK_SIZE):
315-
end = min(start + UDHR_CHUNK_SIZE, len(all_lines))
316-
chunk_lines = all_lines[start:end]
317-
chunk_text = '\n'.join(chunk_lines)
318-
chunk_widths = [wcwidth.width(line) for line in chunk_lines]
319-
UDHR_CHUNKS.append(pytest.param(
320-
chunk_text, chunk_lines, chunk_widths,
321-
id=f"udhr_data_lines_{start}_{end}",
322-
))
314+
UDHR_TEXT = f.read()
315+
UDHR_LINES = [line.rstrip() for line in UDHR_TEXT.splitlines()]
316+
UDHR_WIDTHS = [wcwidth.width(line) for line in UDHR_LINES]
323317

324318
_udhr_skip = pytest.mark.skipif(
325319
not os.path.exists(UDHR_FILE),
@@ -328,38 +322,34 @@ def test_iter_sequences_mixed(benchmark):
328322

329323

330324
@_udhr_skip
331-
@pytest.mark.parametrize("text, lines, widths", UDHR_CHUNKS)
332-
def test_wrap_udhr(benchmark, text, lines, widths):
325+
def test_wrap_udhr(benchmark):
333326
"""Benchmark wrap() with multilingual UDHR text."""
334-
result = benchmark.pedantic(wcwidth.wrap, args=(text, 80), rounds=1, iterations=1)
327+
result = benchmark.pedantic(wcwidth.wrap, args=(UDHR_TEXT, 80), rounds=1, iterations=1)
335328
assert len(result)
336329
assert all(0 <= wcwidth.width(_l) <= 80 for _l in result)
337330

338331

339332
@_udhr_skip
340-
@pytest.mark.parametrize("text, lines, widths", UDHR_CHUNKS)
341-
def test_width_udhr(benchmark, text, lines, widths):
333+
def test_width_udhr(benchmark):
342334
"""Benchmark width() with multilingual UDHR text."""
343-
result = benchmark.pedantic(wcwidth.width, args=(text,), rounds=1, iterations=1)
335+
result = benchmark.pedantic(wcwidth.width, args=(UDHR_TEXT,), rounds=1, iterations=1)
344336
assert result > 0
345337

346338

347339
@_udhr_skip
348-
@pytest.mark.parametrize("text, lines, widths", UDHR_CHUNKS)
349-
def test_width_udhr_lines(benchmark, text, lines, widths):
340+
def test_width_udhr_lines(benchmark):
350341
"""Benchmark width() on individual UDHR lines."""
351-
result = benchmark.pedantic(lambda: sum(wcwidth.width(line) for line in lines),
342+
result = benchmark.pedantic(lambda: sum(wcwidth.width(line) for line in UDHR_LINES),
352343
rounds=1, iterations=1)
353344
assert result > 0
354345

355346

356347
@_udhr_skip
357-
@pytest.mark.parametrize("text, lines, widths", UDHR_CHUNKS)
358-
def test_width_wcswidth_consistency_udhr(benchmark, text, lines, widths):
348+
def test_width_wcswidth_consistency_udhr(benchmark):
359349
"""Verify width() and wcswidth() agree for printable multilingual text."""
360350
def check():
361351
failures = []
362-
for line in lines:
352+
for line in UDHR_LINES:
363353
if not line or not line.isprintable():
364354
continue
365355
w = wcwidth.width(line)
@@ -372,22 +362,15 @@ def check():
372362

373363

374364
@_udhr_skip
375-
@pytest.mark.parametrize("text, lines, widths", UDHR_CHUNKS)
376-
def test_width_fastpath_integrity_udhr(benchmark, text, lines, widths):
377-
"""
378-
Verify width() produces identical results with and without the fast path.
379-
380-
The fast path (for strings longer than _WIDTH_FAST_PATH_MIN_LEN) delegates to wcswidth(). The
381-
parse path processes character-by-character. Both must produce the same total across all UDHR
382-
lines.
383-
"""
365+
def test_width_fastpath_integrity_udhr(benchmark):
366+
"""Verify width() produces identical results with and without the fast path."""
384367
saved = _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN
385368

386369
def check():
387370
_wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = 0
388-
fast_total = sum(wcwidth.width(line) for line in lines)
371+
fast_total = sum(wcwidth.width(line) for line in UDHR_LINES)
389372
_wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = 999_999
390-
parse_total = sum(wcwidth.width(line) for line in lines)
373+
parse_total = sum(wcwidth.width(line) for line in UDHR_LINES)
391374
return fast_total, parse_total
392375

393376
fast_total, parse_total = benchmark.pedantic(check, rounds=1, iterations=1)
@@ -396,9 +379,8 @@ def check():
396379

397380

398381
@_udhr_skip
399-
@pytest.mark.parametrize("text, lines, widths", UDHR_CHUNKS)
400-
def test_ljust_udhr_lines(benchmark, text, lines, widths):
382+
def test_ljust_udhr_lines(benchmark):
401383
"""Benchmark ljust() on UDHR lines."""
402384
benchmark.pedantic(lambda: [wcwidth.ljust(line, w + 1, UDHR_FILLCHAR)
403-
for line, w in zip(lines, widths)],
385+
for line, w in zip(UDHR_LINES, UDHR_WIDTHS)],
404386
rounds=1, iterations=1)

0 commit comments

Comments
 (0)