@@ -337,51 +337,81 @@ constexpr auto floor_log2_pow10(int e) noexcept -> int {
337337 return e * 1741647 >> 19 ;
338338}
339339
340- constexpr uint64_t powers_of_5[] = {
341- 0x0000000000000001 , 0x0000000000000005 , 0x0000000000000019 ,
342- 0x000000000000007d , 0x0000000000000271 , 0x0000000000000c35 ,
343- 0x0000000000003d09 , 0x000000000001312d , 0x000000000005f5e1 ,
344- 0x00000000001dcd65 , 0x00000000009502f9 , 0x0000000002e90edd ,
345- 0x000000000e8d4a51 , 0x0000000048c27395 , 0x000000016bcc41e9 ,
346- 0x000000071afd498d , 0x0000002386f26fc1 , 0x000000b1a2bc2ec5 ,
347- 0x000003782dace9d9 , 0x00001158e460913d , 0x000056bc75e2d631 ,
348- 0x0001b1ae4d6e2ef5 , 0x000878678326eac9 , 0x002a5a058fc295ed ,
349- 0x00d3c21bcecceda1 , 0x0422ca8b0a00a425 , 0x14adf4b7320334b9 };
350-
351340// 128-bit significands of powers of 10 rounded down.
352341// Generated using 192-bit arithmetic method by Dougall Johnson.
353342struct pow10_significands_table {
354343 static constexpr bool compress = ZMIJ_OPTIMIZE_SIZE != 0 ;
355344 static constexpr bool split_tables = !compress && ZMIJ_AARCH64 != 0 ;
356345 static constexpr int num_pow10 = 617 ;
357- static constexpr int compression_ratio = compress ? 27 : 1 ;
358- uint64_t data[(num_pow10 / compression_ratio + compress) * 2 ] = {};
346+ uint64_t data[compress ? 1 : num_pow10 * 2 ] = {};
359347
360348 ZMIJ_CONSTEXPR auto operator [](int dec_exp) const noexcept -> uint128 {
361349 constexpr int dec_exp_min = -292 ;
362350 if (compress) {
363- int base_index = (dec_exp - dec_exp_min) / compression_ratio;
364- int base_dec_exp = base_index * compression_ratio + dec_exp_min;
365- int offset = dec_exp - base_dec_exp;
366-
367- uint64_t base_hi = data[base_index * 2 ];
368- uint64_t base_lo = data[base_index * 2 + 1 ];
369- if (offset == 0 ) return {base_hi, base_lo};
370-
371- int shift = floor_log2_pow10 (base_dec_exp + offset) -
372- floor_log2_pow10 (base_dec_exp) - offset;
373-
374- uint64_t pow5 = powers_of_5[offset];
375- uint128_t p_hi = umul128 (base_hi, pow5);
376- uint128_t p_lo = umul128 (base_lo, pow5);
377-
378- uint64_t r_hi = uint64_t (p_hi >> 64 );
379- uint64_t r_lo = uint64_t (p_hi) + uint64_t (p_lo >> 64 );
380- if (r_lo < uint64_t (p_hi)) ++r_hi;
351+ unsigned i = dec_exp - dec_exp_min;
352+ // 672 bytes of data
353+ static const uint64_t pow10s[28 ] = {
354+ 0x8000000000000000 , 0xa000000000000000 , 0xc800000000000000 ,
355+ 0xfa00000000000000 , 0x9c40000000000000 , 0xc350000000000000 ,
356+ 0xf424000000000000 , 0x9896800000000000 , 0xbebc200000000000 ,
357+ 0xee6b280000000000 , 0x9502f90000000000 , 0xba43b74000000000 ,
358+ 0xe8d4a51000000000 , 0x9184e72a00000000 , 0xb5e620f480000000 ,
359+ 0xe35fa931a0000000 , 0x8e1bc9bf04000000 , 0xb1a2bc2ec5000000 ,
360+ 0xde0b6b3a76400000 , 0x8ac7230489e80000 , 0xad78ebc5ac620000 ,
361+ 0xd8d726b7177a8000 , 0x878678326eac9000 , 0xa968163f0a57b400 ,
362+ 0xd3c21bcecceda100 , 0x84595161401484a0 , 0xa56fa5b99019a5c8 ,
363+ 0xcecb8f27f4200f3a ,
364+ };
365+
366+ static const uint128 high_parts[23 ] = {
367+ {0xaf8e5410288e1b6f , 0x07ecf0ae5ee44dda },
368+ {0xb1442798f49ffb4a , 0x99cd11cfdf41779d },
369+ {0xb2fe3f0b8599ef07 , 0x861fa7e6dcb4aa15 },
370+ {0xb4bca50b065abe63 , 0x0fed077a756b53aa },
371+ {0xb67f6455292cbf08 , 0x1a3bc84c17b1d543 },
372+ {0xb84687c269ef3bfb , 0x3d5d514f40eea742 },
373+ {0xba121a4650e4ddeb , 0x92f34d62616ce413 },
374+ {0xbbe226efb628afea , 0x890489f70a55368c },
375+ {0xbdb6b8e905cb600f , 0x5400e987bbc1c921 },
376+ {0xbf8fdb78849a5f96 , 0xde98520472bdd034 },
377+ {0xc16d9a0095928a27 , 0x75b7053c0f178294 },
378+ {0xc350000000000000 , 0x0000000000000000 },
379+ {0xc5371912364ce305 , 0x6c28000000000000 },
380+ {0xc722f0ef9d80aad6 , 0x424d3ad2b7b97ef6 },
381+ {0xc913936dd571c84c , 0x03bc3a19cd1e38ea },
382+ {0xcb090c8001ab551c , 0x5cadf5bfd3072cc6 },
383+ {0xcd036837130890a1 , 0x36dba887c37a8c10 },
384+ {0xcf02b2c21207ef2e , 0x94f967e45e03f4bc },
385+ {0xd106f86e69d785c7 , 0xe13336d701beba52 },
386+ {0xd31045a8341ca07c , 0x1ede48111209a051 },
387+ {0xd51ea6fa85785631 , 0x552a74227f3ea566 },
388+ {0xd732290fbacaf133 , 0xa97c177947ad4096 },
389+ {0xd94ad8b1c7380874 , 0x18375281ae7822bc },
390+ };
391+
392+ uint32_t fixups[20 ] = {0x05271b1f , 0x00000c20 , 0x00003200 , 0x12100020 ,
393+ 0x00000000 , 0x06000000 , 0xc16409c0 , 0xaf26700f ,
394+ 0xeb987b07 , 0x0000000d , 0x00000000 , 0x66fbfffe ,
395+ 0xb74100ec , 0xa0669fe8 , 0xedb21280 , 0x00000686 ,
396+ 0x0a021200 , 0x29b89c20 , 0x08bc0eda , 0x00000000 };
397+
398+ uint64_t m = pow10s[(i + 11 ) % 28 ];
399+ uint128 h = high_parts[(i + 11 ) / 28 ];
400+
401+ uint64_t h1 = umul128_hi64 (h.lo , m);
402+
403+ uint64_t c0 = h.lo * m;
404+ uint64_t c1 = h1 + h.hi * m;
405+ uint64_t c2 = (c1 < h1) + umul128_hi64 (h.hi , m);
406+
407+ uint128 result;
408+ if (c2 >> 63 )
409+ result = uint128{c2, c1};
410+ else
411+ result = uint128{(c2 << 1 ) | (c1 >> 63 ), (c1 << 1 ) | (c0 >> 63 )};
381412
382- uint64_t lo = ((uint64_t (p_lo) >> shift) | (r_lo << (64 - shift))) + 1 ;
383- uint64_t hi = (r_lo >> shift) | (r_hi << (64 - shift));
384- return {lo != 0 ? hi : hi + 1 , lo};
413+ result.lo -= (fixups[i >> 5 ] >> (i & 31 )) & 1 ;
414+ return result;
385415 }
386416 if (!split_tables) {
387417 int index = (dec_exp - dec_exp_min) * 2 ;
@@ -406,16 +436,13 @@ struct pow10_significands_table {
406436 uint192 current = {0xe000000000000000 , 0x25e8e89c13bb0f7a ,
407437 0xff77b1fcbebcdc4f };
408438 uint64_t ten = 0xa000000000000000 ;
409- for (int i = 0 ; i < num_pow10; ++i) {
410- if (i % compression_ratio == 0 ) {
411- int index = i / compression_ratio;
412- if (split_tables) {
413- data[num_pow10 - index - 1 ] = current.w2 ;
414- data[num_pow10 * 2 - index - 1 ] = current.w1 ;
415- } else {
416- data[index * 2 ] = current.w2 ;
417- data[index * 2 + 1 ] = current.w1 ;
418- }
439+ for (int i = 0 ; i < num_pow10 && !compress; ++i) {
440+ if (split_tables) {
441+ data[num_pow10 - i - 1 ] = current.w2 ;
442+ data[num_pow10 * 2 - i - 1 ] = current.w1 ;
443+ } else {
444+ data[i * 2 ] = current.w2 ;
445+ data[i * 2 + 1 ] = current.w1 ;
419446 }
420447
421448 uint64_t h0 = umul128_hi64 (current.w0 , ten);
0 commit comments