@@ -342,78 +342,91 @@ constexpr auto floor_log2_pow10(int e) noexcept -> int {
342342}
343343
344344// 128-bit significands of powers of 10 rounded down.
345- // Generation with 192-bit arithmetic and compression by Dougall Johnson.
346345struct pow10_significands_table {
347346 static constexpr bool compress = ZMIJ_OPTIMIZE_SIZE != 0 ;
348347 static constexpr bool split_tables = !compress && ZMIJ_AARCH64 != 0 ;
349348 static constexpr int num_pow10 = 617 ;
350349 uint64_t data[compress ? 1 : num_pow10 * 2 ] = {};
351350
351+ // Computes the 128-bit significand of 10**i using method by Dougall Johnson.
352+ static constexpr auto compute (unsigned i) noexcept -> uint128 {
353+ constexpr uint64_t pow10s[28 ] = {
354+ 0x8000000000000000 , 0xa000000000000000 , 0xc800000000000000 ,
355+ 0xfa00000000000000 , 0x9c40000000000000 , 0xc350000000000000 ,
356+ 0xf424000000000000 , 0x9896800000000000 , 0xbebc200000000000 ,
357+ 0xee6b280000000000 , 0x9502f90000000000 , 0xba43b74000000000 ,
358+ 0xe8d4a51000000000 , 0x9184e72a00000000 , 0xb5e620f480000000 ,
359+ 0xe35fa931a0000000 , 0x8e1bc9bf04000000 , 0xb1a2bc2ec5000000 ,
360+ 0xde0b6b3a76400000 , 0x8ac7230489e80000 , 0xad78ebc5ac620000 ,
361+ 0xd8d726b7177a8000 , 0x878678326eac9000 , 0xa968163f0a57b400 ,
362+ 0xd3c21bcecceda100 , 0x84595161401484a0 , 0xa56fa5b99019a5c8 ,
363+ 0xcecb8f27f4200f3a ,
364+ };
365+
366+ constexpr uint128 high_parts[23 ] = {
367+ {0xaf8e5410288e1b6f , 0x07ecf0ae5ee44dda },
368+ {0xb1442798f49ffb4a , 0x99cd11cfdf41779d },
369+ {0xb2fe3f0b8599ef07 , 0x861fa7e6dcb4aa15 },
370+ {0xb4bca50b065abe63 , 0x0fed077a756b53aa },
371+ {0xb67f6455292cbf08 , 0x1a3bc84c17b1d543 },
372+ {0xb84687c269ef3bfb , 0x3d5d514f40eea742 },
373+ {0xba121a4650e4ddeb , 0x92f34d62616ce413 },
374+ {0xbbe226efb628afea , 0x890489f70a55368c },
375+ {0xbdb6b8e905cb600f , 0x5400e987bbc1c921 },
376+ {0xbf8fdb78849a5f96 , 0xde98520472bdd034 },
377+ {0xc16d9a0095928a27 , 0x75b7053c0f178294 },
378+ {0xc350000000000000 , 0x0000000000000000 },
379+ {0xc5371912364ce305 , 0x6c28000000000000 },
380+ {0xc722f0ef9d80aad6 , 0x424d3ad2b7b97ef6 },
381+ {0xc913936dd571c84c , 0x03bc3a19cd1e38ea },
382+ {0xcb090c8001ab551c , 0x5cadf5bfd3072cc6 },
383+ {0xcd036837130890a1 , 0x36dba887c37a8c10 },
384+ {0xcf02b2c21207ef2e , 0x94f967e45e03f4bc },
385+ {0xd106f86e69d785c7 , 0xe13336d701beba52 },
386+ {0xd31045a8341ca07c , 0x1ede48111209a051 },
387+ {0xd51ea6fa85785631 , 0x552a74227f3ea566 },
388+ {0xd732290fbacaf133 , 0xa97c177947ad4096 },
389+ {0xd94ad8b1c7380874 , 0x18375281ae7822bc },
390+ };
391+
392+ constexpr uint32_t fixups[20 ] = {
393+ 0x05271b1f , 0x00000c20 , 0x00003200 , 0x12100020 , 0x00000000 ,
394+ 0x06000000 , 0xc16409c0 , 0xaf26700f , 0xeb987b07 , 0x0000000d ,
395+ 0x00000000 , 0x66fbfffe , 0xb74100ec , 0xa0669fe8 , 0xedb21280 ,
396+ 0x00000686 , 0x0a021200 , 0x29b89c20 , 0x08bc0eda , 0x00000000 };
397+
398+ uint64_t m = pow10s[(i + 11 ) % 28 ];
399+ uint128 h = high_parts[(i + 11 ) / 28 ];
400+
401+ uint64_t h1 = umul128_hi64 (h.lo , m);
402+
403+ uint64_t c0 = h.lo * m;
404+ uint64_t c1 = h1 + h.hi * m;
405+ uint64_t c2 = (c1 < h1) + umul128_hi64 (h.hi , m);
406+
407+ uint128 result = (c2 >> 63 ) != 0
408+ ? uint128{c2, c1}
409+ : uint128{c2 << 1 | c1 >> 63 , c1 << 1 | c0 >> 63 };
410+ result.lo -= (fixups[i >> 5 ] >> (i & 31 )) & 1 ;
411+ return result;
412+ }
413+
414+ constexpr pow10_significands_table () {
415+ for (int i = 0 ; i < num_pow10 && !compress; ++i) {
416+ uint128 result = compute (i);
417+ if (split_tables) {
418+ data[num_pow10 - i - 1 ] = result.hi ;
419+ data[num_pow10 * 2 - i - 1 ] = result.lo ;
420+ } else {
421+ data[i * 2 ] = result.hi ;
422+ data[i * 2 + 1 ] = result.lo ;
423+ }
424+ }
425+ }
426+
352427 ZMIJ_CONSTEXPR auto operator [](int dec_exp) const noexcept -> uint128 {
353428 constexpr int dec_exp_min = -292 ;
354- if (compress) {
355- // Compressed version using only 672 bytes of data.
356- unsigned i = dec_exp - dec_exp_min;
357- static const uint64_t pow10s[28 ] = {
358- 0x8000000000000000 , 0xa000000000000000 , 0xc800000000000000 ,
359- 0xfa00000000000000 , 0x9c40000000000000 , 0xc350000000000000 ,
360- 0xf424000000000000 , 0x9896800000000000 , 0xbebc200000000000 ,
361- 0xee6b280000000000 , 0x9502f90000000000 , 0xba43b74000000000 ,
362- 0xe8d4a51000000000 , 0x9184e72a00000000 , 0xb5e620f480000000 ,
363- 0xe35fa931a0000000 , 0x8e1bc9bf04000000 , 0xb1a2bc2ec5000000 ,
364- 0xde0b6b3a76400000 , 0x8ac7230489e80000 , 0xad78ebc5ac620000 ,
365- 0xd8d726b7177a8000 , 0x878678326eac9000 , 0xa968163f0a57b400 ,
366- 0xd3c21bcecceda100 , 0x84595161401484a0 , 0xa56fa5b99019a5c8 ,
367- 0xcecb8f27f4200f3a ,
368- };
369-
370- static const uint128 high_parts[23 ] = {
371- {0xaf8e5410288e1b6f , 0x07ecf0ae5ee44dda },
372- {0xb1442798f49ffb4a , 0x99cd11cfdf41779d },
373- {0xb2fe3f0b8599ef07 , 0x861fa7e6dcb4aa15 },
374- {0xb4bca50b065abe63 , 0x0fed077a756b53aa },
375- {0xb67f6455292cbf08 , 0x1a3bc84c17b1d543 },
376- {0xb84687c269ef3bfb , 0x3d5d514f40eea742 },
377- {0xba121a4650e4ddeb , 0x92f34d62616ce413 },
378- {0xbbe226efb628afea , 0x890489f70a55368c },
379- {0xbdb6b8e905cb600f , 0x5400e987bbc1c921 },
380- {0xbf8fdb78849a5f96 , 0xde98520472bdd034 },
381- {0xc16d9a0095928a27 , 0x75b7053c0f178294 },
382- {0xc350000000000000 , 0x0000000000000000 },
383- {0xc5371912364ce305 , 0x6c28000000000000 },
384- {0xc722f0ef9d80aad6 , 0x424d3ad2b7b97ef6 },
385- {0xc913936dd571c84c , 0x03bc3a19cd1e38ea },
386- {0xcb090c8001ab551c , 0x5cadf5bfd3072cc6 },
387- {0xcd036837130890a1 , 0x36dba887c37a8c10 },
388- {0xcf02b2c21207ef2e , 0x94f967e45e03f4bc },
389- {0xd106f86e69d785c7 , 0xe13336d701beba52 },
390- {0xd31045a8341ca07c , 0x1ede48111209a051 },
391- {0xd51ea6fa85785631 , 0x552a74227f3ea566 },
392- {0xd732290fbacaf133 , 0xa97c177947ad4096 },
393- {0xd94ad8b1c7380874 , 0x18375281ae7822bc },
394- };
395-
396- uint32_t fixups[20 ] = {0x05271b1f , 0x00000c20 , 0x00003200 , 0x12100020 ,
397- 0x00000000 , 0x06000000 , 0xc16409c0 , 0xaf26700f ,
398- 0xeb987b07 , 0x0000000d , 0x00000000 , 0x66fbfffe ,
399- 0xb74100ec , 0xa0669fe8 , 0xedb21280 , 0x00000686 ,
400- 0x0a021200 , 0x29b89c20 , 0x08bc0eda , 0x00000000 };
401-
402- uint64_t m = pow10s[(i + 11 ) % 28 ];
403- uint128 h = high_parts[(i + 11 ) / 28 ];
404-
405- uint64_t h1 = umul128_hi64 (h.lo , m);
406-
407- uint64_t c0 = h.lo * m;
408- uint64_t c1 = h1 + h.hi * m;
409- uint64_t c2 = (c1 < h1) + umul128_hi64 (h.hi , m);
410-
411- uint128 result = (c2 >> 63 ) != 0
412- ? uint128{c2, c1}
413- : uint128{c2 << 1 | c1 >> 63 , c1 << 1 | c0 >> 63 };
414- result.lo -= (fixups[i >> 5 ] >> (i & 31 )) & 1 ;
415- return result;
416- }
429+ if (compress) return compute (dec_exp - dec_exp_min);
417430 if (!split_tables) {
418431 int index = (dec_exp - dec_exp_min) * 2 ;
419432 return {data[index], data[index + 1 ]};
@@ -426,40 +439,6 @@ struct pow10_significands_table {
426439 if (!is_constant_evaluated ()) ZMIJ_ASM (volatile (" " : " +r" (hi), " +r" (lo)));
427440 return {hi[-dec_exp], lo[-dec_exp]};
428441 }
429-
430- constexpr pow10_significands_table () {
431- struct uint192 {
432- uint64_t w0, w1, w2; // w0 = least significant, w2 = most significant
433- };
434-
435- // First element, rounded up to cancel out rounding down in the
436- // multiplication, and minimize significant bits.
437- uint192 current = {0xe000000000000000 , 0x25e8e89c13bb0f7a ,
438- 0xff77b1fcbebcdc4f };
439- uint64_t ten = 0xa000000000000000 ;
440- for (int i = 0 ; i < num_pow10 && !compress; ++i) {
441- if (split_tables) {
442- data[num_pow10 - i - 1 ] = current.w2 ;
443- data[num_pow10 * 2 - i - 1 ] = current.w1 ;
444- } else {
445- data[i * 2 ] = current.w2 ;
446- data[i * 2 + 1 ] = current.w1 ;
447- }
448-
449- uint64_t h0 = umul128_hi64 (current.w0 , ten);
450- uint64_t h1 = umul128_hi64 (current.w1 , ten);
451-
452- uint64_t c0 = h0 + current.w1 * ten;
453- uint64_t c1 = (c0 < h0) + h1 + current.w2 * ten;
454- uint64_t c2 = (c1 < h1) + umul128_hi64 (current.w2 , ten); // dodgy carry
455-
456- // normalise
457- if (c2 >> 63 )
458- current = {c0, c1, c2};
459- else
460- current = {c0 << 1 , c1 << 1 | c0 >> 63 , c2 << 1 | c1 >> 63 };
461- }
462- }
463442};
464443alignas (64 ) constexpr pow10_significands_table pow10_significands;
465444
0 commit comments