Skip to content

Commit f59cbcf

Browse files
committed
Reuse compressed logic for full table computation
1 parent e458b55 commit f59cbcf

File tree

1 file changed

+77
-98
lines changed

1 file changed

+77
-98
lines changed

zmij.cc

Lines changed: 77 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -342,78 +342,91 @@ constexpr auto floor_log2_pow10(int e) noexcept -> int {
342342
}
343343

344344
// 128-bit significands of powers of 10 rounded down.
345-
// Generation with 192-bit arithmetic and compression by Dougall Johnson.
346345
struct pow10_significands_table {
347346
static constexpr bool compress = ZMIJ_OPTIMIZE_SIZE != 0;
348347
static constexpr bool split_tables = !compress && ZMIJ_AARCH64 != 0;
349348
static constexpr int num_pow10 = 617;
350349
uint64_t data[compress ? 1 : num_pow10 * 2] = {};
351350

351+
// Computes the 128-bit significand of 10**i using method by Dougall Johnson.
352+
static constexpr auto compute(unsigned i) noexcept -> uint128 {
353+
constexpr uint64_t pow10s[28] = {
354+
0x8000000000000000, 0xa000000000000000, 0xc800000000000000,
355+
0xfa00000000000000, 0x9c40000000000000, 0xc350000000000000,
356+
0xf424000000000000, 0x9896800000000000, 0xbebc200000000000,
357+
0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000,
358+
0xe8d4a51000000000, 0x9184e72a00000000, 0xb5e620f480000000,
359+
0xe35fa931a0000000, 0x8e1bc9bf04000000, 0xb1a2bc2ec5000000,
360+
0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000,
361+
0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400,
362+
0xd3c21bcecceda100, 0x84595161401484a0, 0xa56fa5b99019a5c8,
363+
0xcecb8f27f4200f3a,
364+
};
365+
366+
constexpr uint128 high_parts[23] = {
367+
{0xaf8e5410288e1b6f, 0x07ecf0ae5ee44dda},
368+
{0xb1442798f49ffb4a, 0x99cd11cfdf41779d},
369+
{0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15},
370+
{0xb4bca50b065abe63, 0x0fed077a756b53aa},
371+
{0xb67f6455292cbf08, 0x1a3bc84c17b1d543},
372+
{0xb84687c269ef3bfb, 0x3d5d514f40eea742},
373+
{0xba121a4650e4ddeb, 0x92f34d62616ce413},
374+
{0xbbe226efb628afea, 0x890489f70a55368c},
375+
{0xbdb6b8e905cb600f, 0x5400e987bbc1c921},
376+
{0xbf8fdb78849a5f96, 0xde98520472bdd034},
377+
{0xc16d9a0095928a27, 0x75b7053c0f178294},
378+
{0xc350000000000000, 0x0000000000000000},
379+
{0xc5371912364ce305, 0x6c28000000000000},
380+
{0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef6},
381+
{0xc913936dd571c84c, 0x03bc3a19cd1e38ea},
382+
{0xcb090c8001ab551c, 0x5cadf5bfd3072cc6},
383+
{0xcd036837130890a1, 0x36dba887c37a8c10},
384+
{0xcf02b2c21207ef2e, 0x94f967e45e03f4bc},
385+
{0xd106f86e69d785c7, 0xe13336d701beba52},
386+
{0xd31045a8341ca07c, 0x1ede48111209a051},
387+
{0xd51ea6fa85785631, 0x552a74227f3ea566},
388+
{0xd732290fbacaf133, 0xa97c177947ad4096},
389+
{0xd94ad8b1c7380874, 0x18375281ae7822bc},
390+
};
391+
392+
constexpr uint32_t fixups[20] = {
393+
0x05271b1f, 0x00000c20, 0x00003200, 0x12100020, 0x00000000,
394+
0x06000000, 0xc16409c0, 0xaf26700f, 0xeb987b07, 0x0000000d,
395+
0x00000000, 0x66fbfffe, 0xb74100ec, 0xa0669fe8, 0xedb21280,
396+
0x00000686, 0x0a021200, 0x29b89c20, 0x08bc0eda, 0x00000000};
397+
398+
uint64_t m = pow10s[(i + 11) % 28];
399+
uint128 h = high_parts[(i + 11) / 28];
400+
401+
uint64_t h1 = umul128_hi64(h.lo, m);
402+
403+
uint64_t c0 = h.lo * m;
404+
uint64_t c1 = h1 + h.hi * m;
405+
uint64_t c2 = (c1 < h1) + umul128_hi64(h.hi, m);
406+
407+
uint128 result = (c2 >> 63) != 0
408+
? uint128{c2, c1}
409+
: uint128{c2 << 1 | c1 >> 63, c1 << 1 | c0 >> 63};
410+
result.lo -= (fixups[i >> 5] >> (i & 31)) & 1;
411+
return result;
412+
}
413+
414+
constexpr pow10_significands_table() {
415+
for (int i = 0; i < num_pow10 && !compress; ++i) {
416+
uint128 result = compute(i);
417+
if (split_tables) {
418+
data[num_pow10 - i - 1] = result.hi;
419+
data[num_pow10 * 2 - i - 1] = result.lo;
420+
} else {
421+
data[i * 2] = result.hi;
422+
data[i * 2 + 1] = result.lo;
423+
}
424+
}
425+
}
426+
352427
ZMIJ_CONSTEXPR auto operator[](int dec_exp) const noexcept -> uint128 {
353428
constexpr int dec_exp_min = -292;
354-
if (compress) {
355-
// Compressed version using only 672 bytes of data.
356-
unsigned i = dec_exp - dec_exp_min;
357-
static const uint64_t pow10s[28] = {
358-
0x8000000000000000, 0xa000000000000000, 0xc800000000000000,
359-
0xfa00000000000000, 0x9c40000000000000, 0xc350000000000000,
360-
0xf424000000000000, 0x9896800000000000, 0xbebc200000000000,
361-
0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000,
362-
0xe8d4a51000000000, 0x9184e72a00000000, 0xb5e620f480000000,
363-
0xe35fa931a0000000, 0x8e1bc9bf04000000, 0xb1a2bc2ec5000000,
364-
0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000,
365-
0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400,
366-
0xd3c21bcecceda100, 0x84595161401484a0, 0xa56fa5b99019a5c8,
367-
0xcecb8f27f4200f3a,
368-
};
369-
370-
static const uint128 high_parts[23] = {
371-
{0xaf8e5410288e1b6f, 0x07ecf0ae5ee44dda},
372-
{0xb1442798f49ffb4a, 0x99cd11cfdf41779d},
373-
{0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15},
374-
{0xb4bca50b065abe63, 0x0fed077a756b53aa},
375-
{0xb67f6455292cbf08, 0x1a3bc84c17b1d543},
376-
{0xb84687c269ef3bfb, 0x3d5d514f40eea742},
377-
{0xba121a4650e4ddeb, 0x92f34d62616ce413},
378-
{0xbbe226efb628afea, 0x890489f70a55368c},
379-
{0xbdb6b8e905cb600f, 0x5400e987bbc1c921},
380-
{0xbf8fdb78849a5f96, 0xde98520472bdd034},
381-
{0xc16d9a0095928a27, 0x75b7053c0f178294},
382-
{0xc350000000000000, 0x0000000000000000},
383-
{0xc5371912364ce305, 0x6c28000000000000},
384-
{0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef6},
385-
{0xc913936dd571c84c, 0x03bc3a19cd1e38ea},
386-
{0xcb090c8001ab551c, 0x5cadf5bfd3072cc6},
387-
{0xcd036837130890a1, 0x36dba887c37a8c10},
388-
{0xcf02b2c21207ef2e, 0x94f967e45e03f4bc},
389-
{0xd106f86e69d785c7, 0xe13336d701beba52},
390-
{0xd31045a8341ca07c, 0x1ede48111209a051},
391-
{0xd51ea6fa85785631, 0x552a74227f3ea566},
392-
{0xd732290fbacaf133, 0xa97c177947ad4096},
393-
{0xd94ad8b1c7380874, 0x18375281ae7822bc},
394-
};
395-
396-
uint32_t fixups[20] = {0x05271b1f, 0x00000c20, 0x00003200, 0x12100020,
397-
0x00000000, 0x06000000, 0xc16409c0, 0xaf26700f,
398-
0xeb987b07, 0x0000000d, 0x00000000, 0x66fbfffe,
399-
0xb74100ec, 0xa0669fe8, 0xedb21280, 0x00000686,
400-
0x0a021200, 0x29b89c20, 0x08bc0eda, 0x00000000};
401-
402-
uint64_t m = pow10s[(i + 11) % 28];
403-
uint128 h = high_parts[(i + 11) / 28];
404-
405-
uint64_t h1 = umul128_hi64(h.lo, m);
406-
407-
uint64_t c0 = h.lo * m;
408-
uint64_t c1 = h1 + h.hi * m;
409-
uint64_t c2 = (c1 < h1) + umul128_hi64(h.hi, m);
410-
411-
uint128 result = (c2 >> 63) != 0
412-
? uint128{c2, c1}
413-
: uint128{c2 << 1 | c1 >> 63, c1 << 1 | c0 >> 63};
414-
result.lo -= (fixups[i >> 5] >> (i & 31)) & 1;
415-
return result;
416-
}
429+
if (compress) return compute(dec_exp - dec_exp_min);
417430
if (!split_tables) {
418431
int index = (dec_exp - dec_exp_min) * 2;
419432
return {data[index], data[index + 1]};
@@ -426,40 +439,6 @@ struct pow10_significands_table {
426439
if (!is_constant_evaluated()) ZMIJ_ASM(volatile("" : "+r"(hi), "+r"(lo)));
427440
return {hi[-dec_exp], lo[-dec_exp]};
428441
}
429-
430-
constexpr pow10_significands_table() {
431-
struct uint192 {
432-
uint64_t w0, w1, w2; // w0 = least significant, w2 = most significant
433-
};
434-
435-
// First element, rounded up to cancel out rounding down in the
436-
// multiplication, and minimize significant bits.
437-
uint192 current = {0xe000000000000000, 0x25e8e89c13bb0f7a,
438-
0xff77b1fcbebcdc4f};
439-
uint64_t ten = 0xa000000000000000;
440-
for (int i = 0; i < num_pow10 && !compress; ++i) {
441-
if (split_tables) {
442-
data[num_pow10 - i - 1] = current.w2;
443-
data[num_pow10 * 2 - i - 1] = current.w1;
444-
} else {
445-
data[i * 2] = current.w2;
446-
data[i * 2 + 1] = current.w1;
447-
}
448-
449-
uint64_t h0 = umul128_hi64(current.w0, ten);
450-
uint64_t h1 = umul128_hi64(current.w1, ten);
451-
452-
uint64_t c0 = h0 + current.w1 * ten;
453-
uint64_t c1 = (c0 < h0) + h1 + current.w2 * ten;
454-
uint64_t c2 = (c1 < h1) + umul128_hi64(current.w2, ten); // dodgy carry
455-
456-
// normalise
457-
if (c2 >> 63)
458-
current = {c0, c1, c2};
459-
else
460-
current = {c0 << 1, c1 << 1 | c0 >> 63, c2 << 1 | c1 >> 63};
461-
}
462-
}
463442
};
464443
alignas(64) constexpr pow10_significands_table pow10_significands;
465444

0 commit comments

Comments
 (0)