Skip to content

Commit aa2b909

Browse files
committed
Reuse intermediate results
1 parent 3b81fed commit aa2b909

File tree

2 files changed

+55
-30
lines changed

2 files changed

+55
-30
lines changed

test/double-check.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ constexpr auto debias(int raw_exp) -> int {
4646

4747
inline auto verify(uint64_t bits, uint64_t bin_sig, int bin_exp, int raw_exp,
4848
bool& has_errors) -> bool {
49-
zmij::dec_fp actual = to_decimal_normal<double>(bin_sig, raw_exp, true);
49+
to_decimal_result actual = to_decimal_normal<double>(bin_sig, raw_exp, true);
5050

5151
double value;
5252
memcpy(&value, &bits, sizeof(double));

zmij.cc

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -524,8 +524,8 @@ constexpr auto pack8(uint8_t a, uint8_t b, uint8_t c, uint8_t d, //
524524
// normals) and removes trailing zeros. The significant digits start
525525
// from buffer[1]. buffer[0] may contain '0' after this function if
526526
// the significand has length 16.
527-
auto write_significand17(char* buffer, uint64_t value,
528-
bool has17digits) noexcept -> char* {
527+
auto write_significand17(char* buffer, uint64_t value, bool has17digits,
528+
long long value_div10) noexcept -> char* {
529529
if (!ZMIJ_USE_NEON && !ZMIJ_USE_SSE) {
530530
char* start = buffer + 1;
531531
// Each digit is denoted by a letter so value is abbccddeeffgghhii.
@@ -612,8 +612,7 @@ auto write_significand17(char* buffer, uint64_t value,
612612
buffer += 16 - ((zeroes != 0 ? clz(zeroes) : 64) >> 2);
613613
return buffer - int(buffer - start == 1);
614614
#elif ZMIJ_USE_SSE
615-
uint64_t digits_16 =
616-
use_umul128_hi64 ? umul128_hi64(value, div10_sig64) : value / 10;
615+
uint64_t digits_16 = value_div10;
617616
uint32_t last_digit = value - digits_16 * 10;
618617

619618
// We always write 17 digits into the buffer, but the first one can be zero.
@@ -630,15 +629,15 @@ auto write_significand17(char* buffer, uint64_t value,
630629
__m128i neg10k = splat64(::neg10k);
631630
__m128i div100 = splat32(div100_sig);
632631
__m128i div10 = splat16((1 << 16) / 10 + 1);
633-
# if ZMIJ_USE_SSE4_1
632+
# if ZMIJ_USE_SSE4_1
634633
__m128i neg100 = splat32(::neg100);
635634
__m128i neg10 = splat16((1 << 8) - 10);
636635
__m128i bswap = __m128i{pack8(15, 14, 13, 12, 11, 10, 9, 8),
637636
pack8(7, 6, 5, 4, 3, 2, 1, 0)};
638-
# else
637+
# else
639638
__m128i hundred = splat32(100);
640639
__m128i moddiv10 = splat16(10 * (1 << 8) - 1);
641-
# endif
640+
# endif
642641
__m128i zeros = splat64(::zeros);
643642
} consts;
644643

@@ -693,7 +692,7 @@ auto write_significand17(char* buffer, uint64_t value,
693692

694693
_mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), digits);
695694
return buffer + ((last_digit != 0) ? 17 : len - (len == 1));
696-
#endif // ZMIJ_USE_SSE
695+
#endif // ZMIJ_USE_SSE
697696
}
698697

699698
// Writes a significand consisting of up to 9 decimal digits (7-9 for normals)
@@ -708,20 +707,40 @@ auto write_significand9(char* buffer, uint32_t value, bool has9digits) noexcept
708707
return buffer - int(buffer - start == 1);
709708
}
710709

710+
struct to_decimal_result {
711+
long long sig;
712+
int exp;
713+
static constexpr long long sig_div10 = 0;
714+
void set_div10(long long) {}
715+
};
716+
717+
struct to_decimal_result_sse {
718+
long long sig;
719+
int exp;
720+
long long sig_div10;
721+
void set_div10(long long value) { sig_div10 = value; }
722+
};
723+
724+
using to_decimal_result_t =
725+
std::conditional_t<ZMIJ_USE_SSE != 0, to_decimal_result_sse,
726+
to_decimal_result>;
727+
711728
template <int num_bits>
712-
auto normalize(zmij::dec_fp dec, bool subnormal) noexcept -> zmij::dec_fp {
729+
auto normalize(to_decimal_result_t dec, bool subnormal) noexcept
730+
-> to_decimal_result_t {
713731
if (!subnormal) [[ZMIJ_LIKELY]]
714732
return dec;
715733
while (dec.sig < (num_bits == 64 ? uint64_t(1e16) : uint64_t(1e8))) {
716734
dec.sig *= 10;
717735
--dec.exp;
718736
}
737+
dec.set_div10(dec.sig / 10);
719738
return dec;
720739
}
721740

722741
template <bool subnormal = false, typename UInt>
723742
auto to_decimal_schubfach(UInt bin_sig, int64_t bin_exp, bool regular) noexcept
724-
-> zmij::dec_fp {
743+
-> to_decimal_result_t {
725744
constexpr int num_bits = std::numeric_limits<UInt>::digits;
726745
int dec_exp = compute_dec_exp(bin_exp, regular);
727746
unsigned char exp_shift = compute_exp_shift<num_bits>(bin_exp, dec_exp);
@@ -745,9 +764,13 @@ auto to_decimal_schubfach(UInt bin_sig, int64_t bin_exp, bool regular) noexcept
745764

746765
// The idea of using a single shorter candidate is by Cassio Neri.
747766
// It is less or equal to the upper bound by construction.
748-
UInt shorter = 10 * ((upper >> bound_shift) / 10);
749-
if ((shorter << bound_shift) >= lower)
750-
return normalize<num_bits>({int64_t(shorter), dec_exp}, subnormal);
767+
long long div10 = (upper >> bound_shift) / 10;
768+
UInt shorter = div10 * 10;
769+
if ((shorter << bound_shift) >= lower) {
770+
to_decimal_result_t result = {int64_t(shorter), dec_exp};
771+
result.set_div10(div10);
772+
return normalize<num_bits>(result, subnormal);
773+
}
751774

752775
UInt scaled_sig =
753776
umulhi_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
@@ -761,15 +784,18 @@ auto to_decimal_schubfach(UInt bin_sig, int64_t bin_exp, bool regular) noexcept
761784
bool below_closer = cmp < 0 || (cmp == 0 && (longer_below & 1) == 0);
762785
bool below_in = (longer_below << bound_shift) >= lower;
763786
UInt dec_sig = (below_closer & below_in) ? longer_below : longer_above;
764-
return normalize<num_bits>({int64_t(dec_sig), dec_exp}, subnormal);
787+
to_decimal_result_t result = {int64_t(dec_sig), dec_exp};
788+
result.set_div10(dec_sig / 10);
789+
return normalize<num_bits>(result, subnormal);
765790
}
766791

767792
// Here be 🐉s.
768793
// Converts a binary FP number bin_sig * 2**bin_exp to the shortest decimal
769794
// representation, where bin_exp = raw_exp - exp_offset.
770795
template <typename Float, typename UInt>
771796
ZMIJ_INLINE auto to_decimal_normal(UInt bin_sig, int64_t raw_exp,
772-
bool regular) noexcept -> zmij::dec_fp {
797+
bool regular) noexcept
798+
-> to_decimal_result_t {
773799
using traits = float_traits<Float>;
774800
int64_t bin_exp = raw_exp - traits::exp_offset;
775801
constexpr int num_bits = std::numeric_limits<UInt>::digits;
@@ -799,16 +825,13 @@ ZMIJ_INLINE auto to_decimal_normal(UInt bin_sig, int64_t raw_exp,
799825
if (cmp == 0) [[ZMIJ_UNLIKELY]]
800826
break;
801827

802-
uint64_t digit;
803-
if (ZMIJ_USE_INT128) {
804-
// An optimization of integral % 10 by Dougall Johnson.
805-
// Relies on range calculation: (max_bin_sig << max_exp_shift) * max_u128.
806-
digit = integral - umul128_hi64(integral, div10_sig64) * 10;
807-
// or it narrows to 32-bit and doesn't use madd/msub
808-
ZMIJ_ASM(("" : "+r"(digit)));
809-
} else {
810-
digit = integral % 10;
811-
}
828+
// An optimization of integral % 10 by Dougall Johnson.
829+
// Relies on range calculation: (max_bin_sig << max_exp_shift) * max_u128.
830+
long long div10 =
831+
ZMIJ_USE_INT128 ? umul128_hi64(integral, div10_sig64) : integral / 10;
832+
uint64_t digit = integral - div10 * 10;
833+
// or it narrows to 32-bit and doesn't use madd/msub
834+
ZMIJ_ASM(("" : "+r"(digit)));
812835

813836
// Switch to a fixed-point representation with the least significant
814837
// integral digit in the upper bits and fractional digits in the lower bits.
@@ -863,7 +886,9 @@ ZMIJ_INLINE auto to_decimal_normal(UInt bin_sig, int64_t raw_exp,
863886
}
864887
shorter += round_up * 10;
865888
bool use_shorter = (scaled_sig_mod10 <= scaled_half_ulp) + round_up != 0;
866-
return {use_shorter ? shorter : longer, dec_exp};
889+
to_decimal_result_t result = {use_shorter ? shorter : longer, dec_exp};
890+
result.set_div10(div10 + use_shorter * round_up);
891+
return result;
867892
}
868893
return to_decimal_schubfach(bin_sig, bin_exp, regular);
869894
}
@@ -877,7 +902,7 @@ inline auto to_decimal(double value) noexcept -> dec_fp {
877902
auto bits = traits::to_bits(value);
878903
auto bin_exp = traits::get_exp(bits); // binary exponent
879904
auto bin_sig = traits::get_sig(bits); // binary significand
880-
dec_fp dec;
905+
to_decimal_result_t dec;
881906
if (bin_exp == 0 || bin_exp == traits::exp_mask) [[ZMIJ_UNLIKELY]] {
882907
if (bin_exp != 0) return {0, int(~0u >> 1)};
883908
if (bin_sig == 0) return {0, 0};
@@ -903,7 +928,7 @@ auto write(Float value, char* buffer) noexcept -> char* {
903928
*buffer = '-';
904929
buffer += traits::is_negative(bits);
905930

906-
dec_fp dec;
931+
to_decimal_result_t dec;
907932
if (bin_exp == 0 || bin_exp == traits::exp_mask) [[ZMIJ_UNLIKELY]] {
908933
if (bin_exp != 0) {
909934
memcpy(buffer, bin_sig == 0 ? "inf" : "nan", 4);
@@ -925,7 +950,7 @@ auto write(Float value, char* buffer) noexcept -> char* {
925950
if (traits::num_bits == 64) {
926951
bool has17digits = dec.sig >= uint64_t(1e16);
927952
dec_exp += traits::max_digits10 - 2 + has17digits;
928-
buffer = write_significand17(buffer, dec.sig, has17digits);
953+
buffer = write_significand17(buffer, dec.sig, has17digits, dec.sig_div10);
929954
} else {
930955
if (dec.sig < uint32_t(1e7)) [[ZMIJ_UNLIKELY]] {
931956
dec.sig *= 10;

0 commit comments

Comments
 (0)