Skip to content

Commit 2f72d99

Browse files
authored
Add `[[likely]]` to the hot path in the non-SIMD sto algorithm so that "Branch Mispredict" goes down by 2% (#1034)
* Add `[[likely]]` to the hot path in the non-SIMD sto algorithm so that "Branch Mispredict" goes down by 2%
* Add `[[likely]]` to the happy path
1 parent fac8f62 commit 2f72d99

File tree

1 file changed

+47
-49
lines changed

1 file changed

+47
-49
lines changed

include/fast_io_core_impl/integers/sto/sto_contiguous.h

Lines changed: 47 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
571571
constexpr ::std::uint_least64_t mask{0x000000FF000000FF};
572572
constexpr ::std::uint_least64_t fullmask{baseval * 0x80};
573573

574-
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t))
574+
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) [[likely]]
575575
{
576576
::std::uint_least64_t val;
577577
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t));
@@ -581,7 +581,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
581581
val = ::fast_io::little_endian(val);
582582
}
583583

584-
if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval)
584+
if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval) [[likely]]
585585
{
586586
auto ctrz_cval{::std::countr_zero(cval)};
587587
auto const valid_bits{ctrz_cval & -8};
@@ -636,7 +636,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
636636
val = ::fast_io::little_endian(val);
637637
}
638638

639-
if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval)
639+
if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[likely]]
640640
{
641641
auto ctrz_cval{::std::countr_zero(cval)};
642642
auto const valid_bits{ctrz_cval & -8};
@@ -686,7 +686,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
686686
constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base)};
687687
if constexpr (max_size >= u64_size_of_c16)
688688
{
689-
while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16)
689+
while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) [[likely]]
690690
{
691691
::std::uint_least64_t val;
692692
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t));
@@ -696,7 +696,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
696696
val = ::fast_io::little_endian(val);
697697
}
698698

699-
if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval)
699+
if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) [[likely]]
700700
{
701701
auto ctrz_cval{::std::countr_zero(cval)};
702702
auto const valid_bits{ctrz_cval & -16};
@@ -751,7 +751,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
751751
constexpr ::std::uint_least64_t mask{0x000000FF000000FF};
752752
constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)};
753753
constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)};
754-
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t))
754+
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) [[likely]]
755755
{
756756
::std::uint_least64_t val;
757757
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t));
@@ -767,7 +767,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
767767
~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) &
768768
((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) &
769769
0x8080808080808080};
770-
cval)
770+
cval) [[likely]]
771771
{
772772
auto ctrz_cval{::std::countr_zero(cval)};
773773
auto const valid_bits{ctrz_cval & -8};
@@ -824,14 +824,14 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
824824

825825
constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, 2>};
826826
constexpr ::std::uint_least32_t mask{0x000000FF};
827-
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t))
827+
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) [[likely]]
828828
{
829829
::std::uint_least32_t val;
830830
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t));
831831

832832
val = ::fast_io::little_endian(val);
833833

834-
if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]]
834+
if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[likely]]
835835
{
836836
auto ctrz_cval{::std::countr_zero(cval)};
837837
auto const valid_bits{ctrz_cval & -8};
@@ -874,7 +874,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
874874
}
875875
}
876876

877-
for (; first != first_phase_last; ++first)
877+
for (; first != first_phase_last; ++first) [[likely]]
878878
{
879879
unsigned_char_type ch{static_cast<unsigned_char_type>(*first)};
880880
if (char_digit_to_literal<base, char_type>(ch)) [[unlikely]]
@@ -949,57 +949,57 @@ template <char8_t base, ::std::integral char_type>
949949
inline constexpr parse_result<char_type const *> scan_shbase_impl(char_type const *first,
950950
char_type const *last) noexcept
951951
{
952-
if (first == last || *first != char_literal_v<u8'0', char_type>)
952+
if (first == last || *first != char_literal_v<u8'0', char_type>) [[unlikely]]
953953
{
954954
return {first, parse_code::invalid};
955955
}
956-
if ((++first) == last)
956+
if ((++first) == last) [[unlikely]]
957957
{
958958
return {first, parse_code::invalid};
959959
}
960960
if constexpr (base == 2 || base == 3 || base == 16)
961961
{
962962
auto ch{*first};
963963
if ((ch != char_literal_v<(base == 2 ? u8'B' : (base == 3 ? u8't' : u8'X')), char_type>)&(
964-
ch != char_literal_v<(base == 2 ? u8'b' : (base == 3 ? u8't' : u8'x')), char_type>))
964+
ch != char_literal_v<(base == 2 ? u8'b' : (base == 3 ? u8't' : u8'x')), char_type>)) [[unlikely]]
965965
{
966966
return {first, parse_code::invalid};
967967
}
968968
++first;
969969
}
970970
else
971971
{
972-
if (*first != char_literal_v<u8'[', char_type>)
972+
if (*first != char_literal_v<u8'[', char_type>) [[unlikely]]
973973
{
974974
return {first, parse_code::invalid};
975975
}
976976
++first;
977-
if ((++first) == last)
977+
if ((++first) == last) [[unlikely]]
978978
{
979979
return {first, parse_code::invalid};
980980
}
981981
constexpr auto digit0{char_literal_v<u8'0' + (base < 10 ? base : base / 10), char_type>};
982-
if (*first != digit0)
982+
if (*first != digit0) [[unlikely]]
983983
{
984984
return {first, parse_code::invalid};
985985
}
986-
if ((++first) == last)
986+
if ((++first) == last) [[unlikely]]
987987
{
988988
return {first, parse_code::invalid};
989989
}
990990
if constexpr (10 < base)
991991
{
992992
constexpr auto digit1{char_literal_v<u8'0' + (base % 10), char_type>};
993-
if (*first != digit1)
993+
if (*first != digit1) [[unlikely]]
994994
{
995995
return {first, parse_code::invalid};
996996
}
997-
if ((++first) == last)
997+
if ((++first) == last) [[unlikely]]
998998
{
999999
return {first, parse_code::invalid};
10001000
}
10011001
}
1002-
if (*first != char_literal_v<u8']', char_type>)
1002+
if (*first != char_literal_v<u8']', char_type>) [[unlikely]]
10031003
{
10041004
return {first, parse_code::invalid};
10051005
}
@@ -1018,7 +1018,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
10181018
[[maybe_unused]] bool sign{};
10191019
if constexpr (my_signed_integral<T>)
10201020
{
1021-
if (first == last)
1021+
if (first == last) [[unlikely]]
10221022
{
10231023
return {first, parse_code::invalid};
10241024
}
@@ -1031,7 +1031,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
10311031
{
10321032
if constexpr (base == 8)
10331033
{
1034-
if (first == last || *first != char_literal_v<u8'0', char_type>)
1034+
if (first == last || *first != char_literal_v<u8'0', char_type>) [[unlikely]]
10351035
{
10361036
return {first, parse_code::invalid};
10371037
}
@@ -1040,7 +1040,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
10401040
else
10411041
{
10421042
auto phase_ret = scan_shbase_impl<base>(first, last);
1043-
if (phase_ret.code != ongoing_parse_code)
1043+
if (phase_ret.code != ongoing_parse_code) [[unlikely]]
10441044
{
10451045
return phase_ret;
10461046
}
@@ -1062,7 +1062,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
10621062
{
10631063
++first;
10641064
first = ::fast_io::details::find_none_zero_simd_impl(first, last);
1065-
if (first == last)
1065+
if (first == last) [[likely]]
10661066
{
10671067
t = 0;
10681068
return {first, parse_code::ok};
@@ -1071,7 +1071,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
10711071
else
10721072
{
10731073
++first;
1074-
if ((first == last) || (!char_is_digit<base, char_type>(static_cast<unsigned_char_type>(*first))))
1074+
if ((first == last) || (!char_is_digit<base, char_type>(static_cast<unsigned_char_type>(*first)))) [[likely]]
10751075
{
10761076
t = {};
10771077
return {first, parse_code::ok};
@@ -1104,7 +1104,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
11041104
if constexpr (smaller_than_uint64)
11051105
{
11061106
constexpr unsigned_type umax{static_cast<unsigned_type>(-1)};
1107-
if (temp > umax)
1107+
if (temp > umax) [[unlikely]]
11081108
{
11091109
return {it, parse_code::overflow};
11101110
}
@@ -1118,7 +1118,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
11181118
else [[unlikely]]
11191119
{
11201120
auto [it2, ec] = scan_int_contiguous_none_simd_space_part_define_impl<base>(first, last, res);
1121-
if (ec != parse_code::ok)
1121+
if (ec != parse_code::ok) [[unlikely]]
11221122
{
11231123
return {it2, ec};
11241124
}
@@ -1129,7 +1129,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
11291129
#endif
11301130
{
11311131
auto [it2, ec] = scan_int_contiguous_none_simd_space_part_define_impl<base>(first, last, res);
1132-
if (ec != parse_code::ok)
1132+
if (ec != parse_code::ok) [[unlikely]]
11331133
{
11341134
return {it2, ec};
11351135
}
@@ -1139,7 +1139,7 @@ scan_int_contiguous_none_space_part_define_impl(char_type const *first, char_typ
11391139
{
11401140
constexpr unsigned_type umax{static_cast<unsigned_type>(-1)};
11411141
constexpr unsigned_type imax{umax >> 1};
1142-
if (res > (static_cast<my_make_unsigned_t<T>>(imax) + sign))
1142+
if (res > (static_cast<my_make_unsigned_t<T>>(imax) + sign)) [[unlikely]]
11431143
{
11441144
return {it, parse_code::overflow};
11451145
}
@@ -1177,7 +1177,7 @@ inline constexpr parse_result<char_type const *> scan_int_contiguous_define_impl
11771177
{
11781178
if constexpr (base == 8)
11791179
{
1180-
if (first == last || *first != char_literal_v<u8'0', char_type>)
1180+
if (first == last || *first != char_literal_v<u8'0', char_type>) [[unlikely]]
11811181
{
11821182
return {first, parse_code::invalid};
11831183
}
@@ -1186,7 +1186,7 @@ inline constexpr parse_result<char_type const *> scan_int_contiguous_define_impl
11861186
else
11871187
{
11881188
auto phase_ret = scan_shbase_impl<base>(first, last);
1189-
if (phase_ret.code != ongoing_parse_code)
1189+
if (phase_ret.code != ongoing_parse_code) [[unlikely]]
11901190
{
11911191
return phase_ret;
11921192
}
@@ -1310,7 +1310,7 @@ sc_int_ctx_prefix_phase(::std::uint_least8_t &sz, char_type const *first, char_t
13101310
}
13111311
if constexpr (base == 8)
13121312
{
1313-
if (*first != char_literal_v<u8'0', char_type>)
1313+
if (*first != char_literal_v<u8'0', char_type>) [[unlikely]]
13141314
{
13151315
return {first, parse_code::invalid};
13161316
}
@@ -1320,7 +1320,7 @@ sc_int_ctx_prefix_phase(::std::uint_least8_t &sz, char_type const *first, char_t
13201320
::std::uint_least8_t size_cache{sz};
13211321
if (size_cache == 0)
13221322
{
1323-
if (*first != char_literal_v<u8'0', char_type>)
1323+
if (*first != char_literal_v<u8'0', char_type>) [[unlikely]]
13241324
{
13251325
return {first, parse_code::invalid};
13261326
}
@@ -1338,13 +1338,13 @@ sc_int_ctx_prefix_phase(::std::uint_least8_t &sz, char_type const *first, char_t
13381338
{
13391339
auto ch{*first};
13401340
if ((ch == char_literal_v<(base == 2 ? u8'B' : (base == 3 ? u8't' : u8'X')), char_type>) |
1341-
(ch == char_literal_v<(base == 2 ? u8'b' : (base == 3 ? u8't' : u8'x')), char_type>))
1341+
(ch == char_literal_v<(base == 2 ? u8'b' : (base == 3 ? u8't' : u8'x')), char_type>)) [[likely]]
13421342
{
13431343
sz = 0;
13441344
++first;
13451345
return {first, ongoing_parse_code};
13461346
}
1347-
else
1347+
else [[unlikely]]
13481348
{
13491349
return {first, parse_code::invalid};
13501350
}
@@ -1353,7 +1353,7 @@ sc_int_ctx_prefix_phase(::std::uint_least8_t &sz, char_type const *first, char_t
13531353
{
13541354
if (size_cache == 1)
13551355
{
1356-
if (*first != char_literal_v<u8'[', char_type>)
1356+
if (*first != char_literal_v<u8'[', char_type>) [[unlikely]]
13571357
{
13581358
return {first, parse_code::invalid};
13591359
}
@@ -1366,7 +1366,7 @@ sc_int_ctx_prefix_phase(::std::uint_least8_t &sz, char_type const *first, char_t
13661366
constexpr auto digit0{char_literal_v<u8'0' + (base < 10 ? base : base / 10), char_type>};
13671367
if (size_cache == 2)
13681368
{
1369-
if (*first != digit0)
1369+
if (*first != digit0) [[unlikely]]
13701370
{
13711371
return {first, parse_code::invalid};
13721372
}
@@ -1381,7 +1381,7 @@ sc_int_ctx_prefix_phase(::std::uint_least8_t &sz, char_type const *first, char_t
13811381
constexpr auto digit1{char_literal_v<u8'0' + (base % 10), char_type>};
13821382
if (size_cache == 3)
13831383
{
1384-
if (*first != digit1)
1384+
if (*first != digit1) [[unlikely]]
13851385
{
13861386
return {first, parse_code::invalid};
13871387
}
@@ -1395,7 +1395,7 @@ sc_int_ctx_prefix_phase(::std::uint_least8_t &sz, char_type const *first, char_t
13951395
constexpr ::std::uint_least8_t last_index{base < 10 ? 3 : 4};
13961396
if (size_cache == last_index)
13971397
{
1398-
if (*first != char_literal_v<u8']', char_type>)
1398+
if (*first != char_literal_v<u8']', char_type>) [[unlikely]]
13991399
{
14001400
return {first, parse_code::invalid};
14011401
}
@@ -1443,7 +1443,7 @@ inline constexpr parse_result<char_type const *> sc_int_ctx_zero_phase(scan_inte
14431443
}
14441444
return {first, parse_code::partial};
14451445
}
1446-
if (!char_is_digit<base, char_type>(static_cast<unsigned_char_type>(*first)))
1446+
if (!char_is_digit<base, char_type>(static_cast<unsigned_char_type>(*first))) [[likely]]
14471447
{
14481448
return {first, parse_code::ok};
14491449
}
@@ -1472,7 +1472,7 @@ inline constexpr parse_result<char_type const *> sc_int_ctx_digit_phase(State &s
14721472
st.integer_phase = scan_integral_context_phase::digit;
14731473
return {it, parse_code::partial};
14741474
}
1475-
if (st.size == 0)
1475+
if (st.size == 0) [[likely]]
14761476
{
14771477
t = {};
14781478
return {it, parse_code::ok};
@@ -1487,7 +1487,7 @@ inline constexpr parse_result<char_type const *> sc_int_ctx_digit_phase(State &s
14871487
st.integer_phase = scan_integral_context_phase::overflow;
14881488
return {it, parse_code::partial};
14891489
}
1490-
else
1490+
else [[unlikely]]
14911491
{
14921492
return {it, parse_code::overflow};
14931493
}
@@ -1504,7 +1504,7 @@ inline constexpr parse_result<char_type const *> sc_int_ctx_zero_invalid_phase(c
15041504
return {first, parse_code::partial};
15051505
}
15061506
++first;
1507-
if (!char_is_digit<base, char_type>(static_cast<unsigned_char_type>(*first)))
1507+
if (!char_is_digit<base, char_type>(static_cast<unsigned_char_type>(*first))) [[likely]]
15081508
{
15091509
return {first, parse_code::ok};
15101510
}
@@ -1554,7 +1554,7 @@ inline constexpr parse_result<char_type const *> scan_context_define_parse_impl(
15541554
if constexpr (!noskipws)
15551555
{
15561556
auto phase_ret = sc_int_ctx_space_phase(first, last);
1557-
if (phase_ret.code != ongoing_parse_code)
1557+
if (phase_ret.code != ongoing_parse_code) [[unlikely]]
15581558
{
15591559
return phase_ret;
15601560
}
@@ -1567,7 +1567,7 @@ inline constexpr parse_result<char_type const *> scan_context_define_parse_impl(
15671567
if constexpr (my_signed_integral<T>)
15681568
{
15691569
auto phase_ret = sc_int_ctx_sign_phase<true, false>(st, first, last);
1570-
if (phase_ret.code != ongoing_parse_code)
1570+
if (phase_ret.code != ongoing_parse_code) [[unlikely]]
15711571
{
15721572
return phase_ret;
15731573
}
@@ -1581,7 +1581,7 @@ inline constexpr parse_result<char_type const *> scan_context_define_parse_impl(
15811581
{
15821582
st.integer_phase = scan_integral_context_phase::prefix;
15831583
auto phase_ret = sc_int_ctx_prefix_phase<base>(st.size, first, last);
1584-
if (phase_ret.code != ongoing_parse_code)
1584+
if (phase_ret.code != ongoing_parse_code) [[unlikely]]
15851585
{
15861586
return phase_ret;
15871587
}
@@ -1673,9 +1673,7 @@ inline constexpr parse_code scan_context_eof_define_parse_impl(State &st, T &t)
16731673
}
16741674
}
16751675
case scan_integral_context_phase::digit:
1676-
return scan_int_contiguous_none_space_part_define_impl<base>(st.buffer.data(), st.buffer.data() + st.size,
1677-
t)
1678-
.code;
1676+
return scan_int_contiguous_none_space_part_define_impl<base>(st.buffer.data(), st.buffer.data() + st.size, t).code;
16791677
case scan_integral_context_phase::overflow:
16801678
return parse_code::overflow;
16811679
case scan_integral_context_phase::zero_skip:

0 commit comments

Comments
 (0)