Skip to content

Commit

Permalink
add likely to hot path in non-simd sto-alg so "Branch Mispredict" goes down by 2%
Browse files Browse the repository at this point in the history
  • Loading branch information
MacroModel committed Dec 17, 2024
1 parent 3a1fafb commit c5d0343
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions include/fast_io_core_impl/integers/sto/sto_contiguous.h
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
constexpr ::std::uint_least64_t mask{0x000000FF000000FF};
constexpr ::std::uint_least64_t fullmask{baseval * 0x80};

while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t))
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) [[likely]]
{
::std::uint_least64_t val;
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t));
Expand All @@ -581,7 +581,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
val = ::fast_io::little_endian(val);
}

if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval)
if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval) [[likely]]
{
auto ctrz_cval{::std::countr_zero(cval)};
auto const valid_bits{ctrz_cval & -8};
Expand Down Expand Up @@ -636,7 +636,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
val = ::fast_io::little_endian(val);
}

if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval)
if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[likely]]
{
auto ctrz_cval{::std::countr_zero(cval)};
auto const valid_bits{ctrz_cval & -8};
Expand Down Expand Up @@ -686,7 +686,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base)};
if constexpr (max_size >= u64_size_of_c16)
{
while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16)
while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) [[likely]]
{
::std::uint_least64_t val;
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t));
Expand All @@ -696,7 +696,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
val = ::fast_io::little_endian(val);
}

if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval)
if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) [[likely]]
{
auto ctrz_cval{::std::countr_zero(cval)};
auto const valid_bits{ctrz_cval & -16};
Expand Down Expand Up @@ -751,7 +751,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
constexpr ::std::uint_least64_t mask{0x000000FF000000FF};
constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)};
constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)};
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t))
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) [[likely]]
{
::std::uint_least64_t val;
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t));
Expand All @@ -767,7 +767,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) &
((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) &
0x8080808080808080};
cval)
cval) [[likely]]
{
auto ctrz_cval{::std::countr_zero(cval)};
auto const valid_bits{ctrz_cval & -8};
Expand Down Expand Up @@ -824,14 +824,14 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi

constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, 2>};
constexpr ::std::uint_least32_t mask{0x000000FF};
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t))
while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) [[likely]]
{
::std::uint_least32_t val;
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t));

val = ::fast_io::little_endian(val);

if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]]
if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[likely]]
{
auto ctrz_cval{::std::countr_zero(cval)};
auto const valid_bits{ctrz_cval & -8};
Expand Down Expand Up @@ -874,7 +874,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi
}
}

for (; first != first_phase_last; ++first)
for (; first != first_phase_last; ++first) [[likely]]
{
unsigned_char_type ch{static_cast<unsigned_char_type>(*first)};
if (char_digit_to_literal<base, char_type>(ch)) [[unlikely]]
Expand Down

0 comments on commit c5d0343

Please sign in to comment.