From c5d0343c1741070801d434140bdc1da261ef32a4 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Tue, 17 Dec 2024 19:32:14 +0800 Subject: [PATCH] add likely to hot path in non-simd sto-alg so "Branch Mispredict" goes down by 2% --- .../integers/sto/sto_contiguous.h | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index 6fb25ef5..51653854 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -571,7 +571,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; constexpr ::std::uint_least64_t fullmask{baseval * 0x80}; - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) [[likely]] { ::std::uint_least64_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); @@ -581,7 +581,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val = ::fast_io::little_endian(val); } - if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval) + if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval) [[likely]] { auto ctrz_cval{::std::countr_zero(cval)}; auto const valid_bits{ctrz_cval & -8}; @@ -636,7 +636,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val = ::fast_io::little_endian(val); } - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[likely]] { auto 
ctrz_cval{::std::countr_zero(cval)}; auto const valid_bits{ctrz_cval & -8}; @@ -686,7 +686,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base)}; if constexpr (max_size >= u64_size_of_c16) { - while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) + while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) [[likely]] { ::std::uint_least64_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); @@ -696,7 +696,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val = ::fast_io::little_endian(val); } - if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) [[likely]] { auto ctrz_cval{::std::countr_zero(cval)}; auto const valid_bits{ctrz_cval & -16}; @@ -751,7 +751,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) [[likely]] { ::std::uint_least64_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); @@ -767,7 +767,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & 
0x8080808080808080}; - cval) + cval) [[likely]] { auto ctrz_cval{::std::countr_zero(cval)}; auto const valid_bits{ctrz_cval & -8}; @@ -824,14 +824,14 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, 2>}; constexpr ::std::uint_least32_t mask{0x000000FF}; - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) [[likely]] { ::std::uint_least32_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); val = ::fast_io::little_endian(val); - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[likely]] { auto ctrz_cval{::std::countr_zero(cval)}; auto const valid_bits{ctrz_cval & -8}; @@ -874,7 +874,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi } } - for (; first != first_phase_last; ++first) + for (; first != first_phase_last; ++first) [[likely]] { unsigned_char_type ch{static_cast<unsigned_char_type>(*first)}; if (char_digit_to_literal(ch)) [[unlikely]]