From e60a6bc2ad99d80e02f3e618bcba2110e9ba240c Mon Sep 17 00:00:00 2001 From: Junekey Jeon Date: Tue, 12 Mar 2024 17:40:56 -0700 Subject: [PATCH] Improve compatibility (1) Replace std::uintXX_t by std::uint_leastXX_t, apply UINTXX_C when needed, and add XX-bit masking if necessary, removing the assumption of integers being XX-bits (but didn't touch the table entries; should fix the table generator for that) (2) Apply the change discussed in https://github.com/jk-jeon/dragonbox/discussions/57 (3) Remove remaining C++14/17 feature usages from to_chars. (4) Simplify dragonbox_to_chars.cpp --- include/dragonbox/dragonbox.h | 489 ++++++++------ include/dragonbox/dragonbox_to_chars.h | 139 ++-- source/dragonbox_to_chars.cpp | 859 +++++++++++++------------ 3 files changed, 825 insertions(+), 662 deletions(-) diff --git a/include/dragonbox/dragonbox.h b/include/dragonbox/dragonbox.h index 2f67d8e..3a7ae8d 100644 --- a/include/dragonbox/dragonbox.h +++ b/include/dragonbox/dragonbox.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023 Junekey Jeon +// Copyright 2020-2024 Junekey Jeon // // The contents of this file may be used under the terms of // the Apache License v2.0 with LLVM Exceptions. @@ -19,16 +19,33 @@ #ifndef JKJ_HEADER_DRAGONBOX #define JKJ_HEADER_DRAGONBOX -#include -#include -#include -#include -#include +// Attribute for storing static data into a dedicated place, e.g. flash memory. Every ODR-used static +// data declaration will be decorated with this macro. The users may define this macro, before including +// the library headers, into whatever they want. +#ifndef JKJ_STATIC_DATA_SECTION + #define JKJ_STATIC_DATA_SECTION +#else + #define JKJ_STATIC_DATA_SECTION_DEFINED 1 +#endif -#ifdef __has_include - #if __has_include() - #include +// To use the library with toolchains without standard C++ headers, the users may define this macro into +// their custom namespace which contains the definitions of all the standard C++ library features used in +// this header. 
(The list can be found below.) +#ifndef JKJ_STD_REPLACEMENT_NAMESPACE + #define JKJ_STD_REPLACEMENT_NAMESPACE std + #include + #include + #include + #include + #include + + #ifdef __has_include + #if __has_include() + #include + #endif #endif +#else + #define JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED 1 #endif //////////////////////////////////////////////////////////////////////////////////////// @@ -98,7 +115,13 @@ #endif // C++20 std::bit_cast -#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#if JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED + #if JKJ_STD_REPLACEMENT_HAS_BIT_CAST + #define JKJ_HAS_BIT_CAST 1 + #else + #define JKJ_HAS_BIT_CAST 0 + #endif +#elif defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L #include #define JKJ_HAS_BIT_CAST 1 #else @@ -110,18 +133,34 @@ #define JKJ_IF_CONSTEVAL if consteval #define JKJ_IF_NOT_CONSTEVAL if !consteval #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 -#elif defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L - #define JKJ_IF_CONSTEVAL if (std::is_constant_evaluated()) - #define JKJ_IF_NOT_CONSTEVAL if (!std::is_constant_evaluated()) - #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 -#elif JKJ_HAS_IF_CONSTEXPR - #define JKJ_IF_CONSTEVAL if constexpr (false) - #define JKJ_IF_NOT_CONSTEVAL if constexpr (true) - #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 +#elif JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED + #if JKJ_STD_REPLACEMENT_HAS_IS_CONSTANT_EVALUATED + #define JKJ_IF_CONSTEVAL if (stdr::is_constant_evaluated()) + #define JKJ_IF_NOT_CONSTEVAL if (!stdr::is_constant_evaluated()) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 + #elif JKJ_HAS_IF_CONSTEXPR + #define JKJ_IF_CONSTEVAL if constexpr (false) + #define JKJ_IF_NOT_CONSTEVAL if constexpr (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #else + #define JKJ_IF_CONSTEVAL if (false) + #define JKJ_IF_NOT_CONSTEVAL if (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #endif #else - #define JKJ_IF_CONSTEVAL if (false) - #define JKJ_IF_NOT_CONSTEVAL if 
(true) - #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L + #define JKJ_IF_CONSTEVAL if (stdr::is_constant_evaluated()) + #define JKJ_IF_NOT_CONSTEVAL if (!stdr::is_constant_evaluated()) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 + #elif JKJ_HAS_IF_CONSTEXPR + #define JKJ_IF_CONSTEVAL if constexpr (false) + #define JKJ_IF_NOT_CONSTEVAL if constexpr (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #else + #define JKJ_IF_CONSTEVAL if (false) + #define JKJ_IF_NOT_CONSTEVAL if (true) + #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 + #endif #endif #if JKJ_CAN_BRANCH_ON_CONSTEVAL && JKJ_HAS_BIT_CAST @@ -157,30 +196,82 @@ namespace jkj { namespace dragonbox { //////////////////////////////////////////////////////////////////////////////////////// - // Some general utilities for C++11-compatibility + // The Compatibility layer for toolchains without standard C++ headers. + //////////////////////////////////////////////////////////////////////////////////////// + namespace detail { + namespace stdr { + // +#if JKJ_STD_REPLACEMENT_HAS_BIT_CAST + using JKJ_STD_REPLACEMENT_NAMESPACE::bit_cast; +#endif + + // + // We need assert() macro, but it is not namespaced anyway, so nothing to do here. + + // + using JKJ_STD_REPLACEMENT_NAMESPACE::int_least32_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least32_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least64_t; + // We need UINT32_C and UINT64_C macros too, but again there is nothing to do here. 
+ + // + using JKJ_STD_REPLACEMENT_NAMESPACE::size_t; + using JKJ_STD_REPLACEMENT_NAMESPACE::memcpy; + + // + template + using numeric_limits = JKJ_STD_REPLACEMENT_NAMESPACE::numeric_limits; + + // + template + using enable_if = JKJ_STD_REPLACEMENT_NAMESPACE::enable_if; + template + using add_rvalue_reference = JKJ_STD_REPLACEMENT_NAMESPACE::add_rvalue_reference; + template + using conditional = JKJ_STD_REPLACEMENT_NAMESPACE::conditional; + template + using is_base_of = JKJ_STD_REPLACEMENT_NAMESPACE::is_base_of; +#if JKJ_STD_REPLACEMENT_HAS_IS_CONSTANT_EVALUATED + using JKJ_STD_REPLACEMENT_NAMESPACE::is_constant_evaluated; +#endif + template + using is_same = JKJ_STD_REPLACEMENT_NAMESPACE::is_same; +#if !JKJ_HAS_BIT_CAST + template + using is_trivially_copyable = JKJ_STD_REPLACEMENT_NAMESPACE::is_trivially_copyable; +#endif + template + using is_unsigned = JKJ_STD_REPLACEMENT_NAMESPACE::is_unsigned; + } + } + + + //////////////////////////////////////////////////////////////////////////////////////// + // Some general utilities for C++11-compatibility. //////////////////////////////////////////////////////////////////////////////////////// namespace detail { #if !JKJ_HAS_CONSTEXPR17 - template + template struct index_sequence {}; - template + template struct make_index_sequence_impl { using type = typename make_index_sequence_impl::type; }; - template + template struct make_index_sequence_impl { using type = index_sequence; }; - template + template using make_index_sequence = typename make_index_sequence_impl<0, N, void>::type; #endif + // Available since C++11, but including just for this is an overkill. 
template - typename std::add_rvalue_reference::type declval() noexcept; + typename stdr::add_rvalue_reference::type declval() noexcept; } @@ -190,25 +281,25 @@ namespace jkj { namespace detail { template struct physical_bits { - static constexpr std::size_t value = - sizeof(T) * std::numeric_limits::digits; + static constexpr stdr::size_t value = + sizeof(T) * stdr::numeric_limits::digits; }; template struct value_bits { - static constexpr std::size_t value = std::numeric_limits< - typename std::enable_if::value, T>::type>::digits; + static constexpr stdr::size_t value = stdr::numeric_limits< + typename stdr::enable_if::value, T>::type>::digits; }; template JKJ_CONSTEXPR20 To bit_cast(const From& from) { #if JKJ_HAS_BIT_CAST - return std::bit_cast(from); + return stdr::bit_cast(from); #else static_assert(sizeof(From) == sizeof(To), ""); - static_assert(std::is_trivially_copyable::value, ""); - static_assert(std::is_trivially_copyable::value, ""); + static_assert(stdr::is_trivially_copyable::value, ""); + static_assert(stdr::is_trivially_copyable::value, ""); To to; - std::memcpy(&to, &from, sizeof(To)); + stdr::memcpy(&to, &from, sizeof(To)); return to; #endif } @@ -243,7 +334,8 @@ namespace jkj { struct default_float_traits { // I don't know if there is a truly reliable way of detecting // IEEE-754 binary32/binary64 formats; I just did my best here. - static_assert(std::numeric_limits::is_iec559 && std::numeric_limits::radix == 2 && + static_assert(detail::stdr::numeric_limits::is_iec559 && + detail::stdr::numeric_limits::radix == 2 && (detail::physical_bits::value == 32 || detail::physical_bits::value == 64), "default_ieee754_traits only works for 32-bits or 64-bits types " @@ -253,13 +345,15 @@ namespace jkj { using type = T; // Refers to the format specification class. 
- using format = typename std::conditional::value == 32, - ieee754_binary32, ieee754_binary64>::type; + using format = typename detail::stdr::conditional::value == 32, + ieee754_binary32, ieee754_binary64>::type; // Defines an unsigned integer type that is large enough to carry a variable of type T. // Most of the operations will be done on this integer type. - using carrier_uint = typename std::conditional::value == 32, - std::uint32_t, std::uint64_t>::type; + using carrier_uint = + typename detail::stdr::conditional::value == 32, + detail::stdr::uint_least32_t, + detail::stdr::uint_least64_t>::type; static_assert(sizeof(carrier_uint) == sizeof(T), ""); // Number of bits in the above unsigned integer type. @@ -458,13 +552,11 @@ namespace jkj { namespace bits { // Most compilers should be able to optimize this into the ROR instruction. - inline JKJ_CONSTEXPR14 std::uint32_t rotr(std::uint32_t n, std::uint32_t r) noexcept { - r &= 31; - return (n >> r) | (n << (32 - r)); - } - inline JKJ_CONSTEXPR14 std::uint64_t rotr(std::uint64_t n, std::uint32_t r) noexcept { - r &= 63; - return (n >> r) | (n << (64 - r)); + template + JKJ_CONSTEXPR14 UInt rotr(UInt n, unsigned int r) noexcept { + static_assert(bit_width <= value_bits::value, "rotation bit width too large"); + r &= (bit_width - 1); + return (n >> r) | (n << (bit_width - r)); } } @@ -492,26 +584,31 @@ namespace jkj { struct uint128 { uint128() = default; - std::uint64_t high_; - std::uint64_t low_; + stdr::uint_least64_t high_; + stdr::uint_least64_t low_; - constexpr uint128(std::uint64_t high, std::uint64_t low) noexcept + constexpr uint128(stdr::uint_least64_t high, stdr::uint_least64_t low) noexcept : high_{high}, low_{low} {} - constexpr std::uint64_t high() const noexcept { return high_; } - constexpr std::uint64_t low() const noexcept { return low_; } + constexpr stdr::uint_least64_t high() const noexcept { return high_; } + constexpr stdr::uint_least64_t low() const noexcept { return low_; } - 
JKJ_CONSTEXPR20 uint128& operator+=(std::uint64_t n) & noexcept { + JKJ_CONSTEXPR20 uint128& operator+=(stdr::uint_least64_t n) & noexcept { auto const generic_impl = [&] { - auto sum = low_ + n; + auto sum = (low_ + n) & UINT64_C(0xffffffffffffffff); high_ += (sum < low_ ? 1 : 0); low_ = sum; }; + // To suppress warning. + static_cast(generic_impl); + JKJ_IF_CONSTEVAL { generic_impl(); return *this; } #if JKJ_HAS_BUILTIN(__builtin_addcll) + static_assert(stdr::is_same::value && + value_bits::value == 64); unsigned long long carry{}; low_ = __builtin_addcll(low_, n, 0, &carry); high_ = __builtin_addcll(high_, 0, carry, &carry); @@ -531,36 +628,41 @@ namespace jkj { } }; - inline JKJ_CONSTEXPR20 std::uint64_t umul64(std::uint32_t x, std::uint32_t y) noexcept { + inline JKJ_CONSTEXPR20 stdr::uint_least64_t umul64(stdr::uint_least32_t x, + stdr::uint_least32_t y) noexcept { #if defined(_MSC_VER) && defined(_M_IX86) JKJ_IF_NOT_CONSTEVAL { return __emulu(x, y); } #endif - return x * std::uint64_t(y); + return x * stdr::uint_least64_t(y); } // Get 128-bit result of multiplication of two 64-bit unsigned integers. 
- JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul128(std::uint64_t x, - std::uint64_t y) noexcept { + JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 + umul128(stdr::uint_least64_t x, stdr::uint_least64_t y) noexcept { auto const generic_impl = [&]() -> uint128 { - auto a = std::uint32_t(x >> 32); - auto b = std::uint32_t(x); - auto c = std::uint32_t(y >> 32); - auto d = std::uint32_t(y); + auto a = stdr::uint_least32_t(x >> 32); + auto b = stdr::uint_least32_t(x); + auto c = stdr::uint_least32_t(y >> 32); + auto d = stdr::uint_least32_t(y); auto ac = umul64(a, c); auto bc = umul64(b, c); auto ad = umul64(a, d); auto bd = umul64(b, d); - auto intermediate = (bd >> 32) + std::uint32_t(ad) + std::uint32_t(bc); + auto intermediate = + (bd >> 32) + stdr::uint_least32_t(ad) + stdr::uint_least32_t(bc); return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), - (intermediate << 32) + std::uint32_t(bd)}; + (intermediate << 32) + stdr::uint_least32_t(bd)}; }; + // To suppress warning. + static_cast(generic_impl); + JKJ_IF_CONSTEVAL { return generic_impl(); } #if defined(__SIZEOF_INT128__) auto result = builtin_uint128_t(x) * builtin_uint128_t(y); - return {std::uint64_t(result >> 64), std::uint64_t(result)}; + return {stdr::uint_least64_t(result >> 64), stdr::uint_least64_t(result)}; #elif defined(_MSC_VER) && defined(_M_X64) uint128 result; result.low_ = _umul128(x, y, &result.high_); @@ -570,27 +672,31 @@ namespace jkj { #endif } - JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 std::uint64_t - umul128_upper64(std::uint64_t x, std::uint64_t y) noexcept { - auto const generic_impl = [&]() -> std::uint64_t { - auto a = std::uint32_t(x >> 32); - auto b = std::uint32_t(x); - auto c = std::uint32_t(y >> 32); - auto d = std::uint32_t(y); + JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 stdr::uint_least64_t + umul128_upper64(stdr::uint_least64_t x, stdr::uint_least64_t y) noexcept { + auto const generic_impl = [&]() -> stdr::uint_least64_t { + auto a = stdr::uint_least32_t(x >> 32); + 
auto b = stdr::uint_least32_t(x); + auto c = stdr::uint_least32_t(y >> 32); + auto d = stdr::uint_least32_t(y); auto ac = umul64(a, c); auto bc = umul64(b, c); auto ad = umul64(a, d); auto bd = umul64(b, d); - auto intermediate = (bd >> 32) + std::uint32_t(ad) + std::uint32_t(bc); + auto intermediate = + (bd >> 32) + stdr::uint_least32_t(ad) + stdr::uint_least32_t(bc); return ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32); }; + // To suppress warning. + static_cast(generic_impl); + JKJ_IF_CONSTEVAL { return generic_impl(); } #if defined(__SIZEOF_INT128__) auto result = builtin_uint128_t(x) * builtin_uint128_t(y); - return std::uint64_t(result >> 64); + return stdr::uint_least64_t(result >> 64); #elif defined(_MSC_VER) && defined(_M_X64) return __umulh(x, y); #else @@ -600,7 +706,7 @@ namespace jkj { // Get upper 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit // unsigned integer. - JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul192_upper128(std::uint64_t x, + JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul192_upper128(stdr::uint_least64_t x, uint128 y) noexcept { auto r = umul128(x, y.high()); r += umul128_upper64(x, y.low()); @@ -609,13 +715,13 @@ namespace jkj { // Get upper 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit // unsigned integer. 
- inline JKJ_CONSTEXPR20 std::uint64_t umul96_upper64(std::uint32_t x, - std::uint64_t y) noexcept { + inline JKJ_CONSTEXPR20 stdr::uint_least64_t + umul96_upper64(stdr::uint_least32_t x, stdr::uint_least64_t y) noexcept { #if defined(__SIZEOF_INT128__) || (defined(_MSC_VER) && defined(_M_X64)) - return umul128_upper64(std::uint64_t(x) << 32, y); + return umul128_upper64(stdr::uint_least64_t(x) << 32, y); #else - auto yh = std::uint32_t(y >> 32); - auto yl = std::uint32_t(y); + auto yh = stdr::uint_least32_t(y >> 32); + auto yl = stdr::uint_least32_t(y); auto xyh = umul64(x, yh); auto xyl = umul64(x, yl); @@ -626,17 +732,18 @@ namespace jkj { // Get lower 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit // unsigned integer. - JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul192_lower128(std::uint64_t x, + JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul192_lower128(stdr::uint_least64_t x, uint128 y) noexcept { auto high = x * y.high(); auto high_low = umul128(x, y.low()); - return {high + high_low.high(), high_low.low()}; + return {(high + high_low.high()) & UINT64_C(0xffffffffffffffff), high_low.low()}; } // Get lower 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit // unsigned integer. - constexpr std::uint64_t umul96_lower64(std::uint32_t x, std::uint64_t y) noexcept { - return x * y; + constexpr stdr::uint_least64_t umul96_lower64(stdr::uint_least32_t x, + stdr::uint_least64_t y) noexcept { + return (x * y) & UINT64_C(0xffffffffffffffff); } } @@ -680,21 +787,24 @@ namespace jkj { //////////////////////////////////////////////////////////////////////////////////////// namespace log { - static_assert((-1 >> 1) == -1, "right-shift for signed integers must be arithmetic"); + static_assert((stdr::int_least32_t(-1) >> 1) == stdr::int_least32_t(-1), + "right-shift for signed integers must be arithmetic"); // Compute floor(e * c - s). 
- enum class multiply : std::uint32_t {}; - enum class subtract : std::uint32_t {}; - enum class shift : std::size_t {}; - enum class min_exponent : std::int32_t {}; - enum class max_exponent : std::int32_t {}; + enum class multiply : stdr::uint_least32_t {}; + enum class subtract : stdr::uint_least32_t {}; + enum class shift : stdr::size_t {}; + enum class min_exponent : stdr::int_least32_t {}; + enum class max_exponent : stdr::int_least32_t {}; template constexpr int compute(int e) noexcept { #if JKJ_HAS_CONSTEXPR14 - assert(std::int32_t(e_min) <= e && e <= std::int32_t(e_max)); + assert(stdr::int_least32_t(e_min) <= e && e <= stdr::int_least32_t(e_max)); #endif - return int((std::int32_t(e) * std::int32_t(m) - std::int32_t(f)) >> std::size_t(k)); + return int( + (stdr::int_least32_t(e) * stdr::int_least32_t(m) - stdr::int_least32_t(f)) >> + stdr::size_t(k)); } // For constexpr computation. @@ -717,7 +827,7 @@ namespace jkj { static constexpr int floor_log10_pow2_max_exponent = 2620; constexpr int floor_log10_pow2(int e) noexcept { using namespace log; - return compute(e); } @@ -726,7 +836,7 @@ namespace jkj { static constexpr int floor_log2_pow10_max_exponent = 1233; constexpr int floor_log2_pow10(int e) noexcept { using namespace log; - return compute(e); } @@ -735,7 +845,7 @@ namespace jkj { static constexpr int floor_log10_pow2_minus_log10_4_over_3_max_exponent = 2936; constexpr int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { using namespace log; - return compute(e); } @@ -744,7 +854,7 @@ namespace jkj { static constexpr int floor_log5_pow2_max_exponent = 1831; constexpr int floor_log5_pow2(int e) noexcept { using namespace log; - return compute(e); } @@ -753,7 +863,7 @@ namespace jkj { static constexpr int floor_log5_pow2_minus_log5_3_max_exponent = 2427; constexpr int floor_log5_pow2_minus_log5_3(int e) noexcept { using namespace log; - return compute(e); } @@ -773,26 +883,28 @@ namespace jkj { template <> struct divide_by_pow10_info<1> { - 
static constexpr std::uint32_t magic_number = 6554; + static constexpr stdr::uint_least32_t magic_number = 6554; static constexpr int shift_amount = 16; }; template <> struct divide_by_pow10_info<2> { - static constexpr std::uint32_t magic_number = 656; + static constexpr stdr::uint_least32_t magic_number = 656; static constexpr int shift_amount = 16; }; template - JKJ_CONSTEXPR14 bool check_divisibility_and_divide_by_pow10(std::uint32_t& n) noexcept { + JKJ_CONSTEXPR14 bool + check_divisibility_and_divide_by_pow10(stdr::uint_least32_t& n) noexcept { // Make sure the computation for max_n does not overflow. static_assert(N + 1 <= log::floor_log10_pow2(31), ""); - assert(n <= compute_power(std::uint32_t(10))); + assert(n <= compute_power(stdr::uint_least32_t(10))); using info = divide_by_pow10_info; n *= info::magic_number; - constexpr auto mask = std::uint32_t(std::uint32_t(1) << info::shift_amount) - 1; + constexpr auto mask = + stdr::uint_least32_t(stdr::uint_least32_t(1) << info::shift_amount) - 1; bool result = ((n & mask) < info::magic_number); n >>= info::shift_amount; @@ -802,10 +914,11 @@ namespace jkj { // Compute floor(n / 10^N) for small n and N. // Precondition: n <= 10^(N+1) template - JKJ_CONSTEXPR14 std::uint32_t small_division_by_pow10(std::uint32_t n) noexcept { + JKJ_CONSTEXPR14 stdr::uint_least32_t + small_division_by_pow10(stdr::uint_least32_t n) noexcept { // Make sure the computation for max_n does not overflow. static_assert(N + 1 <= log::floor_log10_pow2(31), ""); - assert(n <= compute_power(std::uint32_t(10))); + assert(n <= compute_power(stdr::uint_least32_t(10))); return (n * divide_by_pow10_info::magic_number) >> divide_by_pow10_info::shift_amount; @@ -822,14 +935,14 @@ namespace jkj { // "n / 100", but for some reason MSVC generates an inefficient code // (mul + mov for no apparent reason, instead of single imul), // so we does this manually. 
- JKJ_IF_CONSTEXPR(std::is_same::value && N == 2) { - return std::uint32_t(wuint::umul64(n, std::uint32_t(1374389535)) >> 37); + JKJ_IF_CONSTEXPR(stdr::is_same::value && N == 2) { + return stdr::uint_least32_t(wuint::umul64(n, UINT32_C(1374389535)) >> 37); } // Specialize for 64-bit division by 1000. // Ensure that the correctness condition is met. - else JKJ_IF_CONSTEXPR(std::is_same::value && N == 3 && - n_max <= std::uint64_t(15534100272597517998ull)) { - return wuint::umul128_upper64(n, std::uint64_t(2361183241434822607ull)) >> 7; + else JKJ_IF_CONSTEXPR(stdr::is_same::value && N == 3 && + n_max <= UINT64_C(15534100272597517998)) { + return wuint::umul128_upper64(n, UINT64_C(2361183241434822607)) >> 7; } else { constexpr auto divisor = compute_power(UInt(10)); @@ -925,11 +1038,11 @@ namespace jkj { template struct cache_holder { - using cache_entry_type = std::uint64_t; + using cache_entry_type = stdr::uint_least64_t; static constexpr int cache_bits = 64; static constexpr int min_k = -31; static constexpr int max_k = 46; - static constexpr cache_entry_type cache[max_k - min_k + 1] = { + static constexpr cache_entry_type cache[max_k - min_k + 1] JKJ_STATIC_DATA_SECTION = { 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f, 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, @@ -963,7 +1076,7 @@ namespace jkj { static constexpr int cache_bits = 128; static constexpr int min_k = -292; static constexpr int max_k = 326; - static constexpr cache_entry_type cache[max_k - min_k + 1] = { + static constexpr cache_entry_type cache[max_k - min_k + 1] JKJ_STATIC_DATA_SECTION = { {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0x9faacf3df73609b1, 0x77b191618c54e9ad}, {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, {0x9becce62836ac577, 0x4ee367f9430aec33}, {0xc2e801fb244576d5, 0x229c41f793cda740}, @@ -1285,7 
+1398,7 @@ namespace jkj { template struct compressed_cache_detail { static constexpr int compression_ratio = 27; - static constexpr std::size_t compressed_table_size = + static constexpr stdr::size_t compressed_table_size = (cache_holder::max_k - cache_holder::min_k + compression_ratio) / compression_ratio; @@ -1294,39 +1407,39 @@ namespace jkj { wuint::uint128 table[compressed_table_size]; }; struct pow5_holder_t { - std::uint64_t table[compression_ratio]; + stdr::uint_least64_t table[compression_ratio]; }; #if JKJ_HAS_CONSTEXPR17 - static constexpr cache_holder_t cache = [] { + static constexpr cache_holder_t cache JKJ_STATIC_DATA_SECTION = [] { cache_holder_t res{}; - for (std::size_t i = 0; i < compressed_table_size; ++i) { + for (stdr::size_t i = 0; i < compressed_table_size; ++i) { res.table[i] = cache_holder::cache[i * compression_ratio]; } return res; }(); - static constexpr pow5_holder_t pow5 = [] { + static constexpr pow5_holder_t pow5 JKJ_STATIC_DATA_SECTION = [] { pow5_holder_t res{}; - std::uint64_t p = 1; - for (std::size_t i = 0; i < compression_ratio; ++i) { + stdr::uint_least64_t p = 1; + for (stdr::size_t i = 0; i < compression_ratio; ++i) { res.table[i] = p; p *= 5; } return res; }(); #else - template + template static constexpr cache_holder_t make_cache_table(index_sequence) { return {cache_holder::cache[indices * compression_ratio]...}; } - static constexpr cache_holder_t cache = + static constexpr cache_holder_t cache JKJ_STATIC_DATA_SECTION = make_cache_table(make_index_sequence{}); - template + template static constexpr pow5_holder_t make_pow5_table(index_sequence) { - return {compute_power(std::uint64_t(5))...}; + return {compute_power(stdr::uint_least64_t(5))...}; } - static constexpr pow5_holder_t pow5 = + static constexpr pow5_holder_t pow5 JKJ_STATIC_DATA_SECTION = make_pow5_table(make_index_sequence{}); #endif }; @@ -1859,7 +1972,7 @@ namespace jkj { assert(k >= cache_holder::min_k && k <= cache_holder::max_k); #endif - return 
cache_holder::cache[std::size_t( + return cache_holder::cache[stdr::size_t( k - cache_holder::min_k)]; } }; @@ -1880,9 +1993,9 @@ namespace jkj { static JKJ_CONSTEXPR20 cache_holder::cache_entry_type get_cache(int k) noexcept { // Compute the base index. - auto const cache_index = - int(std::uint32_t(k - cache_holder::min_k) / - compressed_cache_detail<>::compression_ratio); + auto const cache_index = int( + stdr::uint_least32_t(k - cache_holder::min_k) / + compressed_cache_detail<>::compression_ratio); auto const kb = cache_index * compressed_cache_detail<>::compression_ratio + cache_holder::min_k; @@ -1908,14 +2021,17 @@ namespace jkj { recovered_cache += middle_low.high(); - auto const high_to_middle = recovered_cache.high() << (64 - alpha); - auto const middle_to_low = recovered_cache.low() << (64 - alpha); + auto const high_to_middle = + (recovered_cache.high() << (64 - alpha)) & + UINT64_C(0xffffffffffffffff); + auto const middle_to_low = (recovered_cache.low() << (64 - alpha)) & + UINT64_C(0xffffffffffffffff); recovered_cache = wuint::uint128{ (recovered_cache.low() >> alpha) | high_to_middle, ((middle_low.low() >> alpha) | middle_to_low)}; - assert(recovered_cache.low() + 1 != 0); + assert(recovered_cache.low() != UINT64_C(0xffffffffffffffff)); recovered_cache = {recovered_cache.high(), recovered_cache.low() + 1}; @@ -2020,7 +2136,7 @@ namespace jkj { using format::exponent_bias; using format::decimal_digits; - static constexpr int kappa = std::is_same::value ? 1 : 2; + static constexpr int kappa = stdr::is_same::value ? 
1 : 2; static_assert(kappa >= 1, ""); static_assert(carrier_bits >= significand_bits + 2 + log::floor_log2_pow10(kappa + 1), ""); @@ -2116,8 +2232,8 @@ namespace jkj { // Step 2: Try larger divisor; remove trailing zeros if necessary ////////////////////////////////////////////////////////////////////// - constexpr auto big_divisor = compute_power(std::uint32_t(10)); - constexpr auto small_divisor = compute_power(std::uint32_t(10)); + constexpr auto big_divisor = compute_power(stdr::uint_least32_t(10)); + constexpr auto small_divisor = compute_power(stdr::uint_least32_t(10)); // Using an upper bound on zi, we might be able to optimize the division // better than the compiler; we are computing zi / big_divisor here. @@ -2125,13 +2241,14 @@ namespace jkj { div::divide_by_pow10(z_result.integer_part); - auto r = std::uint32_t(z_result.integer_part - big_divisor * decimal_significand); + auto r = + stdr::uint_least32_t(z_result.integer_part - big_divisor * decimal_significand); do { if (r < deltai) { // Exclude the right endpoint if necessary. - if (r == 0 && - (z_result.is_integer & !interval_type.include_right_endpoint())) { + if ((r | !z_result.is_integer | interval_type.include_right_endpoint()) == + 0) { JKJ_IF_CONSTEXPR( BinaryToDecimalRoundingPolicy::tag == policy_impl::binary_to_decimal_rounding::tag_t::do_not_care) { @@ -2318,7 +2435,7 @@ namespace jkj { // and 29711844 * 2^-81 // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17 // for binary32. 
- JKJ_IF_CONSTEXPR(std::is_same::value) { + JKJ_IF_CONSTEXPR(stdr::is_same::value) { if (binary_exponent <= -80) { x_result.is_integer = false; } @@ -2332,7 +2449,7 @@ namespace jkj { // Step 2: Try larger divisor; remove trailing zeros if necessary ////////////////////////////////////////////////////////////////////// - constexpr auto big_divisor = compute_power(std::uint32_t(10)); + constexpr auto big_divisor = compute_power(stdr::uint_least32_t(10)); // Using an upper bound on xi, we might be able to optimize the division // better than the compiler; we are computing xi / big_divisor here. @@ -2340,7 +2457,8 @@ namespace jkj { div::divide_by_pow10(x_result.integer_part); - auto r = std::uint32_t(x_result.integer_part - big_divisor * decimal_significand); + auto r = + stdr::uint_least32_t(x_result.integer_part - big_divisor * decimal_significand); if (r != 0) { ++decimal_significand; @@ -2409,7 +2527,7 @@ namespace jkj { // Step 2: Try larger divisor; remove trailing zeros if necessary ////////////////////////////////////////////////////////////////////// - constexpr auto big_divisor = compute_power(std::uint32_t(10)); + constexpr auto big_divisor = compute_power(stdr::uint_least32_t(10)); // Using an upper bound on zi, we might be able to optimize the division better // than the compiler; we are computing zi / big_divisor here. 
@@ -2417,7 +2535,7 @@ namespace jkj { div::divide_by_pow10(zi); - auto const r = std::uint32_t(zi - big_divisor * decimal_significand); + auto const r = stdr::uint_least32_t(zi - big_divisor * decimal_significand); do { if (r > deltai) { @@ -2453,14 +2571,14 @@ namespace jkj { remove_trailing_zeros(carrier_uint& n) noexcept { assert(n != 0); - JKJ_IF_CONSTEXPR(std::is_same::value) { - constexpr auto mod_inv_5 = std::uint32_t(0xcccccccd); - constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; + JKJ_IF_CONSTEXPR(stdr::is_same::value) { + constexpr auto mod_inv_5 = UINT32_C(0xcccccccd); + constexpr auto mod_inv_25 = (mod_inv_5 * mod_inv_5) & UINT32_C(0xffffffff); int s = 0; while (true) { - auto q = bits::rotr(n * mod_inv_25, 2); - if (q <= std::numeric_limits::max() / 100) { + auto q = bits::rotr<32>((n * mod_inv_25) & UINT32_C(0xffffffff), 2); + if (q <= UINT32_C(0xffffffff) / 100) { n = q; s += 2; } @@ -2468,8 +2586,8 @@ namespace jkj { break; } } - auto q = bits::rotr(n * mod_inv_5, 1); - if (q <= std::numeric_limits::max() / 10) { + auto q = bits::rotr<32>((n * mod_inv_5) & UINT32_C(0xffffffff), 1); + if (q <= UINT32_C(0xffffffff) / 10) { n = q; s |= 1; } @@ -2478,7 +2596,7 @@ namespace jkj { } else { #if JKJ_HAS_IF_CONSTEXPR - static_assert(std::is_same::value, ""); + static_assert(stdr::is_same::value, ""); #endif // Divide by 10^8 and reduce to 32-bits if divisible. @@ -2486,22 +2604,22 @@ namespace jkj { // n is at most of 16 digits. // This magic number is ceil(2^90 / 10^8). - constexpr auto magic_number = std::uint64_t(12379400392853802749ull); + constexpr auto magic_number = UINT64_C(12379400392853802749); auto nm = wuint::umul128(n, magic_number); // Is n is divisible by 10^8? - if ((nm.high() & ((std::uint64_t(1) << (90 - 64)) - 1)) == 0 && + if ((nm.high() & ((stdr::uint_least64_t(1) << (90 - 64)) - 1)) == 0 && nm.low() < magic_number) { // If yes, work with the quotient. 
- auto n32 = std::uint32_t(nm.high() >> (90 - 64)); + auto n32 = stdr::uint_least32_t(nm.high() >> (90 - 64)); - constexpr auto mod_inv_5 = std::uint32_t(0xcccccccd); - constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; + constexpr auto mod_inv_5 = UINT32_C(0xcccccccd); + constexpr auto mod_inv_25 = (mod_inv_5 * mod_inv_5) & UINT32_C(0xffffffff); int s = 8; while (true) { - auto q = bits::rotr(n32 * mod_inv_25, 2); - if (q <= std::numeric_limits::max() / 100) { + auto q = bits::rotr<32>((n32 * mod_inv_25) & UINT32_C(0xffffffff), 2); + if (q <= UINT32_C(0xffffffff) / 100) { n32 = q; s += 2; } @@ -2509,8 +2627,8 @@ namespace jkj { break; } } - auto q = bits::rotr(n32 * mod_inv_5, 1); - if (q <= std::numeric_limits::max() / 10) { + auto q = bits::rotr<32>((n32 * mod_inv_5) & UINT32_C(0xffffffff), 1); + if (q <= UINT32_C(0xffffffff) / 10) { n32 = q; s |= 1; } @@ -2520,13 +2638,14 @@ namespace jkj { } // If n is not divisible by 10^8, work with n itself. - constexpr auto mod_inv_5 = std::uint64_t(0xcccccccccccccccd); - constexpr auto mod_inv_25 = mod_inv_5 * mod_inv_5; + constexpr auto mod_inv_5 = UINT64_C(0xcccccccccccccccd); + constexpr auto mod_inv_25 = + (mod_inv_5 * mod_inv_5) & UINT64_C(0xffffffffffffffff); int s = 0; while (true) { - auto q = bits::rotr(n * mod_inv_25, 2); - if (q <= std::numeric_limits::max() / 100) { + auto q = bits::rotr<64>((n * mod_inv_25) & UINT64_C(0xffffffffffffffff), 2); + if (q <= UINT64_C(0xffffffffffffffff) / 100) { n = q; s += 2; } @@ -2534,8 +2653,8 @@ namespace jkj { break; } } - auto q = bits::rotr(n * mod_inv_5, 1); - if (q <= std::numeric_limits::max() / 10) { + auto q = bits::rotr<64>((n * mod_inv_5) & UINT64_C(0xffffffffffffffff), 1); + if (q <= UINT64_C(0xffffffffffffffff) / 10) { n = q; s |= 1; } @@ -2552,9 +2671,9 @@ namespace jkj { return {carrier_uint(r >> 32), carrier_uint(r) == 0}; } - static constexpr std::uint32_t compute_delta(cache_entry_type const& cache, - int beta) noexcept { - return std::uint32_t(cache >> 
(cache_bits - 1 - beta)); + static constexpr stdr::uint_least32_t compute_delta(cache_entry_type const& cache, + int beta) noexcept { + return stdr::uint_least32_t(cache >> (cache_bits - 1 - beta)); } static JKJ_CONSTEXPR20 compute_mul_parity_result compute_mul_parity( @@ -2563,7 +2682,8 @@ namespace jkj { assert(beta < 64); auto r = wuint::umul96_lower64(two_f, cache); - return {((r >> (64 - beta)) & 1) != 0, std::uint32_t(r >> (32 - beta)) == 0}; + return {((r >> (64 - beta)) & 1) != 0, + (UINT32_C(0xffffffff) & (r >> (32 - beta))) == 0}; } static constexpr carrier_uint @@ -2596,9 +2716,9 @@ namespace jkj { return {r.high(), r.low() == 0}; } - static constexpr std::uint32_t compute_delta(cache_entry_type const& cache, - int beta) noexcept { - return std::uint32_t(cache.high() >> (carrier_bits - 1 - beta)); + static constexpr stdr::uint_least32_t compute_delta(cache_entry_type const& cache, + int beta) noexcept { + return stdr::uint_least32_t(cache.high() >> (carrier_bits - 1 - beta)); } static JKJ_CONSTEXPR20 compute_mul_parity_result compute_mul_parity( @@ -2608,7 +2728,8 @@ namespace jkj { auto r = wuint::umul192_lower128(two_f, cache); return {((r.high() >> (64 - beta)) & 1) != 0, - ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; + (((r.high() << beta) & UINT64_C(0xffffffffffffffff)) | + (r.low() >> (64 - beta))) == 0}; } static constexpr carrier_uint @@ -2708,9 +2829,9 @@ namespace jkj { template struct get_found_policy_pair_impl { - using type = typename std::conditional< - std::is_base_of::value, - typename std::conditional< + using type = typename stdr::conditional< + stdr::is_base_of::value, + typename stdr::conditional< FoundPolicyInfo::found_info == policy_found_info::not_found, typename get_found_policy_pair_impl< found_policy_pair, @@ -2741,7 +2862,7 @@ namespace jkj { constexpr bool check_policy_validity( Policy, base_default_pair_list) { - return std::is_base_of::value || + return stdr::is_base_of::value || check_policy_validity( Policy{}, 
base_default_pair_list{}); } @@ -2955,8 +3076,8 @@ namespace jkj { // 10^-308. This is indeed of the shortest length, and it is the unique // one closest to the true value among valid representations of the same // length. - static_assert(std::is_same::value || - std::is_same::value, + static_assert(stdr::is_same::value || + stdr::is_same::value, ""); if (two_fc == 0) { @@ -3093,5 +3214,15 @@ namespace jkj { #undef JKJ_HAS_CONSTEXPR17 #undef JKJ_CONSTEXPR14 #undef JKJ_HAS_CONSTEXPR14 +#if JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED + #undef JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED +#else + #undef JKJ_STD_REPLACEMENT_NAMESPACE +#endif +#if JKJ_STATIC_DATA_SECTION_DEFINED + #undef JKJ_STATIC_DATA_SECTION_DEFINED +#else + #undef JKJ_STATIC_DATA_SECTION +#endif #endif diff --git a/include/dragonbox/dragonbox_to_chars.h b/include/dragonbox/dragonbox_to_chars.h index 403c915..5cf5bfe 100644 --- a/include/dragonbox/dragonbox_to_chars.h +++ b/include/dragonbox/dragonbox_to_chars.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022 Junekey Jeon +// Copyright 2020-2024 Junekey Jeon // // The contents of this file may be used under the terms of // the Apache License v2.0 with LLVM Exceptions. @@ -37,89 +37,90 @@ #define JKJ_INLINE_VARIABLE static constexpr #endif -namespace jkj::dragonbox { - namespace to_chars_detail { - template - extern char* to_chars(typename FloatTraits::carrier_uint significand, int exponent, - char* buffer) noexcept; +namespace jkj { + namespace dragonbox { + namespace detail { + template + extern char* to_chars(typename FloatTraits::carrier_uint significand, int exponent, + char* buffer) noexcept; - // Avoid needless ABI overhead incurred by tag dispatch. - template - char* to_chars_n_impl(float_bits br, char* buffer) noexcept { - auto const exponent_bits = br.extract_exponent_bits(); - auto const s = br.remove_exponent_bits(exponent_bits); + // Avoid needless ABI overhead incurred by tag dispatch. 
+ template + char* to_chars_n_impl(float_bits br, char* buffer) noexcept { + auto const exponent_bits = br.extract_exponent_bits(); + auto const s = br.remove_exponent_bits(exponent_bits); - if (br.is_finite(exponent_bits)) { - if (s.is_negative()) { - *buffer = '-'; - ++buffer; - } - if (br.is_nonzero()) { - auto result = to_decimal( - s, exponent_bits, policy::sign::ignore, policy::trailing_zero::ignore, - typename PolicyHolder::decimal_to_binary_rounding_policy{}, - typename PolicyHolder::binary_to_decimal_rounding_policy{}, - typename PolicyHolder::cache_policy{}); - return to_chars_detail::to_chars(result.significand, - result.exponent, buffer); - } - else { - std::memcpy(buffer, "0E0", 3); - return buffer + 3; - } - } - else { - if (s.has_all_zero_significand_bits()) { + if (br.is_finite(exponent_bits)) { if (s.is_negative()) { *buffer = '-'; ++buffer; } - std::memcpy(buffer, "Infinity", 8); - return buffer + 8; + if (br.is_nonzero()) { + auto result = to_decimal( + s, exponent_bits, policy::sign::ignore, policy::trailing_zero::ignore, + typename PolicyHolder::decimal_to_binary_rounding_policy{}, + typename PolicyHolder::binary_to_decimal_rounding_policy{}, + typename PolicyHolder::cache_policy{}); + return to_chars(result.significand, result.exponent, + buffer); + } + else { + stdr::memcpy(buffer, "0E0", 3); + return buffer + 3; + } } else { - std::memcpy(buffer, "NaN", 3); - return buffer + 3; + if (s.has_all_zero_significand_bits()) { + if (s.is_negative()) { + *buffer = '-'; + ++buffer; + } + stdr::memcpy(buffer, "Infinity", 8); + return buffer + 8; + } + else { + stdr::memcpy(buffer, "NaN", 3); + return buffer + 3; + } } } } - } - // Returns the next-to-end position - template , class... Policies> - char* to_chars_n(Float x, char* buffer, Policies... 
policies) noexcept { - using namespace jkj::dragonbox::detail::policy_impl; - using policy_holder = decltype(make_policy_holder( - base_default_pair_list, - base_default_pair, - base_default_pair>{}, - policies...)); + // Returns the next-to-end position + template , class... Policies> + char* to_chars_n(Float x, char* buffer, Policies... policies) noexcept { + using namespace jkj::dragonbox::detail::policy_impl; + using policy_holder = decltype(make_policy_holder( + base_default_pair_list, + base_default_pair, + base_default_pair>{}, + policies...)); - return to_chars_detail::to_chars_n_impl(float_bits(x), - buffer); - } + return detail::to_chars_n_impl(float_bits(x), buffer); + } - // Null-terminate and bypass the return value of fp_to_chars_n - template , class... Policies> - char* to_chars(Float x, char* buffer, Policies... policies) noexcept { - auto ptr = to_chars_n(x, buffer, policies...); - *ptr = '\0'; - return ptr; - } + // Null-terminate and bypass the return value of fp_to_chars_n + template , class... Policies> + char* to_chars(Float x, char* buffer, Policies... policies) noexcept { + auto ptr = to_chars_n(x, buffer, policies...); + *ptr = '\0'; + return ptr; + } - // Maximum required buffer size (excluding null-terminator) - template - JKJ_INLINE_VARIABLE std::size_t max_output_string_length = - std::is_same::value - ? - // sign(1) + significand(9) + decimal_point(1) + exp_marker(1) + exp_sign(1) + exp(2) - (1 + 9 + 1 + 1 + 1 + 2) - : - // format == ieee754_format::binary64 - // sign(1) + significand(17) + decimal_point(1) + exp_marker(1) + exp_sign(1) + exp(3) - (1 + 17 + 1 + 1 + 1 + 3); + // Maximum required buffer size (excluding null-terminator) + template + JKJ_INLINE_VARIABLE detail::stdr::size_t max_output_string_length = + detail::stdr::is_same::value + ? 
+ // sign(1) + significand(9) + decimal_point(1) + exp_marker(1) + exp_sign(1) + exp(2) + (1 + 9 + 1 + 1 + 1 + 2) + : + // format == ieee754_format::binary64 + // sign(1) + significand(17) + decimal_point(1) + exp_marker(1) + exp_sign(1) + exp(3) + (1 + 17 + 1 + 1 + 1 + 3); + } } #undef JKJ_INLINE_VARIABLE diff --git a/source/dragonbox_to_chars.cpp b/source/dragonbox_to_chars.cpp index 390f7d6..b59a166 100644 --- a/source/dragonbox_to_chars.cpp +++ b/source/dragonbox_to_chars.cpp @@ -1,4 +1,4 @@ -// Copyright 2020-2022 Junekey Jeon +// Copyright 2020-2024 Junekey Jeon // // The contents of this file may be used under the terms of // the Apache License v2.0 with LLVM Exceptions. @@ -18,6 +18,27 @@ #include "dragonbox/dragonbox_to_chars.h" +#ifndef JKJ_STATIC_DATA_SECTION + #define JKJ_STATIC_DATA_SECTION +#endif + +// C++17 if constexpr +#if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L + #define JKJ_HAS_IF_CONSTEXPR 1 +#elif __cplusplus >= 201703L + #define JKJ_HAS_IF_CONSTEXPR 1 +#elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L + #define JKJ_HAS_IF_CONSTEXPR 1 +#else + #define JKJ_HAS_IF_CONSTEXPR 0 +#endif + +#if JKJ_HAS_IF_CONSTEXPR + #define JKJ_IF_CONSTEXPR if constexpr +#else + #define JKJ_IF_CONSTEXPR if +#endif + #if defined(__GNUC__) || defined(__clang__) #define JKJ_FORCEINLINE inline __attribute__((always_inline)) #elif defined(_MSC_VER) @@ -26,490 +47,500 @@ #define JKJ_FORCEINLINE inline #endif -namespace jkj::dragonbox { - namespace to_chars_detail { - // These "//"'s are to prevent clang-format to ruin this nice alignment. 
- // Thanks to reddit user u/mcmcc: - // https://www.reddit.com/r/cpp/comments/so3wx9/dragonbox_110_is_released_a_fast_floattostring/hw8z26r/?context=3 - static constexpr char radix_100_table[] = { - '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', // - '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', // - '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', // - '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', // - '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', // - '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', // - '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', // - '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', // - '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', // - '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', // - '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', // - '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', // - '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', // - '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', // - '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', // - '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', // - '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', // - '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', // - '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', // - '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' // - }; - static constexpr char radix_100_head_table[] = { - '0', '.', '1', '.', '2', '.', '3', '.', '4', '.', // - '5', '.', '6', '.', '7', '.', '8', '.', '9', '.', // - '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', // - '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', // - '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', // - '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', // - '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', // - '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', // - '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', // - '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', // - '5', '.', '5', '.', '5', '.', '5', '.', '5', '.', // - '5', '.', '5', '.', '5', '.', '5', '.', 
'5', '.', // - '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', // - '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', // - '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', // - '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', // - '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', // - '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', // - '9', '.', '9', '.', '9', '.', '9', '.', '9', '.', // - '9', '.', '9', '.', '9', '.', '9', '.', '9', '.' // - }; - - static void print_1_digit(std::uint32_t n, char* buffer) noexcept { - if constexpr (('0' & 0xf) == 0) { - *buffer = char('0' | n); +namespace jkj { + namespace dragonbox { + namespace detail { + // These "//"'s are to prevent clang-format to ruin this nice alignment. + // Thanks to reddit user u/mcmcc: + // https://www.reddit.com/r/cpp/comments/so3wx9/dragonbox_110_is_released_a_fast_floattostring/hw8z26r/?context=3 + static constexpr char radix_100_table[] JKJ_STATIC_DATA_SECTION = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', // + '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', // + '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', // + '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', // + '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', // + '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', // + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', // + '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', // + '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', // + '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', // + '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', // + '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', // + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', // + '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', // + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', // + '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', // + '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', // + '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', // + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', // + 
'9', '5', '9', '6', '9', '7', '9', '8', '9', '9' // + }; + static constexpr char radix_100_head_table[] JKJ_STATIC_DATA_SECTION = { + '0', '.', '1', '.', '2', '.', '3', '.', '4', '.', // + '5', '.', '6', '.', '7', '.', '8', '.', '9', '.', // + '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', // + '1', '.', '1', '.', '1', '.', '1', '.', '1', '.', // + '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', // + '2', '.', '2', '.', '2', '.', '2', '.', '2', '.', // + '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', // + '3', '.', '3', '.', '3', '.', '3', '.', '3', '.', // + '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', // + '4', '.', '4', '.', '4', '.', '4', '.', '4', '.', // + '5', '.', '5', '.', '5', '.', '5', '.', '5', '.', // + '5', '.', '5', '.', '5', '.', '5', '.', '5', '.', // + '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', // + '6', '.', '6', '.', '6', '.', '6', '.', '6', '.', // + '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', // + '7', '.', '7', '.', '7', '.', '7', '.', '7', '.', // + '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', // + '8', '.', '8', '.', '8', '.', '8', '.', '8', '.', // + '9', '.', '9', '.', '9', '.', '9', '.', '9', '.', // + '9', '.', '9', '.', '9', '.', '9', '.', '9', '.' // + }; + + static void print_1_digit(int n, char* buffer) noexcept { + JKJ_IF_CONSTEXPR(('0' & 0xf) == 0) { + *buffer = char('0' | n); + } + else { + *buffer = char('0' + n); + } } - else { - *buffer = char('0' + n); + + static void print_2_digits(int n, char* buffer) noexcept { + stdr::memcpy(buffer, radix_100_table + n * 2, 2); } - } - static void print_2_digits(std::uint32_t n, char* buffer) noexcept { - std::memcpy(buffer, radix_100_table + n * 2, 2); - } + // These digit generation routines are inspired by James Anhalt's itoa algorithm: + // https://github.com/jeaiii/itoa + // The main idea is for given n, find y such that floor(10^k * y / 2^32) = n holds, + // where k is an appropriate integer depending on the length of n. 
+ // For example, if n = 1234567, we set k = 6. In this case, we have + // floor(y / 2^32) = 1, + // floor(10^2 * ((10^0 * y) mod 2^32) / 2^32) = 23, + // floor(10^2 * ((10^2 * y) mod 2^32) / 2^32) = 45, and + // floor(10^2 * ((10^4 * y) mod 2^32) / 2^32) = 67. + // See https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/ for more explanation. + + JKJ_FORCEINLINE static void print_9_digits(stdr::uint_least32_t s32, int& exponent, + char*& buffer) noexcept { + // -- IEEE-754 binary32 + // Since we do not cut trailing zeros in advance, s32 must be of 6~9 digits + // unless the original input was subnormal. + // In particular, when it is of 9 digits it shouldn't have any trailing zeros. + // -- IEEE-754 binary64 + // In this case, s32 must be of 7~9 digits unless the input is subnormal, + // and it shouldn't have any trailing zeros if it is of 9 digits. + if (s32 >= UINT32_C(100000000)) { + // 9 digits. + // 1441151882 = ceil(2^57 / 1'0000'0000) + 1 + auto prod = s32 * UINT64_C(1441151882); + prod >>= 25; + stdr::memcpy(buffer, radix_100_head_table + int(prod >> 32) * 2, 2); - // These digit generation routines are inspired by James Anhalt's itoa algorithm: - // https://github.com/jeaiii/itoa - // The main idea is for given n, find y such that floor(10^k * y / 2^32) = n holds, - // where k is an appropriate integer depending on the length of n. - // For example, if n = 1234567, we set k = 6. In this case, we have - // floor(y / 2^32) = 1, - // floor(10^2 * ((10^0 * y) mod 2^32) / 2^32) = 23, - // floor(10^2 * ((10^2 * y) mod 2^32) / 2^32) = 45, and - // floor(10^2 * ((10^4 * y) mod 2^32) / 2^32) = 67. - // See https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/ for more explanation. - - JKJ_FORCEINLINE static void print_9_digits(std::uint32_t s32, int& exponent, - char*& buffer) noexcept { - // -- IEEE-754 binary32 - // Since we do not cut trailing zeros in advance, s32 must be of 6~9 digits - // unless the original input was subnormal. 
- // In particular, when it is of 9 digits it shouldn't have any trailing zeros. - // -- IEEE-754 binary64 - // In this case, s32 must be of 7~9 digits unless the input is subnormal, - // and it shouldn't have any trailing zeros if it is of 9 digits. - if (s32 >= 1'0000'0000) { - // 9 digits. - // 1441151882 = ceil(2^57 / 1'0000'0000) + 1 - auto prod = s32 * std::uint64_t(1441151882); - prod >>= 25; - std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2); - - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 4); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 6); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 8); - - exponent += 8; - buffer += 10; - } - else if (s32 >= 100'0000) { - // 7 or 8 digits. - // 281474978 = ceil(2^48 / 100'0000) + 1 - auto prod = s32 * std::uint64_t(281474978); - prod >>= 16; - auto const head_digits = std::uint32_t(prod >> 32); - // If s32 is of 8 digits, increase the exponent by 7. - // Otherwise, increase it by 6. - exponent += (6 + unsigned(head_digits >= 10)); - - // Write the first digit and the decimal point. - std::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); - // This third character may be overwritten later but we don't care. - buffer[2] = radix_100_table[head_digits * 2 + 1]; - - // Remaining 6 digits are all zero? - if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) { - // The number of characters actually need to be written is: - // 1, if only the first digit is nonzero, which means that either s32 is of 7 - // digits or it is of 8 digits but the second digit is zero, or - // 3, otherwise. - // Note that buffer[2] is never '0' if s32 is of 7 digits, because the input is - // never zero. 
- buffer += (1 + (unsigned(head_digits >= 10) & unsigned(buffer[2] > '0')) * 2); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 4); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 6); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 8); + + exponent += 8; + buffer += 10; } - else { - // At least one of the remaining 6 digits are nonzero. - // After this adjustment, now the first destination becomes buffer + 2. - buffer += unsigned(head_digits >= 10); + else if (s32 >= UINT32_C(1000000)) { + // 7 or 8 digits. + // 281474978 = ceil(2^48 / 100'0000) + 1 + auto prod = s32 * UINT64_C(281474978); + prod >>= 16; + auto const head_digits = int(prod >> 32); + // If s32 is of 8 digits, increase the exponent by 7. + // Otherwise, increase it by 6. + exponent += (6 + int(head_digits >= 10)); + + // Write the first digit and the decimal point. + stdr::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[head_digits * 2 + 1]; + + // Remaining 6 digits are all zero? + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / UINT32_C(1000000))) { + // The number of characters actually need to be written is: + // 1, if only the first digit is nonzero, which means that either s32 is of 7 + // digits or it is of 8 digits but the second digit is zero, or + // 3, otherwise. + // Note that buffer[2] is never '0' if s32 is of 7 digits, because the input is + // never zero. + buffer += (1 + (int(head_digits >= 10) & int(buffer[2] > '0')) * 2); + } + else { + // At least one of the remaining 6 digits are nonzero. + // After this adjustment, now the first destination becomes buffer + 2. 
+ buffer += int(head_digits >= 10); + + // Obtain the next two digits. + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); + + // Remaining 4 digits are all zero? + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / 10000)) { + buffer += (3 + int(buffer[3] > '0')); + } + else { + // At least one of the remaining 4 digits are nonzero. + + // Obtain the next two digits. + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 4); + + // Remaining 2 digits are all zero? + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / 100)) { + buffer += (5 + int(buffer[5] > '0')); + } + else { + // Obtain the last two digits. + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 6); + + buffer += (7 + int(buffer[7] > '0')); + } + } + } + } + else if (s32 >= 1'0000) { + // 5 or 6 digits. + // 429497 = ceil(2^32 / 1'0000) + auto prod = s32 * UINT64_C(429497); + auto const head_digits = int(prod >> 32); - // Obtain the next two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); + // If s32 is of 6 digits, increase the exponent by 5. + // Otherwise, increase it by 4. + exponent += (4 + int(head_digits >= 10)); + + // Write the first digit and the decimal point. + stdr::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[head_digits * 2 + 1]; // Remaining 4 digits are all zero? - if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) { - buffer += (3 + unsigned(buffer[3] > '0')); + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / 10000)) { + // The number of characters actually written is 1 or 3, similarly to the case of + // 7 or 8 digits. 
+ buffer += (1 + (int(head_digits >= 10) & int(buffer[2] > '0')) * 2); } else { // At least one of the remaining 4 digits are nonzero. + // After this adjustment, now the first destination becomes buffer + 2. + buffer += int(head_digits >= 10); // Obtain the next two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 4); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); // Remaining 2 digits are all zero? - if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) { - buffer += (5 + unsigned(buffer[5] > '0')); + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / 100)) { + buffer += (3 + int(buffer[3] > '0')); } else { // Obtain the last two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 6); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 4); - buffer += (7 + unsigned(buffer[7] > '0')); + buffer += (5 + int(buffer[5] > '0')); } } } - } - else if (s32 >= 1'0000) { - // 5 or 6 digits. - // 429497 = ceil(2^32 / 1'0000) - auto prod = s32 * std::uint64_t(429497); - auto const head_digits = std::uint32_t(prod >> 32); - - // If s32 is of 6 digits, increase the exponent by 5. - // Otherwise, increase it by 4. - exponent += (4 + unsigned(head_digits >= 10)); - - // Write the first digit and the decimal point. - std::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); - // This third character may be overwritten later but we don't care. - buffer[2] = radix_100_table[head_digits * 2 + 1]; - - // Remaining 4 digits are all zero? - if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) { - // The number of characters actually written is 1 or 3, similarly to the case of - // 7 or 8 digits. 
- buffer += (1 + (unsigned(head_digits >= 10) & unsigned(buffer[2] > '0')) * 2); - } - else { - // At least one of the remaining 4 digits are nonzero. - // After this adjustment, now the first destination becomes buffer + 2. - buffer += unsigned(head_digits >= 10); + else if (s32 >= 100) { + // 3 or 4 digits. + // 42949673 = ceil(2^32 / 100) + auto prod = s32 * UINT64_C(42949673); + auto const head_digits = int(prod >> 32); + + // If s32 is of 4 digits, increase the exponent by 3. + // Otherwise, increase it by 2. + exponent += (2 + int(head_digits >= 10)); - // Obtain the next two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); + // Write the first digit and the decimal point. + stdr::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[head_digits * 2 + 1]; // Remaining 2 digits are all zero? - if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) { - buffer += (3 + unsigned(buffer[3] > '0')); + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / 100)) { + // The number of characters actually written is 1 or 3, similarly to the case of + // 7 or 8 digits. + buffer += (1 + (int(head_digits >= 10) & int(buffer[2] > '0')) * 2); } else { + // At least one of the remaining 2 digits are nonzero. + // After this adjustment, now the first destination becomes buffer + 2. + buffer += int(head_digits >= 10); + // Obtain the last two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 4); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); - buffer += (5 + unsigned(buffer[5] > '0')); + buffer += (3 + int(buffer[3] > '0')); } } - } - else if (s32 >= 100) { - // 3 or 4 digits. 
- // 42949673 = ceil(2^32 / 100) - auto prod = s32 * std::uint64_t(42949673); - auto const head_digits = std::uint32_t(prod >> 32); - - // If s32 is of 4 digits, increase the exponent by 3. - // Otherwise, increase it by 2. - exponent += (2 + int(head_digits >= 10)); - - // Write the first digit and the decimal point. - std::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); - // This third character may be overwritten later but we don't care. - buffer[2] = radix_100_table[head_digits * 2 + 1]; - - // Remaining 2 digits are all zero? - if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100)) { - // The number of characters actually written is 1 or 3, similarly to the case of - // 7 or 8 digits. - buffer += (1 + (unsigned(head_digits >= 10) & unsigned(buffer[2] > '0')) * 2); - } else { - // At least one of the remaining 2 digits are nonzero. - // After this adjustment, now the first destination becomes buffer + 2. - buffer += unsigned(head_digits >= 10); + // 1 or 2 digits. + // If s32 is of 2 digits, increase the exponent by 1. + exponent += int(s32 >= 10); - // Obtain the last two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); + // Write the first digit and the decimal point. + stdr::memcpy(buffer, radix_100_head_table + s32 * 2, 2); + // This third character may be overwritten later but we don't care. + buffer[2] = radix_100_table[s32 * 2 + 1]; - buffer += (3 + unsigned(buffer[3] > '0')); + // The number of characters actually written is 1 or 3, similarly to the case of + // 7 or 8 digits. + buffer += (1 + (int(s32 >= 10) & int(buffer[2] > '0')) * 2); } } - else { - // 1 or 2 digits. - // If s32 is of 2 digits, increase the exponent by 1. - exponent += int(s32 >= 10); - - // Write the first digit and the decimal point. - std::memcpy(buffer, radix_100_head_table + s32 * 2, 2); - // This third character may be overwritten later but we don't care. 
- buffer[2] = radix_100_table[s32 * 2 + 1]; - - // The number of characters actually written is 1 or 3, similarly to the case of - // 7 or 8 digits. - buffer += (1 + (unsigned(s32 >= 10) & unsigned(buffer[2] > '0')) * 2); - } - } - - template <> - char* to_chars>(std::uint32_t s32, int exponent, - char* buffer) noexcept { - // Print significand. - print_9_digits(s32, exponent, buffer); - - // Print exponent and return - if (exponent < 0) { - std::memcpy(buffer, "E-", 2); - buffer += 2; - exponent = -exponent; - } - else { - buffer[0] = 'E'; - buffer += 1; - } - - if (exponent >= 10) { - print_2_digits(std::uint32_t(exponent), buffer); - buffer += 2; - } - else { - print_1_digit(std::uint32_t(exponent), buffer); - buffer += 1; - } - return buffer; - } + template <> + char* to_chars>(stdr::uint_least32_t s32, int exponent, + char* buffer) noexcept { + // Print significand. + print_9_digits(s32, exponent, buffer); + + // Print exponent and return + if (exponent < 0) { + stdr::memcpy(buffer, "E-", 2); + buffer += 2; + exponent = -exponent; + } + else { + buffer[0] = 'E'; + buffer += 1; + } - template <> - char* to_chars>(std::uint64_t const significand, - int exponent, char* buffer) noexcept { - // Print significand by decomposing it into a 9-digit block and a 8-digit block. 
- std::uint32_t first_block, second_block; - bool no_second_block; - - if (significand >= 1'0000'0000) { - first_block = std::uint32_t(significand / 1'0000'0000); - second_block = std::uint32_t(significand) - first_block * 1'0000'0000; - exponent += 8; - no_second_block = (second_block == 0); - } - else { - first_block = std::uint32_t(significand); - no_second_block = true; - } + if (exponent >= 10) { + print_2_digits(exponent, buffer); + buffer += 2; + } + else { + print_1_digit(exponent, buffer); + buffer += 1; + } - if (no_second_block) { - print_9_digits(first_block, exponent, buffer); + return buffer; } - else { - // We proceed similarly to print_9_digits(), but since we do not need to remove - // trailing zeros, the procedure is a bit simpler. - if (first_block >= 1'0000'0000) { - // The input is of 17 digits, thus there should be no trailing zero at all. - // The first block is of 9 digits. - // 1441151882 = ceil(2^57 / 1'0000'0000) + 1 - auto prod = first_block * std::uint64_t(1441151882); - prod >>= 25; - std::memcpy(buffer, radix_100_head_table + std::uint32_t(prod >> 32) * 2, 2); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 4); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 6); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 8); - - // The second block is of 8 digits. 
- // 281474978 = ceil(2^48 / 100'0000) + 1 - prod = second_block * std::uint64_t(281474978); - prod >>= 16; - prod += 1; - print_2_digits(std::uint32_t(prod >> 32), buffer + 10); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 12); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 14); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 16); + template <> + char* to_chars>(stdr::uint_least64_t const significand, + int exponent, char* buffer) noexcept { + // Print significand by decomposing it into a 9-digit block and a 8-digit block. + stdr::uint_least32_t first_block, second_block; + bool no_second_block; + + if (significand >= UINT64_C(100000000)) { + first_block = stdr::uint_least32_t(significand / UINT64_C(100000000)); + second_block = + stdr::uint_least32_t(significand) - first_block * UINT32_C(100000000); exponent += 8; - buffer += 18; + no_second_block = (second_block == 0); } else { - if (first_block >= 100'0000) { - // 7 or 8 digits. + first_block = stdr::uint_least32_t(significand); + no_second_block = true; + } + + if (no_second_block) { + print_9_digits(first_block, exponent, buffer); + } + else { + // We proceed similarly to print_9_digits(), but since we do not need to remove + // trailing zeros, the procedure is a bit simpler. + if (first_block >= UINT32_C(100000000)) { + // The input is of 17 digits, thus there should be no trailing zero at all. + // The first block is of 9 digits. 
+ // 1441151882 = ceil(2^57 / 1'0000'0000) + 1 + auto prod = first_block * UINT64_C(1441151882); + prod >>= 25; + stdr::memcpy(buffer, radix_100_head_table + int(prod >> 32) * 2, 2); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 4); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 6); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 8); + + // The second block is of 8 digits. // 281474978 = ceil(2^48 / 100'0000) + 1 - auto prod = first_block * std::uint64_t(281474978); + prod = second_block * UINT64_C(281474978); prod >>= 16; - auto const head_digits = std::uint32_t(prod >> 32); - - std::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); - buffer[2] = radix_100_table[head_digits * 2 + 1]; - - exponent += (6 + unsigned(head_digits >= 10)); - buffer += unsigned(head_digits >= 10); - - // Print remaining 6 digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 4); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 6); - - buffer += 8; + prod += 1; + print_2_digits(int(prod >> 32), buffer + 10); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 12); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 14); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 16); + + exponent += 8; + buffer += 18; } - else if (first_block >= 1'0000) { - // 5 or 6 digits. 
- // 429497 = ceil(2^32 / 1'0000) - auto prod = first_block * std::uint64_t(429497); - auto const head_digits = std::uint32_t(prod >> 32); - - std::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); - buffer[2] = radix_100_table[head_digits * 2 + 1]; + else { + if (first_block >= UINT32_C(1000000)) { + // 7 or 8 digits. + // 281474978 = ceil(2^48 / 100'0000) + 1 + auto prod = first_block * UINT64_C(281474978); + prod >>= 16; + auto const head_digits = int(prod >> 32); + + stdr::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); + buffer[2] = radix_100_table[head_digits * 2 + 1]; + + exponent += (6 + int(head_digits >= 10)); + buffer += int(head_digits >= 10); + + // Print remaining 6 digits. + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 4); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 6); + + buffer += 8; + } + else if (first_block >= 10000) { + // 5 or 6 digits. + // 429497 = ceil(2^32 / 1'0000) + auto prod = first_block * UINT64_C(429497); + auto const head_digits = int(prod >> 32); - exponent += (4 + unsigned(head_digits >= 10)); - buffer += unsigned(head_digits >= 10); + stdr::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); + buffer[2] = radix_100_table[head_digits * 2 + 1]; - // Print remaining 4 digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 4); + exponent += (4 + int(head_digits >= 10)); + buffer += int(head_digits >= 10); - buffer += 6; - } - else if (first_block >= 100) { - // 3 or 4 digits. - // 42949673 = ceil(2^32 / 100) - auto prod = first_block * std::uint64_t(42949673); - auto const head_digits = std::uint32_t(prod >> 32); + // Print remaining 4 digits. 
+ prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 4); - std::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); - buffer[2] = radix_100_table[head_digits * 2 + 1]; + buffer += 6; + } + else if (first_block >= 100) { + // 3 or 4 digits. + // 42949673 = ceil(2^32 / 100) + auto prod = first_block * UINT64_C(42949673); + auto const head_digits = int(prod >> 32); - exponent += (2 + unsigned(head_digits >= 10)); - buffer += unsigned(head_digits >= 10); + stdr::memcpy(buffer, radix_100_head_table + head_digits * 2, 2); + buffer[2] = radix_100_table[head_digits * 2 + 1]; - // Print remaining 2 digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); + exponent += (2 + int(head_digits >= 10)); + buffer += int(head_digits >= 10); - buffer += 4; - } - else { - // 1 or 2 digits. - std::memcpy(buffer, radix_100_head_table + first_block * 2, 2); - buffer[2] = radix_100_table[first_block * 2 + 1]; + // Print remaining 2 digits. + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); - exponent += unsigned(first_block >= 10); - buffer += (2 + unsigned(first_block >= 10)); - } + buffer += 4; + } + else { + // 1 or 2 digits. + stdr::memcpy(buffer, radix_100_head_table + first_block * 2, 2); + buffer[2] = radix_100_table[first_block * 2 + 1]; - // Next, print the second block. - // The second block is of 8 digits, but we may have trailing zeros. - // 281474978 = ceil(2^48 / 100'0000) + 1 - auto prod = second_block * std::uint64_t(281474978); - prod >>= 16; - prod += 1; - print_2_digits(std::uint32_t(prod >> 32), buffer); + exponent += int(first_block >= 10); + buffer += (2 + int(first_block >= 10)); + } - // Remaining 6 digits are all zero? 
- if (std::uint32_t(prod) <= std::uint32_t((std::uint64_t(1) << 32) / 100'0000)) { - buffer += (1 + unsigned(buffer[1] > '0')); - } - else { - // Obtain the next two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 2); + // Next, print the second block. + // The second block is of 8 digits, but we may have trailing zeros. + // 281474978 = ceil(2^48 / 100'0000) + 1 + auto prod = second_block * UINT64_C(281474978); + prod >>= 16; + prod += 1; + print_2_digits(int(prod >> 32), buffer); - // Remaining 4 digits are all zero? - if (std::uint32_t(prod) <= - std::uint32_t((std::uint64_t(1) << 32) / 1'0000)) { - buffer += (3 + unsigned(buffer[3] > '0')); + // Remaining 6 digits are all zero? + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / UINT64_C(1000000))) { + buffer += (1 + int(buffer[1] > '0')); } else { // Obtain the next two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 4); + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 2); - // Remaining 2 digits are all zero? - if (std::uint32_t(prod) <= - std::uint32_t((std::uint64_t(1) << 32) / 100)) { - buffer += (5 + unsigned(buffer[5] > '0')); + // Remaining 4 digits are all zero? + if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / 10000)) { + buffer += (3 + int(buffer[3] > '0')); } else { - // Obtain the last two digits. - prod = std::uint32_t(prod) * std::uint64_t(100); - print_2_digits(std::uint32_t(prod >> 32), buffer + 6); - buffer += (7 + unsigned(buffer[7] > '0')); + // Obtain the next two digits. + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 4); + + // Remaining 2 digits are all zero? 
+ if ((prod & UINT32_C(0xffffffff)) <= + stdr::uint_least32_t((stdr::uint_least64_t(1) << 32) / 100)) { + buffer += (5 + int(buffer[5] > '0')); + } + else { + // Obtain the last two digits. + prod = (prod & UINT32_C(0xffffffff)) * 100; + print_2_digits(int(prod >> 32), buffer + 6); + buffer += (7 + int(buffer[7] > '0')); + } } } } } - } - // Print exponent and return - if (exponent < 0) { - std::memcpy(buffer, "E-", 2); - buffer += 2; - exponent = -exponent; - } - else { - buffer[0] = 'E'; - buffer += 1; - } + // Print exponent and return + if (exponent < 0) { + stdr::memcpy(buffer, "E-", 2); + buffer += 2; + exponent = -exponent; + } + else { + buffer[0] = 'E'; + buffer += 1; + } - if (exponent >= 100) { - // d1 = exponent / 10; d2 = exponent % 10; - // 6554 = ceil(2^16 / 10) - auto prod = std::uint32_t(exponent) * std::uint32_t(6554); - auto d1 = prod >> 16; - prod = std::uint16_t(prod) * std::uint32_t(5); // * 10 - auto d2 = prod >> 15; // >> 16 - print_2_digits(d1, buffer); - print_1_digit(d2, buffer + 2); - buffer += 3; - } - else if (exponent >= 10) { - print_2_digits(std::uint32_t(exponent), buffer); - buffer += 2; - } - else { - print_1_digit(std::uint32_t(exponent), buffer); - buffer += 1; - } + if (exponent >= 100) { + // d1 = exponent / 10; d2 = exponent % 10; + // 6554 = ceil(2^16 / 10) + auto prod = stdr::uint_least32_t(exponent) * UINT32_C(6554); + auto d1 = int(prod >> 16); + prod = (prod & UINT16_C(0xffff)) * 5; // * 10 + auto d2 = int(prod >> 15); // >> 16 + print_2_digits(d1, buffer); + print_1_digit(d2, buffer + 2); + buffer += 3; + } + else if (exponent >= 10) { + print_2_digits(exponent, buffer); + buffer += 2; + } + else { + print_1_digit(exponent, buffer); + buffer += 1; + } - return buffer; + return buffer; + } } } }