Merge pull request #865 from cppalliance/faster_32_mul

cppalliance · Feb 18, 2025 · 9b9548d · 9b9548d
2 parents 7b4fa43 + 5cf8f18
commit 9b9548d
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 11 deletions.
diff --git a/include/boost/decimal/decimal32.hpp b/include/boost/decimal/decimal32.hpp
@@ -1684,19 +1684,9 @@ constexpr auto operator*(decimal32 lhs, decimal32 rhs) noexcept -> decimal32
     #endif
 
     const auto lhs_components {lhs.to_components()};
-
-    auto sig_lhs {lhs_components.sig};
-    auto exp_lhs {lhs_components.exp};
-    detail::normalize(sig_lhs, exp_lhs);
-
     const auto rhs_components {rhs.to_components()};
 
-    auto sig_rhs {rhs_components.sig};
-    auto exp_rhs {rhs_components.exp};
-    detail::normalize(sig_rhs, exp_rhs);
-
-    return detail::mul_impl<decimal32>(sig_lhs, exp_lhs, lhs_components.sign,
-                                       sig_rhs, exp_rhs, rhs_components.sign);
+    return detail::mul_impl<decimal32>(lhs_components, rhs_components);
 }
 
 template <typename Integer>

diff --git a/include/boost/decimal/detail/mul_impl.hpp b/include/boost/decimal/detail/mul_impl.hpp
@@ -25,6 +25,22 @@ namespace detail {
 // 1) Returns a decimal type and lets the constructor handle shrinking the significand
 // 2) Returns a struct of the constituent components (used with FMAs)
 
+// Multiplies two operands passed as component objects: the full significands are multiplied as
+// std::uint_fast64_t, the biased exponents are added, and the sign is set iff the operand signs differ.
+template <typename ReturnType, typename T>
+constexpr auto mul_impl(const T& lhs, const T& rhs) noexcept -> ReturnType
+{
+    using mul_type = std::uint_fast64_t;
+    // The constructor has to count the digits of the significand, which is slow for uint128,
+    // so both significands are widened to mul_type and the product is kept in that type.
+    // NOTE(review): the [1'000'000^2, 9'999'999^2] range (13 or 14 digits) cited here previously only
+    // holds for normalized operands, and this function performs no division - confirm with callers.
+
+    auto res_sig {(static_cast<mul_type>(lhs.full_significand()) * static_cast<mul_type>(rhs.full_significand()))};
+    auto res_exp {lhs.biased_exponent() + rhs.biased_exponent()};
+
+    return {res_sig, res_exp, lhs.isneg() != rhs.isneg()};
+}
 
 template <typename ReturnType, typename T, typename U>
 BOOST_DECIMAL_FORCE_INLINE constexpr auto mul_impl(T lhs_sig, U lhs_exp, bool lhs_sign,