Improve -O0 codegen of numeric functions

After some experimentation on Compiler Explorer, it seems that we can (hopefully) improve the unoptimised codegen and the debugger experience by using FLUX_ALWAYS_INLINE on the one hand, and by manually "flattening" calls on the other.
tcbrindle · Nov 15, 2024 · 35081da · 35081da
1 parent 3818ff7
commit 35081da
Showing 1 changed file with 61 additions and 44 deletions.
diff --git a/include/flux/core/numeric.hpp b/include/flux/core/numeric.hpp
@@ -44,6 +44,7 @@ template <integral To>
 struct unchecked_cast_fn {
     template <integral From>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(From from) const noexcept -> To
     {
         return static_cast<To>(from);
@@ -54,6 +55,7 @@ template <integral To>
 struct overflowing_cast_fn {
     template <integral From>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(From from) const noexcept -> overflow_result<To>
     {
         if constexpr (requires { To{from}; }) {
@@ -68,22 +70,28 @@ template <integral To>
 struct checked_cast_fn {
     template <integral From>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(From from,
                               std::source_location loc = std::source_location::current()) const
         -> To
     {
-        auto r = overflowing_cast_fn<To>{}(from);
-        if (r.overflowed) {
-            runtime_error("checked_cast failed", loc);
+        if constexpr (requires { To{from}; }) {
+            return To{from};
+        } else {
+            if (std::in_range<To>(from)) {
+                return static_cast<To>(from);
+            } else {
+                runtime_error("checked_cast failed", loc);
+            }
         }
-        return r.value;
     }
 };
 
 template <integral To>
 struct cast_fn {
     template <integral From>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(From from,
                               std::source_location loc = std::source_location::current()) const
         -> To
@@ -99,6 +107,7 @@ struct cast_fn {
 struct unchecked_add_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
         return static_cast<T>(lhs + rhs);
@@ -108,6 +117,7 @@ struct unchecked_add_fn {
 struct unchecked_sub_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
         return static_cast<T>(lhs - rhs);
@@ -117,6 +127,7 @@ struct unchecked_sub_fn {
 struct unchecked_mul_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
         return static_cast<T>(lhs * rhs);
@@ -126,6 +137,7 @@ struct unchecked_mul_fn {
 struct unchecked_div_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
         return static_cast<T>(lhs / rhs);
@@ -135,6 +147,7 @@ struct unchecked_div_fn {
 struct unchecked_mod_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
         return static_cast<T>(lhs % rhs);
@@ -144,6 +157,7 @@ struct unchecked_mod_fn {
 struct unchecked_shl_fn {
     template <integral T, integral U>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, U rhs) const noexcept -> T
     {
         return static_cast<T>(lhs << rhs);
@@ -153,6 +167,7 @@ struct unchecked_shl_fn {
 struct unchecked_shr_fn {
     template <integral T, integral U>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, U rhs) const noexcept -> T
     {
         return static_cast<T>(lhs >> rhs);
@@ -162,6 +177,7 @@ struct unchecked_shr_fn {
 struct unchecked_neg_fn {
     template <signed_integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T val) const noexcept -> T
     {
         return static_cast<T>(-val);
@@ -171,48 +187,55 @@ struct unchecked_neg_fn {
 struct wrapping_add_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
-        using U = std::make_unsigned_t<T>;
-        return static_cast<T>(static_cast<U>(lhs) + static_cast<U>(rhs));
+        T r;
+        (void) ckd_add(&r, lhs, rhs);
+        return r;
     }
 };
 
 struct wrapping_sub_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
-        using U = std::make_unsigned_t<T>;
-        return static_cast<T>(static_cast<U>(lhs) - static_cast<U>(rhs));
+        T r;
+        (void) ckd_sub(&r, lhs, rhs);
+        return r;
     }
 };
 
 struct wrapping_mul_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> T
     {
-        using U = std::conditional_t<(sizeof(T) < sizeof(unsigned)),
-                                     unsigned,
-                                     std::make_unsigned_t<T>>;
-        return static_cast<T>(static_cast<U>(lhs) * static_cast<U>(rhs));
+        T r;
+        (void) ckd_mul(&r, lhs, rhs);
+        return r;
     }
 };
 
 struct wrapping_neg_fn {
     template <signed_integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T val) const noexcept -> T
     {
-        using U = std::make_unsigned_t<T>;
-        return static_cast<T>(static_cast<U>(0) - static_cast<U>(val));
+        T r;
+        (void) ckd_sub(&r, T{0}, val);
+        return r;
     }
 };
 
 struct overflowing_add_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> overflow_result<T>
     {
         T r;
@@ -224,6 +247,7 @@ struct overflowing_add_fn {
 struct overflowing_sub_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> overflow_result<T>
     {
         T r;
@@ -235,6 +259,7 @@ struct overflowing_sub_fn {
 struct overflowing_mul_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs) const noexcept -> overflow_result<T>
     {
         T r;
@@ -246,6 +271,7 @@ struct overflowing_mul_fn {
 struct overflowing_neg_fn {
     template <signed_integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T val) const noexcept -> overflow_result<T>
     {
         T r;
@@ -257,61 +283,47 @@ struct overflowing_neg_fn {
 struct checked_add_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs,
                               std::source_location loc = std::source_location::current()) const
         -> T
     {
-        // For built-in signed types at least as large as int,
-        // constant evaluation already checks for overflow
-        if (signed_integral<T> && (sizeof(T) >= sizeof(int)) &&
-            std::is_constant_evaluated()) {
-            return unchecked_add_fn{}(lhs, rhs); // LCOV_EXCL_LINE
+        if (T r; !ckd_add(&r, lhs, rhs)) {
+            return r;
         } else {
-            auto result = overflowing_add_fn{}(lhs, rhs);
-            if (result.overflowed) {
-                runtime_error("overflow in addition", loc);
-            }
-            return result.value;
+            runtime_error("overflow in addition", loc);
         }
     }
 };
 
 struct checked_sub_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs,
                               std::source_location loc = std::source_location::current()) const
         -> T
     {
-        if (signed_integral<T> && (sizeof(T) >= sizeof(int)) &&
-            std::is_constant_evaluated()) {
-            return unchecked_sub_fn{}(lhs, rhs); // LCOV_EXCL_LINE
+        if (T r; !ckd_sub(&r, lhs, rhs)) {
+            return r;
         } else {
-            auto result = overflowing_sub_fn{}(lhs, rhs);
-            if (result.overflowed) {
-                runtime_error("overflow in subtraction", loc);
-            }
-            return result.value;
+            runtime_error("overflow in subtraction", loc);
         }
     }
 };
 
 struct checked_mul_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs,
                               std::source_location loc = std::source_location::current()) const
         -> T
     {
-        if (signed_integral<T> && (sizeof(T) >= sizeof(int)) &&
-            std::is_constant_evaluated()) {
-            return unchecked_mul_fn{}(lhs, rhs); // LCOV_EXCL_LINE
+        if (T r; !ckd_mul(&r, lhs, rhs)) {
+            return r;
         } else {
-            auto result = overflowing_mul_fn{}(lhs, rhs);
-            if (result.overflowed) {
-                runtime_error("overflow in multiplication", loc);
-            }
-            return result.value;
+            runtime_error("overflow in multiplication", loc);
         }
     }
 };
@@ -321,6 +333,7 @@ template <overflow_policy OnOverflow = overflow_policy::error,
 struct checked_div_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs,
                               std::source_location loc = std::source_location::current()) const
         -> T
@@ -350,6 +363,7 @@ template <overflow_policy OnOverflow = overflow_policy::error,
 struct checked_mod_fn {
     template <integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, T rhs,
                               std::source_location loc = std::source_location::current()) const
         -> T
@@ -375,6 +389,7 @@ struct checked_mod_fn {
 struct checked_shl_fn {
     template <integral T, integral U>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, U rhs,
                               std::source_location loc = std::source_location::current()) const
         -> T
@@ -393,6 +408,7 @@ struct checked_shl_fn {
 struct checked_shr_fn {
     template <integral T, integral U>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T lhs, U rhs,
                std::source_location loc = std::source_location::current()) const
         -> T
@@ -409,15 +425,16 @@ struct checked_shr_fn {
 struct checked_neg_fn {
     template <signed_integral T>
     [[nodiscard]]
+    FLUX_ALWAYS_INLINE
     constexpr auto operator()(T val,
                               std::source_location loc = std::source_location::current()) const
         -> T
     {
-        auto [r, o] = overflowing_neg_fn{}(val);
-        if (o) {
-            flux::runtime_error("Overflow in signed negation", loc);
+        if (T r; !ckd_sub(&r, T{0}, val)) {
+            return r;
+        } else {
+            runtime_error("overflow in signed negation", loc);
         }
-        return r;
     }
 };