Inv trig working + exp2, exp.

rust-lang · Jan 31, 2022 · b674a39 · b674a39
1 parent a067a6f
commit b674a39
Show file tree

Hide file tree

Showing 4 changed files with 177 additions and 25 deletions.
diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs
@@ -3,7 +3,7 @@ use crate::simd::{LaneCount, Simd, SupportedLaneCount};
 
 macro_rules! implement {
     {
-        $type:ty, $int_type:ty
+        $type:ty, $int_type:ty, $uint_type:ty
     } => {
         impl<const LANES: usize> Simd<$type, LANES>
         where
@@ -29,9 +29,30 @@ macro_rules! implement {
             pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
                 unsafe { intrinsics::simd_cast(value) }
             }
+
+            /// Rounds toward zero and converts to the same-width unsigned integer type, assuming
+            /// that the value is finite and fits in that type.
+            ///
+            /// # Safety
+            /// Every lane of the value must:
+            ///
+            /// * Not be NaN
+            /// * Not be infinite
+            /// * Be representable in the unsigned return type after truncating off its fractional part (so it must be greater than -1)
+            #[inline]
+            pub unsafe fn to_uint_unchecked(self) -> Simd<$uint_type, LANES> {
+                unsafe { intrinsics::simd_cast(self) } // SAFETY: the caller guarantees the conditions listed under `# Safety`.
+            }
+
+            /// Creates a floating-point vector from an unsigned integer vector, lane by lane.
+            /// Values that are not exactly representable in the float type are rounded.
+            #[inline]
+            pub fn round_from_uint(value: Simd<$uint_type, LANES>) -> Self {
+                unsafe { intrinsics::simd_cast(value) } // SAFETY: presumably sound for every input, since any unsigned integer maps to a finite float; confirm against the intrinsic's contract.
+            }
         }
     }
 }
 
-implement! { f32, i32 }
-implement! { f64, i64 }
+implement! { f32, i32, u32 }
+implement! { f64, i64, u64 }
diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs
@@ -122,6 +122,9 @@ pub trait StdFloat: Sealed + Sized {
 }
 
 pub trait StdLibm : StdFloat {
+    type IntType;
+    type UintType;
+
     fn sin(self) -> Self;
 
     fn cos(self) -> Self;
@@ -135,6 +138,10 @@ pub trait StdLibm : StdFloat {
     fn atan(self) -> Self;
 
     fn atan2(self, x: Self) -> Self;
+
+    fn exp2(self) -> Self;
+
+    fn exp(self) -> Self;
 }
 
 impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}

diff --git a/crates/std_float/src/libm32.rs b/crates/std_float/src/libm32.rs
@@ -1,4 +1,5 @@
 #![allow(non_snake_case)]
+#![doc = "This code is automatically generated, do not edit."]
 use super::StdLibm;
 
 use super::StdFloat;
@@ -9,22 +10,36 @@ impl<const N: usize> StdLibm for Simd<f32, N>
 where
     LaneCount<N>: SupportedLaneCount,
 {
+    type IntType = Simd<i32, N>;
+    type UintType = Simd<u32, N>;
     #[inline]
     fn asin(self) -> Self { // Lane-wise arcsine: odd polynomial in `x`, with a second branch for lanes where arg² >= LIM².
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
         let arg = self;
-        arg.atan2((Self::splat(1f32) - arg * arg).sqrt())
+        let LIM: Self = Self::splat(0.70710678118654752440); // ~1/sqrt(2); switch-over point between the two branches
+        let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
+        let s: Self = // -1 for negative lanes, +1 otherwise
+            ((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0));
+        let x: Self = // polynomial argument: `arg` itself when arg² < LIM², otherwise sqrt(1 - arg²)
+            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
+        let y: Self = (Self::splat(0.11644821f32)) // Horner evaluation in x², then multiplied by x
+            .mul_add(x * x, Self::splat(0.04343228f32))
+            .mul_add(x * x, Self::splat(0.17078044f32))
+            .mul_add(x * x, Self::splat(0.99991643f32))
+            * x;
+        ((arg * arg).lanes_lt(LIM * LIM)).select(y, c - y * s) // outer lanes: ±π/2 minus the signed polynomial value
     }
     #[inline]
     fn acos(self) -> Self {
-        let PI_BY_2 = Self::splat(1.5707964f32);
-        let PI = Self::splat(3.1415927f32);
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
+        let PI = Self::splat(3.14159265358979323846);
         let arg = self;
-        let LIM: Self = Self::splat(0.9f32);
-        let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(PI, Self::splat(0f32));
+        let LIM: Self = Self::splat(0.9);
+        let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0));
         let s: Self =
-            ((arg).lanes_lt(Self::splat(0f32))).select(Self::splat(1f32), -Self::splat(1f32));
+            ((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0));
         let x: Self =
-            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1f32) - arg * arg).sqrt());
+            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
         let y: Self = (Self::splat(1.3740137f32))
             .mul_add(x * x, -Self::splat(3.1993167f32))
             .mul_add(x * x, Self::splat(3.103398f32))
@@ -38,10 +53,10 @@ where
     }
     #[inline]
     fn atan(self) -> Self {
-        let PI_BY_2 = Self::splat(1.5707964f32);
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
         let arg = self;
-        let LIM: Self = Self::splat(1f32);
-        let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
+        let LIM: Self = Self::splat(1.0);
+        let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
         let x: Self = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
         let y: Self = (-Self::splat(0.0039602574f32))
             .mul_add(x * x, Self::splat(0.021659138f32))
@@ -56,14 +71,14 @@ where
     }
     #[inline]
     fn atan2(self, x: Self) -> Self {
-        let PI_BY_2 = Self::splat(1.5707964f32);
-        let PI = Self::splat(3.1415927f32);
+        let PI_BY_2 = Self::splat(1.57079632679489661923);
+        let PI = Self::splat(3.14159265358979323846);
         let y = self;
-        let offset180: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI, PI);
-        let x1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-x, x);
-        let y1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-y, y);
-        let offset1: Self = ((x).lanes_lt(Self::splat(0f32))).select(offset180, Self::splat(0f32));
-        let offset90: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
+        let offset180: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI, PI);
+        let x1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-x, x);
+        let y1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-y, y);
+        let offset1: Self = ((x).lanes_lt(Self::splat(0.0))).select(offset180, Self::splat(0.0));
+        let offset90: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
         let x2: Self = ((y1.abs()).lanes_gt(x1)).select(y1, x1);
         let y2: Self = ((y1.abs()).lanes_gt(x1)).select(-x1, y1);
         let offset2: Self = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1);
@@ -80,8 +95,32 @@ where
         y3 + offset2
     }
     #[inline]
+    /// Computes `2^self` for each lane.
+    ///
+    /// The argument is split into a rounded integer part `r` and a remainder
+    /// `x = arg - r`. `2^r` is built directly from its IEEE-754 bit pattern and
+    /// `2^x` is approximated by a polynomial.
+    ///
+    /// The integer exponent is limited to `[-127, 128]` before it is turned into
+    /// bits, so lanes whose rounded argument is below that range scale by `0.0`
+    /// and lanes above it scale by `+inf`. NaN lanes yield NaN. Infinite lanes
+    /// also yield NaN, because `arg - r` is NaN for them.
+    fn exp2(self) -> Self {
+        let arg = self;
+        let r: Self = arg.round();
+        // Bound the exponent so the conversion below is always defined. The
+        // comparisons are written so that a NaN lane (which compares false)
+        // falls through to the lower bound instead of staying NaN.
+        let min_exp = Self::splat(-127.0);
+        let max_exp = Self::splat(128.0);
+        let r_floor: Self = (r.lanes_gt(min_exp)).select(r, min_exp);
+        let scale_exp: Self = (r_floor.lanes_lt(max_exp)).select(r_floor, max_exp);
+        // (scale_exp + 127) * 2^23: the biased exponent shifted into the f32 exponent field.
+        let bits = scale_exp.mul_add(Self::splat(8388608.0f32), Self::splat(1065353216.0f32));
+        // SAFETY: `scale_exp` is an integer in [-127, 128] and never NaN, so
+        // `bits` is a finite, non-negative integer no larger than 255 * 2^23,
+        // which fits in `u32`.
+        let mul: Self = Self::from_bits(unsafe { bits.to_uint_unchecked() });
+        let x: Self = arg - r;
+        (Self::splat(0.000015310081f32))
+            .mul_add(x, Self::splat(0.0001547802f32))
+            .mul_add(x, Self::splat(0.0013333454f32))
+            .mul_add(x, Self::splat(0.009617995f32))
+            .mul_add(x, Self::splat(0.05550411f32))
+            .mul_add(x, Self::splat(0.24022652f32))
+            .mul_add(x, Self::splat(0.6931472f32))
+            .mul_add(x, Self::splat(1f32))
+            * mul
+    }
+    #[inline]
+    fn exp(self) -> Self {
+        // e^x == 2^(x * log2(e)): rescale the argument and defer to `exp2`.
+        let log2_e_lanes = Self::splat(
+            1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408,
+        );
+        let scaled: Self = self * log2_e_lanes;
+        scaled.exp2()
+    }
+    #[inline]
     fn sin(self) -> Self {
-        let RECIP_2PI = Self::splat(0.15915494f32);
+        let RECIP_2PI = Self::splat(0.15915494309189533577);
         let arg = self;
         let scaled: Self = arg * RECIP_2PI;
         let x: Self = scaled - scaled.round();
@@ -95,7 +134,7 @@ where
     }
     #[inline]
     fn cos(self) -> Self {
-        let RECIP_2PI = Self::splat(0.15915494f32);
+        let RECIP_2PI = Self::splat(0.15915494309189533577);
         let arg = self;
         let scaled: Self = arg * RECIP_2PI;
         let x: Self = scaled - scaled.round();
@@ -109,11 +148,11 @@ where
     }
     #[inline]
     fn tan(self) -> Self {
-        let RECIP_PI = Self::splat(0.31830987f32);
+        let RECIP_PI = Self::splat(0.31830988618379067154);
         let arg = self;
         let scaled: Self = arg * RECIP_PI;
         let x: Self = scaled - scaled.round();
-        let recip: Self = Self::splat(1f32) / (x * x - Self::splat(0.25f32));
+        let recip: Self = Self::splat(1.0) / (x * x - Self::splat(0.25));
         let y: Self = (Self::splat(0.014397301f32))
             .mul_add(x * x, Self::splat(0.021017345f32))
             .mul_add(x * x, Self::splat(0.05285888f32))

diff --git a/crates/std_float/src/test_libm32.rs b/crates/std_float/src/test_libm32.rs
@@ -169,7 +169,17 @@ fn asin_f32() {
     test_range!(
         min: -1.0,
         max: 1.0,
-        limit: one_ulp * 8.0,
+        limit: one_ulp * 9.0,
+        scalar_fn: |x : f32| x.asin(),
+        vector_fn: |x : f32x4| x.asin(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+
+    test_range!(
+        min: -0.5,
+        max: 0.5,
+        limit: one_ulp * 2.0,
         scalar_fn: |x : f32| x.asin(),
         vector_fn: |x : f32x4| x.asin(),
         scalar_type: f32,
@@ -204,3 +214,78 @@ fn atan_f32() {
         vector_type: f32x4,
     );
 }
+
+#[test]
+fn acos_f32() {
+    use core_simd::f32x4;
+    use crate::StdLibm;
+
+    let one_ulp = (2.0_f32).powi(-23);
+
+    // Whole domain of acos.
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: one_ulp * 8.0,
+        scalar_fn: |x : f32| x.acos(),
+        vector_fn: |x : f32x4| x.acos(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+
+    // Central range. This case previously called `asin` on both sides, which
+    // only repeated the check already made in `asin_f32`.
+    // NOTE(review): the 2-ulp limit is carried over from that asin case; confirm
+    // that acos meets it on this range and loosen it if necessary.
+    test_range!(
+        min: -0.5,
+        max: 0.5,
+        limit: one_ulp * 2.0,
+        scalar_fn: |x : f32| x.acos(),
+        vector_fn: |x : f32x4| x.acos(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+}
+
+#[test]
+fn exp2_f32() { // Feeds scalar `f32::exp2` and the `f32x4` `exp2` to `test_range!` over [-2, 2].
+    use core_simd::f32x4;
+    use crate::StdLibm;
+
+    let one_ulp = (2.0_f32).powi(-23); // 2^-23, the spacing of f32 values just above 1.0
+
+    test_range!(
+        min: -2.0,
+        max: 2.0,
+        limit: one_ulp * 2.0, // presumably the largest allowed scalar/vector difference; confirm in `test_range!`
+        scalar_fn: |x : f32| x.exp2(),
+        vector_fn: |x : f32x4| x.exp2(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+}
+
+#[test]
+fn exp_f32() { // Feeds scalar `f32::exp` and the `f32x4` `exp` to `test_range!`, split at zero into two ranges.
+    use core_simd::f32x4;
+    use crate::StdLibm;
+
+    let one_ulp = (2.0_f32).powi(-23); // 2^-23, the spacing of f32 values just above 1.0
+
+    test_range!(
+        min: -2.0,
+        max: 0.0,
+        limit: one_ulp * 2.0, // negative half: tighter limit
+        scalar_fn: |x : f32| x.exp(),
+        vector_fn: |x : f32x4| x.exp(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+
+    test_range!(
+        min: 0.0,
+        max: 2.0,
+        limit: one_ulp * 8.0, // positive half: looser limit, presumably because results grow towards e^2 here; confirm
+        scalar_fn: |x : f32| x.exp(),
+        vector_fn: |x : f32x4| x.exp(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+}
+