Skip to content

Commit

Permalink
Inv trig working + exp2, exp.
Browse files Browse the repository at this point in the history
  • Loading branch information
andy-thomason committed Jan 31, 2022
1 parent a067a6f commit b674a39
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 25 deletions.
27 changes: 24 additions & 3 deletions crates/core_simd/src/round.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::simd::{LaneCount, Simd, SupportedLaneCount};

macro_rules! implement {
{
$type:ty, $int_type:ty
$type:ty, $int_type:ty, $uint_type:ty
} => {
impl<const LANES: usize> Simd<$type, LANES>
where
Expand All @@ -29,9 +29,30 @@ macro_rules! implement {
pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
unsafe { intrinsics::simd_cast(value) }
}

/// Rounds toward zero and converts to the same-width unsigned integer type, assuming
/// that the value is finite and fits in that type.
///
/// # Safety
/// Every lane of the value must:
///
/// * Not be NaN
/// * Not be infinite
/// * Be representable in the return type, after truncating off its fractional part.
///   The return type is unsigned, so a lane that truncates to a negative integer
///   does not qualify.
#[inline]
pub unsafe fn to_uint_unchecked(self) -> Simd<$uint_type, LANES> {
// SAFETY: the caller upholds the contract listed in the `# Safety` section above.
unsafe { intrinsics::simd_cast(self) }
}

/// Creates a floating-point vector from an unsigned integer vector. Rounds values that are
/// not exactly representable.
///
/// This is the unsigned counterpart of `round_from_int`.
#[inline]
pub fn round_from_uint(value: Simd<$uint_type, LANES>) -> Self {
// SAFETY: NOTE(review): presumably sound for every input, since any unsigned
// integer has a nearest floating-point value — confirm against the contract of
// `simd_cast`.
unsafe { intrinsics::simd_cast(value) }
}
}
}
}

implement! { f32, i32 }
implement! { f64, i64 }
implement! { f32, i32, u32 }
implement! { f64, i64, u64 }
7 changes: 7 additions & 0 deletions crates/std_float/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ pub trait StdFloat: Sealed + Sized {
}

pub trait StdLibm : StdFloat {
type IntType;
type UintType;

fn sin(self) -> Self;

fn cos(self) -> Self;
Expand All @@ -135,6 +138,10 @@ pub trait StdLibm : StdFloat {
fn atan(self) -> Self;

fn atan2(self, x: Self) -> Self;

fn exp2(self) -> Self;

fn exp(self) -> Self;
}

impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
Expand Down
81 changes: 60 additions & 21 deletions crates/std_float/src/libm32.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// The generated functions name their local constants in upper case (e.g. `PI_BY_2`),
// which would otherwise trigger the `non_snake_case` lint.
#![allow(non_snake_case)]
// `doc` takes the text in name-value form; the list form `doc("...")` is rejected as
// an invalid doc attribute and the text never reaches the generated documentation.
#![doc = "This code is automatically generated, do not edit."]
use super::StdLibm;

use super::StdFloat;
Expand All @@ -9,22 +10,36 @@ impl<const N: usize> StdLibm for Simd<f32, N>
where
LaneCount<N>: SupportedLaneCount,
{
type IntType = Simd<i32, N>;
type UintType = Simd<u32, N>;
#[inline]
fn asin(self) -> Self {
let PI_BY_2 = Self::splat(1.57079632679489661923);
let arg = self;
arg.atan2((Self::splat(1f32) - arg * arg).sqrt())
let LIM: Self = Self::splat(0.70710678118654752440);
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
let s: Self =
((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0));
let x: Self =
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
let y: Self = (Self::splat(0.11644821f32))
.mul_add(x * x, Self::splat(0.04343228f32))
.mul_add(x * x, Self::splat(0.17078044f32))
.mul_add(x * x, Self::splat(0.99991643f32))
* x;
((arg * arg).lanes_lt(LIM * LIM)).select(y, c - y * s)
}
#[inline]
fn acos(self) -> Self {
let PI_BY_2 = Self::splat(1.5707964f32);
let PI = Self::splat(3.1415927f32);
let PI_BY_2 = Self::splat(1.57079632679489661923);
let PI = Self::splat(3.14159265358979323846);
let arg = self;
let LIM: Self = Self::splat(0.9f32);
let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(PI, Self::splat(0f32));
let LIM: Self = Self::splat(0.9);
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0));
let s: Self =
((arg).lanes_lt(Self::splat(0f32))).select(Self::splat(1f32), -Self::splat(1f32));
((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0));
let x: Self =
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1f32) - arg * arg).sqrt());
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
let y: Self = (Self::splat(1.3740137f32))
.mul_add(x * x, -Self::splat(3.1993167f32))
.mul_add(x * x, Self::splat(3.103398f32))
Expand All @@ -38,10 +53,10 @@ where
}
#[inline]
fn atan(self) -> Self {
let PI_BY_2 = Self::splat(1.5707964f32);
let PI_BY_2 = Self::splat(1.57079632679489661923);
let arg = self;
let LIM: Self = Self::splat(1f32);
let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
let LIM: Self = Self::splat(1.0);
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
let x: Self = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
let y: Self = (-Self::splat(0.0039602574f32))
.mul_add(x * x, Self::splat(0.021659138f32))
Expand All @@ -56,14 +71,14 @@ where
}
#[inline]
fn atan2(self, x: Self) -> Self {
let PI_BY_2 = Self::splat(1.5707964f32);
let PI = Self::splat(3.1415927f32);
let PI_BY_2 = Self::splat(1.57079632679489661923);
let PI = Self::splat(3.14159265358979323846);
let y = self;
let offset180: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI, PI);
let x1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-x, x);
let y1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-y, y);
let offset1: Self = ((x).lanes_lt(Self::splat(0f32))).select(offset180, Self::splat(0f32));
let offset90: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
let offset180: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI, PI);
let x1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-x, x);
let y1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-y, y);
let offset1: Self = ((x).lanes_lt(Self::splat(0.0))).select(offset180, Self::splat(0.0));
let offset90: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
let x2: Self = ((y1.abs()).lanes_gt(x1)).select(y1, x1);
let y2: Self = ((y1.abs()).lanes_gt(x1)).select(-x1, y1);
let offset2: Self = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1);
Expand All @@ -80,8 +95,32 @@ where
y3 + offset2
}
#[inline]
fn exp2(self) -> Self {
    // Split the input into the nearest whole number and the signed remainder.
    let whole: Self = self.round();
    let frac: Self = self - whole;

    // 8388608 is 2^23 and 1065353216 is 127 * 2^23 (the bit pattern of 1.0f32), so
    // this places `whole + 127` in the exponent field of an f32 bit pattern.
    let biased: Self = whole.mul_add(Self::splat(8388608.0f32), Self::splat(1065353216.0f32));

    // NOTE(review): `to_uint_unchecked` requires every lane to be finite and
    // representable in the unsigned target. Nothing in this function restricts
    // `self`, so NaN, infinite or large-magnitude inputs look like they break that
    // contract — confirm the intended input domain.
    let bits = unsafe { biased.to_uint_unchecked() };

    // Reinterpreting the integer as float bits gives the power-of-two scale factor
    // (for exponents inside the normal f32 range).
    let scale: Self = Self::from_bits(bits);

    // Degree-7 polynomial in the remainder, evaluated by Horner's rule; the low-order
    // coefficients (1, 0.6931472 ~ ln 2, ...) suggest it approximates 2^frac.
    let poly: Self = (Self::splat(0.000015310081f32))
        .mul_add(frac, Self::splat(0.0001547802f32))
        .mul_add(frac, Self::splat(0.0013333454f32))
        .mul_add(frac, Self::splat(0.009617995f32))
        .mul_add(frac, Self::splat(0.05550411f32))
        .mul_add(frac, Self::splat(0.24022652f32))
        .mul_add(frac, Self::splat(0.6931472f32))
        .mul_add(frac, Self::splat(1f32));

    poly * scale
}
#[inline]
fn exp(self) -> Self {
    // e^x = 2^(x * log2(e)): rescale the input and defer to `exp2`.
    let log2_of_e = Self::splat(
        1.442695040888963407359769137464649992339735961996202908859290566914912486673985594186422766333708408,
    );
    let scaled = self * log2_of_e;
    scaled.exp2()
}
#[inline]
fn sin(self) -> Self {
let RECIP_2PI = Self::splat(0.15915494f32);
let RECIP_2PI = Self::splat(0.15915494309189533577);
let arg = self;
let scaled: Self = arg * RECIP_2PI;
let x: Self = scaled - scaled.round();
Expand All @@ -95,7 +134,7 @@ where
}
#[inline]
fn cos(self) -> Self {
let RECIP_2PI = Self::splat(0.15915494f32);
let RECIP_2PI = Self::splat(0.15915494309189533577);
let arg = self;
let scaled: Self = arg * RECIP_2PI;
let x: Self = scaled - scaled.round();
Expand All @@ -109,11 +148,11 @@ where
}
#[inline]
fn tan(self) -> Self {
let RECIP_PI = Self::splat(0.31830987f32);
let RECIP_PI = Self::splat(0.31830988618379067154);
let arg = self;
let scaled: Self = arg * RECIP_PI;
let x: Self = scaled - scaled.round();
let recip: Self = Self::splat(1f32) / (x * x - Self::splat(0.25f32));
let recip: Self = Self::splat(1.0) / (x * x - Self::splat(0.25));
let y: Self = (Self::splat(0.014397301f32))
.mul_add(x * x, Self::splat(0.021017345f32))
.mul_add(x * x, Self::splat(0.05285888f32))
Expand Down
87 changes: 86 additions & 1 deletion crates/std_float/src/test_libm32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,17 @@ fn asin_f32() {
test_range!(
min: -1.0,
max: 1.0,
limit: one_ulp * 8.0,
limit: one_ulp * 9.0,
scalar_fn: |x : f32| x.asin(),
vector_fn: |x : f32x4| x.asin(),
scalar_type: f32,
vector_type: f32x4,
);

test_range!(
min: -0.5,
max: 0.5,
limit: one_ulp * 2.0,
scalar_fn: |x : f32| x.asin(),
vector_fn: |x : f32x4| x.asin(),
scalar_type: f32,
Expand Down Expand Up @@ -204,3 +214,78 @@ fn atan_f32() {
vector_type: f32x4,
);
}

/// Compares the vector `acos` against the scalar `f32::acos` over the full domain
/// and again over the central part of it.
#[test]
fn acos_f32() {
    use core_simd::f32x4;
    use crate::StdLibm;

    // 2^-23: the spacing of f32 values in [1, 2).
    let one_ulp = (2.0_f32).powi(-23);

    test_range!(
        min: -1.0,
        max: 1.0,
        limit: one_ulp * 8.0,
        scalar_fn: |x : f32| x.acos(),
        vector_fn: |x : f32x4| x.acos(),
        scalar_type: f32,
        vector_type: f32x4,
    );

    // This range previously called `asin` on both sides (copied from the asin test),
    // so it never exercised `acos`. The `one_ulp * 2.0` limit was copied along with it.
    // NOTE(review): the limit is set to the full-range acos bound because acos results
    // exceed 2.0 for part of this range, where one f32 step is already
    // `one_ulp * 2.0`. Tighten it once acos's central accuracy has been measured.
    test_range!(
        min: -0.5,
        max: 0.5,
        limit: one_ulp * 8.0,
        scalar_fn: |x : f32| x.acos(),
        vector_fn: |x : f32x4| x.acos(),
        scalar_type: f32,
        vector_type: f32x4,
    );
}

// Compares the vector `exp2` on `f32x4` with the scalar `f32::exp2` for inputs
// between -2.0 and 2.0.
#[test]
fn exp2_f32() {
use core_simd::f32x4;
use crate::StdLibm;

// 2^-23: the spacing of f32 values in [1, 2).
let one_ulp = (2.0_f32).powi(-23);

// NOTE(review): `limit` is presumably the largest accepted difference between the
// scalar and vector results; the `test_range!` definition is not visible here.
test_range!(
min: -2.0,
max: 2.0,
limit: one_ulp * 2.0,
scalar_fn: |x : f32| x.exp2(),
vector_fn: |x : f32x4| x.exp2(),
scalar_type: f32,
vector_type: f32x4,
);
}

// Compares the vector `exp` on `f32x4` with the scalar `f32::exp`, using separate
// tolerances for negative and non-negative inputs.
#[test]
fn exp_f32() {
use core_simd::f32x4;
use crate::StdLibm;

// 2^-23: the spacing of f32 values in [1, 2).
let one_ulp = (2.0_f32).powi(-23);

// Inputs in [-2, 0] give results no larger than 1, checked with the tighter limit.
test_range!(
min: -2.0,
max: 0.0,
limit: one_ulp * 2.0,
scalar_fn: |x : f32| x.exp(),
vector_fn: |x : f32x4| x.exp(),
scalar_type: f32,
vector_type: f32x4,
);

// Inputs in [0, 2] give results up to e^2 (about 7.39).
// NOTE(review): the wider limit presumably reflects the coarser f32 spacing at
// those magnitudes, assuming `limit` is an absolute difference — confirm against
// `test_range!`.
test_range!(
min: 0.0,
max: 2.0,
limit: one_ulp * 8.0,
scalar_fn: |x : f32| x.exp(),
vector_fn: |x : f32x4| x.exp(),
scalar_type: f32,
vector_type: f32x4,
);
}

0 comments on commit b674a39

Please sign in to comment.