From 2583874a5b8dc194b3177d68db97d704b174c1f9 Mon Sep 17 00:00:00 2001 From: Nicholas Thompson Date: Sun, 19 Nov 2023 00:53:50 -0500 Subject: [PATCH 1/2] Optimized native rol3 --- src/intrinsics/native/rot.rs | 42 ++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/intrinsics/native/rot.rs b/src/intrinsics/native/rot.rs index 8a584bc..16a4932 100644 --- a/src/intrinsics/native/rot.rs +++ b/src/intrinsics/native/rot.rs @@ -6,10 +6,48 @@ use core::mem::MaybeUninit; #[inline] pub fn rol3(r: &mut MaybeUninit, a: &U256, b: u32) { - r.write((a << (b & 0xff)) | (a >> ((256 - b) & 0xff))); + let lmask = u64::MAX.wrapping_shl(b); + let rmask = !lmask; + let mut arr = unsafe { core::mem::transmute::(*a) }; + let mut arr_2 = [0u64; 4]; + for i in 0..4 { + arr[i] = arr[i].rotate_left(b); + arr_2[i] = arr[i] & rmask; + arr[i] &= lmask; + } + for i in 0..4 { + #[cfg(target_endian = "little")] + { + arr[i] |= arr_2[(i + 3) & 3]; + } + #[cfg(target_endian = "big")] + { + arr[i] |= arr_2[(i + 1) & 3]; + } + } + if b & 128 != 0 { + if b & 64 != 0 { + r.write(unsafe { core::mem::transmute::<[u64; 4], U256>( + [arr[1], arr[2], arr[3], arr[0]] + ) }); + } else { + r.write(unsafe { core::mem::transmute::<[u64; 4], U256>( + [arr[2], arr[3], arr[0], arr[1]] + ) }); + } + } else { + if b & 64 != 0 { + r.write(unsafe { core::mem::transmute::<[u64; 4], U256>( + [arr[3], arr[0], arr[1], arr[2]] + ) }); + } else { + r.write(unsafe { core::mem::transmute::<[u64; 4], U256>(arr) }); + } + } } +// Perhaps this function should get its own code. #[inline] pub fn ror3(r: &mut MaybeUninit, a: &U256, b: u32) { - r.write((a >> (b & 0xff)) | (a << ((256 - b) & 0xff))); + rol3(r, a, 0u32.wrapping_sub(b) & 255) } From 1555bd130d1d13ebfa831f3f680af8eeb225ca3e Mon Sep 17 00:00:00 2001 From: Nicholas Thompson Date: Sat, 9 Dec 2023 20:27:14 -0500 Subject: [PATCH 2/2] Refined Native rol3 --- src/intrinsics/native/rot.rs | 40 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/src/intrinsics/native/rot.rs b/src/intrinsics/native/rot.rs index 16a4932..12ebe09 100644 --- a/src/intrinsics/native/rot.rs +++ b/src/intrinsics/native/rot.rs @@ -2,51 +2,45 @@ //! for 256-bit integers. use crate::uint::U256; -use core::mem::MaybeUninit; +use core::mem::{transmute, MaybeUninit}; #[inline] +#[allow(clippy::collapsible_else_if)] pub fn rol3(r: &mut MaybeUninit, a: &U256, b: u32) { - let lmask = u64::MAX.wrapping_shl(b); - let rmask = !lmask; - let mut arr = unsafe { core::mem::transmute::(*a) }; - let mut arr_2 = [0u64; 4]; - for i in 0..4 { - arr[i] = arr[i].rotate_left(b); - arr_2[i] = arr[i] & rmask; - arr[i] &= lmask; - } + let arr = unsafe { transmute::(*a) }; + let mut arr_2 = arr; + let mask = (-((b & 63) as i64) >> 63) as u64; for i in 0..4 { #[cfg(target_endian = "little")] { - arr[i] |= arr_2[(i + 3) & 3]; + arr_2[i] = ((arr[(i + 3) & 3] >> (64 - (b & 63))) & mask) | arr[i].wrapping_shl(b); } #[cfg(target_endian = "big")] { - arr[i] |= arr_2[(i + 1) & 3]; + arr_2[i] = ((arr[(i + 1) & 3] >> (64 - (b & 63))) & mask) | arr[i].wrapping_shl(b); } } if b & 128 != 0 { if b & 64 != 0 { - r.write(unsafe { core::mem::transmute::<[u64; 4], U256>( - [arr[1], arr[2], arr[3], arr[0]] - ) }); + r.write(unsafe { + transmute::<[u64; 4], U256>([arr_2[1], arr_2[2], arr_2[3], arr_2[0]]) + }); } else { - r.write(unsafe { core::mem::transmute::<[u64; 4], U256>( - [arr[2], arr[3], arr[0], arr[1]] - ) }); + r.write(unsafe { + transmute::<[u64; 4], U256>([arr_2[2], arr_2[3], arr_2[0], arr_2[1]]) + }); } } else { if b & 64 != 0 { - r.write(unsafe { core::mem::transmute::<[u64; 4], U256>( - [arr[3], arr[0], arr[1], arr[2]] - ) }); + r.write(unsafe { + transmute::<[u64; 4], U256>([arr_2[3], arr_2[0], arr_2[1], arr_2[2]]) + }); } else { - r.write(unsafe { core::mem::transmute::<[u64; 4], U256>(arr) }); + r.write(unsafe { transmute::<[u64; 4], U256>(arr_2) }); } } } -// Perhaps this function should get its own code. #[inline] pub fn ror3(r: &mut MaybeUninit, a: &U256, b: u32) { rol3(r, a, 0u32.wrapping_sub(b) & 255)