From 47cabfd1983f9d93e533cc4be1da17aca067d207 Mon Sep 17 00:00:00 2001 From: ibmp33 <51358300+ibmp33@users.noreply.github.com> Date: Wed, 29 Nov 2023 09:20:47 +0800 Subject: [PATCH] chore: add avx2 acceleration to poseidon hash function (#155) * chore: add avx2 acceleration to poseidon hash function * fix: add more sample to test overflow(mul/squre) * chore: add avx2 acceleration to poseidon hash function * fix: remove warnings * fix: overflow --------- Co-authored-by: eigmax --- algebraic/src/arch/x86_64/avx2_field_gl.rs | 110 +-- algebraic/src/arch/x86_64/avx512_field_gl.rs | 29 +- algebraic/src/lib.rs | 10 +- algebraic/src/packable.rs | 19 +- algebraic/src/packed.rs | 43 +- starky/src/arch/mod.rs | 2 + starky/src/arch/x86_64/avx2_poseidon_gl.rs | 529 ++++++++++++ starky/src/arch/x86_64/avx512_poseidon_gl.rs | 1 + starky/src/arch/x86_64/mod.rs | 20 + starky/src/constant.rs | 9 + starky/src/lib.rs | 6 + starky/src/linearhash.rs | 3 + starky/src/merklehash.rs | 8 +- starky/src/poseidon_constants_avx.rs | 811 +++++++++++++++++++ test/stark_aggregation.sh | 8 +- 15 files changed, 1495 insertions(+), 113 deletions(-) create mode 100644 starky/src/arch/mod.rs create mode 100644 starky/src/arch/x86_64/avx2_poseidon_gl.rs create mode 100644 starky/src/arch/x86_64/avx512_poseidon_gl.rs create mode 100644 starky/src/arch/x86_64/mod.rs create mode 100644 starky/src/poseidon_constants_avx.rs diff --git a/algebraic/src/arch/x86_64/avx2_field_gl.rs b/algebraic/src/arch/x86_64/avx2_field_gl.rs index 03fc5d5d..c58f49d4 100644 --- a/algebraic/src/arch/x86_64/avx2_field_gl.rs +++ b/algebraic/src/arch/x86_64/avx2_field_gl.rs @@ -6,12 +6,12 @@ //! 
use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; +use crate::packed::PackedField; use core::arch::x86_64::*; use core::fmt; use core::fmt::{Debug, Formatter}; use core::mem::transmute; use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}; -// use crate::packed::PackedField; /// AVX2 Goldilocks Field /// @@ -24,8 +24,6 @@ use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAss #[repr(transparent)] pub struct Avx2GoldilocksField(pub [GoldilocksField; 4]); -const WIDTH: usize = 4; - impl Avx2GoldilocksField { #[inline] pub fn new(x: __m256i) -> Self { @@ -35,30 +33,40 @@ impl Avx2GoldilocksField { pub fn get(&self) -> __m256i { unsafe { transmute(*self) } } - // } - // unsafe impl PackedField for Avx2GoldilocksField { #[inline] - pub fn from_slice(slice: &[GoldilocksField]) -> &Self { - assert_eq!(slice.len(), WIDTH); + pub fn square(&self) -> Avx2GoldilocksField { + Self::new(unsafe { square(self.get()) }) + } + #[inline] + pub fn reduce(x: __m256i, y: __m256i) -> Avx2GoldilocksField { + Self::new(unsafe { reduce128((x, y)) }) + } +} + +unsafe impl PackedField for Avx2GoldilocksField { + const WIDTH: usize = 4; + type Scalar = GoldilocksField; + const ZEROS: Self = Self([GoldilocksField([0]); 4]); + const ONES: Self = Self([GoldilocksField([1]); 4]); + + #[inline] + fn from_slice(slice: &[GoldilocksField]) -> &Self { + assert_eq!(slice.len(), Self::WIDTH); unsafe { &*slice.as_ptr().cast() } } #[inline] - pub fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { - assert_eq!(slice.len(), WIDTH); + fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { + assert_eq!(slice.len(), Self::WIDTH); unsafe { &mut *slice.as_mut_ptr().cast() } } #[inline] - pub fn as_slice(&self) -> &[GoldilocksField] { + fn as_slice(&self) -> &[GoldilocksField] { &self.0[..] 
} #[inline] - pub fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { + fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { &mut self.0[..] } - #[inline] - pub fn square(&self) -> Avx2GoldilocksField { - Self::new(unsafe { square(self.get()) }) - } #[inline] fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) { @@ -117,7 +125,7 @@ impl Debug for Avx2GoldilocksField { impl Default for Avx2GoldilocksField { #[inline] fn default() -> Self { - Self([GoldilocksField::from(0); 4]) + Self::ZEROS } } @@ -325,7 +333,8 @@ unsafe fn add_no_double_overflow_64_64s_s(x: __m256i, y_s: __m256i) -> __m256i { unsafe fn add(x: __m256i, y: __m256i) -> __m256i { let y_s = shift(y); let res_s = add_no_double_overflow_64_64s_s(x, canonicalize_s(y_s)); - shift(res_s) + // Added by Eigen + shift(canonicalize_s(res_s)) } #[inline] @@ -455,7 +464,8 @@ unsafe fn reduce128(x: (__m256i, __m256i)) -> __m256i { let lo1_s = sub_small_64s_64_s(lo0_s, hi_hi0); let t1 = _mm256_mul_epu32(hi0, EPSILON); let lo2_s = add_small_64s_64_s(lo1_s, t1); - let lo2 = shift(lo2_s); + // Added by Eigen + let lo2 = shift(canonicalize_s(lo2_s)); lo2 } @@ -503,12 +513,12 @@ mod tests { use super::Avx2GoldilocksField; use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; + use crate::packed::PackedField; use std::time::Instant; - // use crate::packed::PackedField; fn test_vals_a() -> [GoldilocksField; 4] { [ - GoldilocksField([14479013849828404771u64]), + GoldilocksField([18446744069414584320u64]), GoldilocksField([9087029921428221768u64]), GoldilocksField([2441288194761790662u64]), GoldilocksField([5646033492608483824u64]), @@ -516,7 +526,7 @@ mod tests { } fn test_vals_b() -> [GoldilocksField; 4] { [ - GoldilocksField([17891926589593242302u64]), + GoldilocksField([18446744069414584320u64]), GoldilocksField([11009798273260028228u64]), GoldilocksField([2028722748960791447u64]), GoldilocksField([7929433601095175579u64]), @@ -530,25 +540,24 @@ mod tests { let start = 
Instant::now(); let packed_a = Avx2GoldilocksField::from_slice(&a_arr); let packed_b = Avx2GoldilocksField::from_slice(&b_arr); - let packed_res = *packed_a + *packed_b; + let packed_res = *packed_a + *packed_b + *packed_a; let arr_res = packed_res.as_slice(); let avx2_duration = start.elapsed(); - // println!("arr_res: {:?}", arr_res); + // log::debug!("arr_res: {:?}", arr_res); let start = Instant::now(); - let expected = a_arr - .iter() - .zip(b_arr) - .map(|(&a, b)| Fr::from_repr(a).unwrap() + Fr::from_repr(b).unwrap()); + let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| { + Fr::from_repr(a).unwrap() + Fr::from_repr(a).unwrap() + Fr::from_repr(b).unwrap() + }); let expected_values: Vec = expected.collect(); - // println!("expected values: {:?}", expected_values); + log::debug!("expected values: {:?}", expected_values[0].as_int()); let non_accelerated_duration = start.elapsed(); for (exp, &res) in expected_values.iter().zip(arr_res) { assert_eq!(res, exp.into_repr()); } - println!("test_add_AVX2_accelerated time: {:?}", avx2_duration); - println!( + log::debug!("test_add_AVX2_accelerated time: {:?}", avx2_duration); + log::debug!( "test_add_Non_accelerated time: {:?}", non_accelerated_duration ); @@ -556,6 +565,7 @@ mod tests { #[test] fn test_mul() { + env_logger::try_init().unwrap_or_default(); let a_arr = test_vals_a(); let b_arr = test_vals_b(); let start = Instant::now(); @@ -564,7 +574,7 @@ mod tests { let packed_res = packed_a * packed_b; let arr_res = packed_res.as_slice(); let avx2_duration = start.elapsed(); - // println!("arr_res: {:?}", arr_res); + // log::debug!("arr_res: {:?}", arr_res); let start = Instant::now(); let expected = a_arr @@ -573,14 +583,14 @@ mod tests { .map(|(&a, b)| Fr::from_repr(a).unwrap() * Fr::from_repr(b).unwrap()); let expected_values: Vec = expected.collect(); let non_accelerated_duration = start.elapsed(); - // println!("expected values: {:?}", expected_values); + log::debug!("expected values: {:?}", expected_values); 
for (exp, &res) in expected_values.iter().zip(arr_res) { assert_eq!(res, exp.into_repr()); } - println!("test_mul_AVX2_accelerated time: {:?}", avx2_duration); - println!( + log::debug!("test_mul_AVX2_accelerated time: {:?}", avx2_duration); + log::debug!( "test_mul_Non_accelerated time: {:?}", non_accelerated_duration ); @@ -594,7 +604,7 @@ mod tests { let packed_res = packed_a / GoldilocksField([7929433601095175579u64]); let arr_res = packed_res.as_slice(); let avx2_duration = start.elapsed(); - // println!("arr_res: {:?}", arr_res); + // log::debug!("arr_res: {:?}", arr_res); let start = Instant::now(); let expected = a_arr.iter().map(|&a| { @@ -603,14 +613,14 @@ mod tests { }); let expected_values: Vec = expected.collect(); let non_accelerated_duration = start.elapsed(); - // println!("expected values: {:?}", expected_values); + // log::debug!("expected values: {:?}", expected_values); for (exp, &res) in expected_values.iter().zip(arr_res) { assert_eq!(res, exp.into_repr()); } - println!("test_div_AVX2_accelerated time: {:?}", avx2_duration); - println!( + log::debug!("test_div_AVX2_accelerated time: {:?}", avx2_duration); + log::debug!( "test_div_Non_accelerated time: {:?}", non_accelerated_duration ); @@ -624,7 +634,7 @@ mod tests { let packed_res = packed_a.square(); let arr_res = packed_res.as_slice(); let avx2_duration = start.elapsed(); - // println!("arr_res: {:?}", arr_res); + // log::debug!("arr_res: {:?}", arr_res); let start = Instant::now(); let mut expected_values = Vec::new(); @@ -640,12 +650,12 @@ mod tests { } } let non_accelerated_duration = start.elapsed(); - // println!("expected values: {:?}", expected_values); + // log::debug!("expected values: {:?}", expected_values); for (exp, &res) in expected_values.iter().zip(arr_res) { assert_eq!(res, exp.into_repr()); } - println!("test_square_AVX2_accelerated time: {:?}", avx2_duration); - println!( + log::debug!("test_square_AVX2_accelerated time: {:?}", avx2_duration); + log::debug!( 
"test_square_Non_accelerated time: {:?}", non_accelerated_duration ); @@ -659,20 +669,20 @@ mod tests { let packed_res = -packed_a; let arr_res = packed_res.as_slice(); let avx2_duration = start.elapsed(); - // println!("arr_res: {:?}", arr_res); + // log::debug!("arr_res: {:?}", arr_res); let start = Instant::now(); let expected = a_arr.iter().map(|&a| -Fr::from_repr(a).unwrap()); let expected_values: Vec = expected.collect(); let non_accelerated_duration = start.elapsed(); - // println!("expected values: {:?}", expected_values); + // log::debug!("expected values: {:?}", expected_values); for (exp, &res) in expected_values.iter().zip(arr_res) { assert_eq!(res, exp.into_repr()); } - println!("test_neg_AVX2_accelerated time: {:?}", avx2_duration); - println!( + log::debug!("test_neg_AVX2_accelerated time: {:?}", avx2_duration); + log::debug!( "test_neg_Non_accelerated time: {:?}", non_accelerated_duration ); @@ -688,7 +698,7 @@ mod tests { let packed_res = packed_a - packed_b; let arr_res = packed_res.as_slice(); let avx2_duration = start.elapsed(); - // println!("arr_res: {:?}", arr_res); + // log::debug!("arr_res: {:?}", arr_res); let start = Instant::now(); let expected = a_arr @@ -697,14 +707,14 @@ mod tests { .map(|(&a, b)| Fr::from_repr(a).unwrap() - Fr::from_repr(b).unwrap()); let expected_values: Vec = expected.collect(); let non_accelerated_duration = start.elapsed(); - // println!("expected values: {:?}", expected_values); + // log::debug!("expected values: {:?}", expected_values); for (exp, &res) in expected_values.iter().zip(arr_res) { assert_eq!(res, exp.into_repr()); } - println!("test_sub_AVX2_accelerated time: {:?}", avx2_duration); - println!( + log::debug!("test_sub_AVX2_accelerated time: {:?}", avx2_duration); + log::debug!( "test_sub_Non_accelerated time: {:?}", non_accelerated_duration ); diff --git a/algebraic/src/arch/x86_64/avx512_field_gl.rs b/algebraic/src/arch/x86_64/avx512_field_gl.rs index fe9e8942..84d93dbf 100644 --- 
a/algebraic/src/arch/x86_64/avx512_field_gl.rs +++ b/algebraic/src/arch/x86_64/avx512_field_gl.rs @@ -5,6 +5,7 @@ //! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --release use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; +use crate::packed::PackedField; use core::arch::x86_64::*; use core::fmt; use core::fmt::{Debug, Formatter}; @@ -34,27 +35,36 @@ impl Avx512GoldilocksField { unsafe { transmute(*self) } } #[inline] - pub fn from_slice(slice: &[GoldilocksField]) -> &Self { + pub fn square(&self) -> Avx512GoldilocksField { + Self::new(unsafe { square(self.get()) }) + } +} + +unsafe impl PackedField for Avx512GoldilocksField { + const WIDTH: usize = 8; + + type Scalar = GoldilocksField; + + const ZEROS: Self = Self([GoldilocksField([0]); 8]); + const ONES: Self = Self([GoldilocksField([1]); 8]); + #[inline] + fn from_slice(slice: &[GoldilocksField]) -> &Self { assert_eq!(slice.len(), WIDTH); unsafe { &*slice.as_ptr().cast() } } #[inline] - pub fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { + fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { assert_eq!(slice.len(), WIDTH); unsafe { &mut *slice.as_mut_ptr().cast() } } #[inline] - pub fn as_slice(&self) -> &[GoldilocksField] { + fn as_slice(&self) -> &[GoldilocksField] { &self.0[..] } #[inline] - pub fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { + fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { &mut self.0[..] 
} - #[inline] - pub fn square(&self) -> Avx512GoldilocksField { - Self::new(unsafe { square(self.get()) }) - } #[inline] fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) { @@ -114,7 +124,7 @@ impl Debug for Avx512GoldilocksField { impl Default for Avx512GoldilocksField { #[inline] fn default() -> Self { - Self([GoldilocksField::from(0); 8]) + Self::ZEROS } } @@ -397,6 +407,7 @@ mod tests { use super::Avx512GoldilocksField; use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; + use crate::packed::PackedField; use std::time::Instant; fn test_vals_a() -> [GoldilocksField; 8] { diff --git a/algebraic/src/lib.rs b/algebraic/src/lib.rs index 5806fec3..80b8d24a 100644 --- a/algebraic/src/lib.rs +++ b/algebraic/src/lib.rs @@ -1,6 +1,6 @@ #![allow(clippy::unit_arg)] - -// #![feature(stdsimd)] +#![feature(stdsimd)] +#![feature(const_trait_impl)] #[macro_use] extern crate serde; @@ -13,7 +13,7 @@ extern crate num_bigint; extern crate num_traits; extern crate rand; -// pub mod arch; +pub mod arch; pub mod circom_circuit; pub mod errors; @@ -24,13 +24,15 @@ pub mod witness; pub mod utils; +pub mod packable; +pub mod packed; + pub use bellman_ce::pairing::ff; pub use ff::*; pub use franklin_crypto::bellman as bellman_ce; #[cfg(test)] mod field_gl_test; -// mod packed; #[cfg(target_arch = "wasm32")] extern crate wasm_bindgen; diff --git a/algebraic/src/packable.rs b/algebraic/src/packable.rs index c6f8cd36..6aa4e72b 100644 --- a/algebraic/src/packable.rs +++ b/algebraic/src/packable.rs @@ -1,15 +1,18 @@ +use crate::ff::PrimeFieldRepr; use crate::packed::PackedField; -use crate::types::Field; /// Points us to the default packing for a particular field. There may me multiple choices of /// PackedField for a particular Field (e.g. every Field is also a PackedField), but this is the /// recommended one. The recommended packing varies by target_arch and target_feature. 
-pub trait Packable: Field { +pub trait Packable: PrimeFieldRepr { type Packing: PackedField; } -impl Packable for F { - default type Packing = Self; +impl Packable for F +where + F: PrimeFieldRepr + PackedField, +{ + type Packing = Self; } #[cfg(all( @@ -23,8 +26,8 @@ impl Packable for F { target_feature = "avx512vl" )) ))] -impl Packable for crate::goldilocks_field::GoldilocksField { - type Packing = crate::arch::x86_64::avx2_goldilocks_field::Avx2GoldilocksField; +impl Packable for crate::field_gl::FrRepr { + type Packing = crate::arch::x86_64::avx2_field_gl::Avx2GoldilocksField; } #[cfg(all( @@ -35,6 +38,6 @@ impl Packable for crate::goldilocks_field::GoldilocksField { target_feature = "avx512f", target_feature = "avx512vl" ))] -impl Packable for crate::goldilocks_field::GoldilocksField { - type Packing = crate::arch::x86_64::avx512_goldilocks_field::Avx512GoldilocksField; +impl Packable for crate::field_gl::FrRepr { + type Packing = crate::arch::x86_64::avx512_field_gl::Avx512GoldilocksField; } diff --git a/algebraic/src/packed.rs b/algebraic/src/packed.rs index 0236962d..e4cebb2b 100644 --- a/algebraic/src/packed.rs +++ b/algebraic/src/packed.rs @@ -1,11 +1,9 @@ use core::fmt::Debug; -use core::iter::{Product, Sum}; +// use core::iter::{Product, Sum}; +use crate::ff::PrimeFieldRepr; use core::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Sub, SubAssign}; use core::slice; -// use crate::ops::Square; -use crate::ff::*; - /// # Safety /// - WIDTH is assumed to be a power of 2. /// - If P implements PackedField then P must be castable to/from [P::Scalar; P::WIDTH] without UB. 
@@ -27,20 +25,20 @@ pub unsafe trait PackedField: + MulAssign // + Square + Neg - + Product + // + Product + Send + Sub + Sub + SubAssign + SubAssign - + Sum + // + Sum + Sync where Self::Scalar: Add, Self::Scalar: Mul, Self::Scalar: Sub, { - type Scalar: Field; + type Scalar: PrimeFieldRepr; const WIDTH: usize; const ZEROS: Self; @@ -93,35 +91,4 @@ where unsafe { slice::from_raw_parts_mut(buf_ptr, n) } } - fn doubles(&self) -> Self { - *self * Self::Scalar::TWO - } -} - -unsafe impl PackedField for F { - type Scalar = Self; - - const WIDTH: usize = 1; - const ZEROS: Self = F::ZERO; - const ONES: Self = F::ONE; - - fn from_slice(slice: &[Self::Scalar]) -> &Self { - &slice[0] - } - fn from_slice_mut(slice: &mut [Self::Scalar]) -> &mut Self { - &mut slice[0] - } - fn as_slice(&self) -> &[Self::Scalar] { - slice::from_ref(self) - } - fn as_slice_mut(&mut self) -> &mut [Self::Scalar] { - slice::from_mut(self) - } - - fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) { - match block_len { - 1 => (*self, other), - _ => panic!("unsupported block length"), - } - } } diff --git a/starky/src/arch/mod.rs b/starky/src/arch/mod.rs new file mode 100644 index 00000000..832557ef --- /dev/null +++ b/starky/src/arch/mod.rs @@ -0,0 +1,2 @@ +#[cfg(target_arch = "x86_64")] +pub mod x86_64; diff --git a/starky/src/arch/x86_64/avx2_poseidon_gl.rs b/starky/src/arch/x86_64/avx2_poseidon_gl.rs new file mode 100644 index 00000000..81a8c525 --- /dev/null +++ b/starky/src/arch/x86_64/avx2_poseidon_gl.rs @@ -0,0 +1,529 @@ +#![allow(non_snake_case)] +use crate::constant::POSEIDON_CONSTANTS_OPT_AVX2; +use crate::poseidon_constants_avx as constants; +use algebraic::arch::x86_64::avx2_field_gl::Avx2GoldilocksField; +use algebraic::packed::PackedField; +use core::arch::x86_64::*; +//use core::mem; +use plonky::field_gl::Fr as FGL; +use plonky::field_gl::FrRepr; +//use plonky::Field; +use plonky::PrimeField; + +#[derive(Debug)] +pub struct ConstantsAvx2 { + pub c: Vec, + pub m: Vec, 
+ pub p: Vec, + pub s: Vec, + pub n_rounds_f: usize, + pub n_rounds_p: usize, +} + +pub fn load_constants_avx2() -> ConstantsAvx2 { + let (c_str, m_str, p_str, s_str) = constants::constants(); + let mut c: Vec = Vec::new(); + for v1 in c_str { + c.push(FrRepr([v1])); + } + let mut m: Vec = Vec::new(); + for v1 in m_str { + m.push(FrRepr([v1])); + } + + let mut p: Vec = Vec::new(); + for v1 in p_str { + p.push(FrRepr([v1])); + } + + let mut s: Vec = Vec::new(); + for v1 in s_str { + s.push(FrRepr([v1])); + } + + ConstantsAvx2 { + c, + m, + p, + s, + n_rounds_f: 8, + n_rounds_p: 22, + } +} + +pub struct Poseidon; + +impl Default for Poseidon { + fn default() -> Self { + Self::new() + } +} + +#[inline] +unsafe fn spmv_avx_4x12( + r: &mut Avx2GoldilocksField, + st0: Avx2GoldilocksField, + st1: Avx2GoldilocksField, + st2: Avx2GoldilocksField, + m: Vec, +) { + let m = Avx2GoldilocksField::pack_slice(&m); + *r = (st0 * m[0]) + (st1 * m[1]) + (st2 * m[2]) +} + +impl Poseidon { + pub fn new() -> Poseidon { + Self {} + } + + // #[inline(always)] + // unsafe fn _extract_u64s_from_m256i(value: __m256i) -> [u64; 4] { + // mem::transmute(value) + // } + + #[inline(always)] + fn pow7(x: &mut Avx2GoldilocksField) { + let aux = *x; + *x = x.square(); + *x *= aux; + *x = x.square(); + *x *= aux; + } + + #[inline(always)] + fn pow7_triple( + st0: &mut Avx2GoldilocksField, + st1: &mut Avx2GoldilocksField, + st2: &mut Avx2GoldilocksField, + ) { + let aux0 = *st0; + let aux1 = *st1; + let aux2 = *st2; + *st0 = st0.square(); + *st1 = st1.square(); + *st2 = st2.square(); + *st0 *= aux0; + *st1 *= aux1; + *st2 *= aux2; + *st0 = st0.square(); + *st1 = st1.square(); + *st2 = st2.square(); + *st0 *= aux0; + *st1 *= aux1; + *st2 *= aux2; + } + + #[inline(always)] + fn add_avx( + st0: &mut Avx2GoldilocksField, + st1: &mut Avx2GoldilocksField, + st2: &mut Avx2GoldilocksField, + c: Vec, + ) { + let c = Avx2GoldilocksField::pack_slice(&c); + *st0 = *st0 + c[0]; + *st1 = *st1 + c[1]; + *st2 = *st2 
+ c[2]; + } + + #[inline(always)] + fn mult_add_avx( + st0: &mut Avx2GoldilocksField, + st1: &mut Avx2GoldilocksField, + st2: &mut Avx2GoldilocksField, + s0: Avx2GoldilocksField, + s: Vec, + ) { + let s = Avx2GoldilocksField::pack_slice(&s); + *st0 = *st0 + s[0] * s0; + *st1 = *st1 + s[1] * s0; + *st2 = *st2 + s[2] * s0; + } + + #[inline(always)] + unsafe fn mmult_avx( + st0: &mut Avx2GoldilocksField, + st1: &mut Avx2GoldilocksField, + st2: &mut Avx2GoldilocksField, + p: Vec, + ) { + let mut tmp0 = Avx2GoldilocksField::ZEROS; + let mut tmp1 = Avx2GoldilocksField::ZEROS; + let mut tmp2 = Avx2GoldilocksField::ZEROS; + Self::mmult_avx_4x12(&mut tmp0, *st0, *st1, *st2, p[0..48].to_vec()); + Self::mmult_avx_4x12(&mut tmp1, *st0, *st1, *st2, p[48..96].to_vec()); + Self::mmult_avx_4x12(&mut tmp2, *st0, *st1, *st2, p[96..144].to_vec()); + *st0 = tmp0; + *st1 = tmp1; + *st2 = tmp2; + } + + // Dense matrix-vector product + #[inline] + unsafe fn mmult_avx_4x12( + tmp: &mut Avx2GoldilocksField, + st0: Avx2GoldilocksField, + st1: Avx2GoldilocksField, + st2: Avx2GoldilocksField, + m: Vec, + ) { + let mut r0 = Avx2GoldilocksField::ZEROS; + let mut r1 = Avx2GoldilocksField::ZEROS; + let mut r2 = Avx2GoldilocksField::ZEROS; + let mut r3 = Avx2GoldilocksField::ZEROS; + spmv_avx_4x12(&mut r0, st0, st1, st2, m[0..12].to_vec()); + spmv_avx_4x12(&mut r1, st0, st1, st2, m[12..24].to_vec()); + spmv_avx_4x12(&mut r2, st0, st1, st2, m[24..36].to_vec()); + spmv_avx_4x12(&mut r3, st0, st1, st2, m[36..48].to_vec()); + // Transpose: transform de 4x4 matrix stored in rows r0...r3 to the columns c0...c3 + let t0 = _mm256_permute2f128_si256(r0.get(), r2.get(), 0b00100000); + let t1 = _mm256_permute2f128_si256(r1.get(), r3.get(), 0b00100000); + let t2 = _mm256_permute2f128_si256(r0.get(), r2.get(), 0b00110001); + let t3 = _mm256_permute2f128_si256(r1.get(), r3.get(), 0b00110001); + let c0 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpacklo_pd( + _mm256_castsi256_pd(t0), + 
_mm256_castsi256_pd(t1), + ))); + let c1 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpackhi_pd( + _mm256_castsi256_pd(t0), + _mm256_castsi256_pd(t1), + ))); + let c2 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpacklo_pd( + _mm256_castsi256_pd(t2), + _mm256_castsi256_pd(t3), + ))); + let c3 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpackhi_pd( + _mm256_castsi256_pd(t2), + _mm256_castsi256_pd(t3), + ))); + // Add columns to obtain result + *tmp = c0 + c1 + c2 + c3; + } + + #[inline(always)] + unsafe fn mmult_avx_8( + st0: &mut Avx2GoldilocksField, + st1: &mut Avx2GoldilocksField, + st2: &mut Avx2GoldilocksField, + m: Vec, + ) { + let mut tmp0 = Avx2GoldilocksField::ZEROS; + let mut tmp1 = Avx2GoldilocksField::ZEROS; + let mut tmp2 = Avx2GoldilocksField::ZEROS; + Self::mmult_avx_4x12_8(&mut tmp0, *st0, *st1, *st2, m[0..48].to_vec()); + Self::mmult_avx_4x12_8(&mut tmp1, *st0, *st1, *st2, m[48..96].to_vec()); + Self::mmult_avx_4x12_8(&mut tmp2, *st0, *st1, *st2, m[96..144].to_vec()); + *st0 = tmp0; + *st1 = tmp1; + *st2 = tmp2; + } + + // Dense matrix-vector product + #[inline] + unsafe fn mmult_avx_4x12_8( + tmp: &mut Avx2GoldilocksField, + st0: Avx2GoldilocksField, + st1: Avx2GoldilocksField, + st2: Avx2GoldilocksField, + m: Vec, + ) { + let mut r0 = Avx2GoldilocksField::ZEROS; + let mut r1 = Avx2GoldilocksField::ZEROS; + let mut r2 = Avx2GoldilocksField::ZEROS; + let mut r3 = Avx2GoldilocksField::ZEROS; + Self::spmv_avx_4x12_8(&mut r0, st0, st1, st2, m[0..12].to_vec()); + Self::spmv_avx_4x12_8(&mut r1, st0, st1, st2, m[12..24].to_vec()); + Self::spmv_avx_4x12_8(&mut r2, st0, st1, st2, m[24..36].to_vec()); + Self::spmv_avx_4x12_8(&mut r3, st0, st1, st2, m[36..48].to_vec()); + // Transpose: transform de 4x4 matrix stored in rows r0...r3 to the columns c0...c3 + let t0 = _mm256_permute2f128_si256(r0.get(), r2.get(), 0b00100000); + let t1 = _mm256_permute2f128_si256(r1.get(), r3.get(), 0b00100000); + let t2 = 
_mm256_permute2f128_si256(r0.get(), r2.get(), 0b00110001); + let t3 = _mm256_permute2f128_si256(r1.get(), r3.get(), 0b00110001); + let c0 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpacklo_pd( + _mm256_castsi256_pd(t0), + _mm256_castsi256_pd(t1), + ))); + let c1 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpackhi_pd( + _mm256_castsi256_pd(t0), + _mm256_castsi256_pd(t1), + ))); + let c2 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpacklo_pd( + _mm256_castsi256_pd(t2), + _mm256_castsi256_pd(t3), + ))); + let c3 = Avx2GoldilocksField::new(_mm256_castpd_si256(_mm256_unpackhi_pd( + _mm256_castsi256_pd(t2), + _mm256_castsi256_pd(t3), + ))); + // Add columns to obtain result + *tmp = c0 + c1 + c2 + c3; + } + + #[inline] + unsafe fn spmv_avx_4x12_8( + r: &mut Avx2GoldilocksField, + st0: Avx2GoldilocksField, + st1: Avx2GoldilocksField, + st2: Avx2GoldilocksField, + m: Vec, + ) { + let m = Avx2GoldilocksField::pack_slice(&m); + let mut c0_h = Avx2GoldilocksField::ZEROS; + let mut c0_l = Avx2GoldilocksField::ZEROS; + let mut c1_h = Avx2GoldilocksField::ZEROS; + let mut c1_l = Avx2GoldilocksField::ZEROS; + let mut c2_h = Avx2GoldilocksField::ZEROS; + let mut c2_l = Avx2GoldilocksField::ZEROS; + Self::mult_avx_72(&mut c0_h, &mut c0_l, st0, m[0]); + Self::mult_avx_72(&mut c1_h, &mut c1_l, st1, m[1]); + Self::mult_avx_72(&mut c2_h, &mut c2_l, st2, m[2]); + let c_h = c0_h + c1_h + c2_h; + let c_l = c0_l + c1_l + c2_l; + *r = Avx2GoldilocksField::reduce(c_h.get(), c_l.get()) + } + + #[inline] + unsafe fn mult_avx_72( + c_h: &mut Avx2GoldilocksField, + c_l: &mut Avx2GoldilocksField, + a: Avx2GoldilocksField, + b: Avx2GoldilocksField, + ) { + // Obtain a_h in the lower 32 bits + let a_h = _mm256_srli_epi64(a.get(), 32); + //__m256i a_h = _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(a))); + + // c = (a_h+a_l)*(b_l)=a_h*b_l+a_l*b_l=c_hl+c_ll + // note: _mm256_mul_epu32 uses only the lower 32bits of each chunk so a=a_l and b=b_l + let c_hl 
= _mm256_mul_epu32(a_h, b.get()); + let c_ll = _mm256_mul_epu32(a.get(), b.get()); + + // Bignum addition + // Ranges: c_hl[95:32], c_ll[63:0] + // parts that intersect must be added + + // LOW PART: + // 1: r0 = c_hl + c_ll_h + // does not overflow: c_hl <= (2^32-1)*(2^8-1)< 2^40 + // c_ll_h <= 2^32-1 + // c_hl + c_ll_h <= 2^41 + let c_ll_h = _mm256_srli_epi64(c_ll, 32); + let r0 = _mm256_add_epi64(c_hl, c_ll_h); + + // 2: c_l = r0_l | c_ll_l + let r0_l = _mm256_slli_epi64(r0, 32); + //__m256i r0_l = _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(r0))); + *c_l = Avx2GoldilocksField::new(_mm256_blend_epi32(c_ll, r0_l, 0xaa)); + // HIGH PART: c_h = r0_h + *c_h = Avx2GoldilocksField::new(_mm256_srli_epi64(r0, 32)); + } + + pub fn hash(&self, inp: &Vec, init_state: &[FGL], out: usize) -> Result, String> { + unsafe { self.hash_inner(inp, init_state, out) } + } + + unsafe fn hash_inner( + &self, + inp: &Vec, + init_state: &[FGL], + out: usize, + ) -> Result, String> { + if inp.len() != 8 { + return Err(format!("Wrong inputs length {} != 8", inp.len(),)); + } + if init_state.len() != 4 { + return Err(format!("Capacity inputs length {} != 4", init_state.len(),)); + } + let t = 12; + let n_rounds_f = POSEIDON_CONSTANTS_OPT_AVX2.n_rounds_f; + let n_rounds_p = POSEIDON_CONSTANTS_OPT_AVX2.n_rounds_p; + let C = &POSEIDON_CONSTANTS_OPT_AVX2.c; + let S = &POSEIDON_CONSTANTS_OPT_AVX2.s; + let M = &POSEIDON_CONSTANTS_OPT_AVX2.m; + let P = &POSEIDON_CONSTANTS_OPT_AVX2.p; + + let mut _state = vec![FGL::ZERO; t]; + _state[0..8].clone_from_slice(inp); + _state[8..].clone_from_slice(init_state); + + let state: Vec<_> = _state.iter().map(|x| x.into_repr()).collect(); + let mut state_vec = state.to_vec(); + let st = Avx2GoldilocksField::pack_slice_mut(&mut state_vec); + let mut st0 = st[0]; + let mut st1 = st[1]; + let mut st2 = st[2]; + + Self::add_avx(&mut st0, &mut st1, &mut st2, (&C[0..12]).to_vec()); + for r in 0..(n_rounds_f / 2 - 1) { + Self::pow7_triple(&mut st0, &mut 
st1, &mut st2); + Self::add_avx( + &mut st0, + &mut st1, + &mut st2, + (&C[(r + 1) * 12..((r + 1) * 12 + 12)]).to_vec(), + ); + Self::mmult_avx_8(&mut st0, &mut st1, &mut st2, (&M[0..144]).to_vec()); + } + Self::pow7_triple(&mut st0, &mut st1, &mut st2); + Self::add_avx(&mut st0, &mut st1, &mut st2, (&C[48..60]).to_vec()); + Self::mmult_avx(&mut st0, &mut st1, &mut st2, (&P[0..144]).to_vec()); + + for r in 0..n_rounds_p { + let st0_slice = st0.as_slice_mut(); + let mut s_arr = { [st0_slice[0], FrRepr([0]), FrRepr([0]), FrRepr([0])] }; + let mut _st0 = Avx2GoldilocksField::from_slice_mut(&mut s_arr); + + Self::pow7(&mut _st0); + let c_arr = { [C[(4 + 1) * 12 + r], FrRepr([0]), FrRepr([0]), FrRepr([0])] }; + let c = Avx2GoldilocksField::from_slice(&c_arr); + *_st0 = *_st0 + *c; + let st0_slice = st0.as_slice_mut(); + st0_slice[0] = _st0.as_slice_mut()[0]; + + let mut tmp = Avx2GoldilocksField::ZEROS; + spmv_avx_4x12( + &mut tmp, + st0, + st1, + st2, + S[12 * 2 * r..(12 * 2 * r + 12)].to_vec(), + ); + let tmp_slice = tmp.as_slice_mut(); + let sum = FGL::from_repr(tmp_slice[0]).unwrap() + + FGL::from_repr(tmp_slice[1]).unwrap() + + FGL::from_repr(tmp_slice[2]).unwrap() + + FGL::from_repr(tmp_slice[3]).unwrap(); + + let tmp_arr = { + [ + _st0.as_slice_mut()[0], + _st0.as_slice_mut()[0], + _st0.as_slice_mut()[0], + _st0.as_slice_mut()[0], + ] + }; + let s0 = Avx2GoldilocksField::from_slice(&tmp_arr); + Self::mult_add_avx( + &mut st0, + &mut st1, + &mut st2, + *s0, + (&S[(12 * (2 * r + 1))..(12 * (2 * r + 2))]).to_vec(), + ); + + let st0_slice = st0.as_slice_mut(); + st0_slice[0] = sum.into_repr(); + } + + for r in 0..(n_rounds_f / 2 - 1) { + Self::pow7_triple(&mut st0, &mut st1, &mut st2); + Self::add_avx( + &mut st0, + &mut st1, + &mut st2, + (&C[((n_rounds_f / 2 + 1) * t + n_rounds_p + r * t) + ..((n_rounds_f / 2 + 1) * t + n_rounds_p + r * t + 12)]) + .to_vec(), + ); + Self::mmult_avx_8(&mut st0, &mut st1, &mut st2, (&M[0..144]).to_vec()); + } + Self::pow7_triple(&mut 
st0, &mut st1, &mut st2);
+        Self::mmult_avx(&mut st0, &mut st1, &mut st2, (&M[0..144]).to_vec());
+
+        let st0_slice = st0.as_slice();
+
+        let mut result_vec: Vec<FGL> = Vec::new();
+        result_vec.extend(st0_slice.iter().map(|&repr| FGL::from_repr(repr).unwrap()));
+
+        Ok(result_vec[..out].to_vec())
+    }
+}
+
+#[cfg(test)]
+mod tests { // known-answer tests for `Poseidon::hash` plus a smoke test for `spmv_avx_4x12`
+    use crate::arch::x86_64::avx2_poseidon_gl::*;
+    use algebraic::arch::x86_64::avx2_field_gl::Avx2GoldilocksField;
+    use algebraic::packed::PackedField;
+    use plonky::field_gl::Fr as FGL;
+    use plonky::PrimeField;
+
+    #[test] // 8 zero inputs and a zero 4-element state must produce the pinned digest below
+    fn test_poseidon_opt_hash_all_0_avx() {
+        let poseidon = Poseidon::new();
+        let input = vec![FGL::ZERO; 8];
+        let state = vec![FGL::ZERO; 4];
+        let res = poseidon.hash(&input, &state, 4).unwrap();
+        let expected = vec![
+            FGL::from(0x3c18a9786cb0b359u64),
+            FGL::from(0xc4055e3364a246c3u64),
+            FGL::from(0x7953db0ab48808f4u64),
+            FGL::from(0xc71603f33a1144cau64),
+        ];
+        assert_eq!(res, expected);
+    }
+
+    #[test] // inputs are 0..=7 and the state is 8..=11 (values run 0..=11 despite the "1_11" name)
+    fn test_poseidon_opt_hash_1_11_avx() {
+        let poseidon = Poseidon::new();
+        let input = (0u64..8).map(FGL::from).collect::<Vec<FGL>>();
+        let state = (8u64..12).map(FGL::from).collect::<Vec<FGL>>();
+        let res = poseidon.hash(&input, &state, 4).unwrap();
+        let expected = vec![
+            FGL::from(0xd64e1e3efc5b8e9eu64),
+            FGL::from(0x53666633020aaa47u64),
+            FGL::from(0xd40285597c6a8825u64),
+            FGL::from(0x613a4f81e81231d2u64),
+        ];
+        assert_eq!(res, expected);
+    }
+
+    #[test] // every input and state element is ZERO - ONE, i.e. -1 in the field
+    fn test_poseidon_opt_hash_all_neg_1_avx() {
+        let poseidon = Poseidon::new();
+        let init = FGL::ZERO - FGL::ONE;
+        let input = vec![init; 8];
+        let state = vec![init; 4];
+        let res = poseidon.hash(&input, &state, 4).unwrap();
+        let expected = vec![
+            FGL::from(0xbe0085cfc57a8357u64),
+            FGL::from(0xd95af71847d05c09u64),
+            FGL::from(0xcf55a13d33c1c953u64),
+            FGL::from(0x95803a74f4530e82u64),
+        ];
+        assert_eq!(res, expected);
+    }
+
+    #[test] // NOTE(review): smoke test only - `_sum` is computed but never asserted; pin an expected value
+    fn test_spmv_avx_4x12() {
+        let mut out = Avx2GoldilocksField::ZEROS;
+        let in0 = Avx2GoldilocksField::from_slice(&[
+            FrRepr([18446744069414584320]), // 2^64 - 2^32; presumably p - 1, chosen to stress the overflow paths - confirm
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+        ]);
+        let in1 = Avx2GoldilocksField::from_slice(&[
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+        ]);
+        let in2 = Avx2GoldilocksField::from_slice(&[
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+            FrRepr([18446744069414584320]),
+        ]);
+
+        let in12 = vec![FrRepr([18446744069414584320]); 12];
+        unsafe {
+            spmv_avx_4x12(&mut out, *in0, *in1, *in2, in12);
+        };
+        let tmp_slice = out.as_slice_mut();
+        let _sum = FGL::from_repr(tmp_slice[0]).unwrap() // horizontal sum of the four output lanes
+            + FGL::from_repr(tmp_slice[1]).unwrap()
+            + FGL::from_repr(tmp_slice[2]).unwrap()
+            + FGL::from_repr(tmp_slice[3]).unwrap();
+    }
+}
diff --git a/starky/src/arch/x86_64/avx512_poseidon_gl.rs b/starky/src/arch/x86_64/avx512_poseidon_gl.rs
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/starky/src/arch/x86_64/avx512_poseidon_gl.rs
@@ -0,0 +1 @@
+
diff --git a/starky/src/arch/x86_64/mod.rs b/starky/src/arch/x86_64/mod.rs
new file mode 100644
index 00000000..f3314f29
--- /dev/null
+++ b/starky/src/arch/x86_64/mod.rs
@@ -0,0 +1,20 @@
+#[cfg(all(
+    target_feature = "avx2",
+    not(all(
+        target_feature = "avx512bw",
+        target_feature = "avx512cd",
+        target_feature = "avx512dq",
+        target_feature = "avx512f",
+        target_feature = "avx512vl"
+    ))
+))]
+pub mod avx2_poseidon_gl;
+
+#[cfg(all(
+    target_feature = "avx512bw",
+    target_feature = "avx512cd",
+    target_feature = "avx512dq",
+    target_feature = "avx512f",
+    target_feature = "avx512vl"
+))]
+pub mod avx512_poseidon_gl;
diff --git a/starky/src/constant.rs b/starky/src/constant.rs
index 551493a8..56df8b42 100644
--- a/starky/src/constant.rs
+++ b/starky/src/constant.rs
@@ -1,4 +1,6 @@
 #![allow(non_snake_case)]
+#[cfg(target_feature = "avx2")]
+use
crate::arch::x86_64::avx2_poseidon_gl::{load_constants_avx2, ConstantsAvx2};
 use crate::field_bls12381::Fr as Fr_bls12381;
 use crate::field_bn128::Fr as Fr_bn128;
 use crate::poseidon_bls12381::load_constants as load_constants_bls12381;
@@ -66,6 +68,13 @@ lazy_static::lazy_static! {
     };
 }

+#[cfg(target_feature = "avx2")] // only compiled when the build enables the avx2 target feature
+lazy_static::lazy_static! {
+    pub static ref POSEIDON_CONSTANTS_OPT_AVX2: ConstantsAvx2 = { // AVX2 constant set, initialised on first access
+        load_constants_avx2()
+    };
+}
+
 pub const MIN_OPS_PER_THREAD: usize = 1 << 12;
 pub const MAX_OPS_PER_THREAD: usize = 1 << 18;
 pub const GLOBAL_L1: &str = "Global.L1";
diff --git a/starky/src/lib.rs b/starky/src/lib.rs
index 078cc6dc..0c5d8fcd 100644
--- a/starky/src/lib.rs
+++ b/starky/src/lib.rs
@@ -1,5 +1,8 @@
 #![allow(clippy::needless_range_loop)]
 #![allow(dead_code)]
+#![feature(stdsimd)] // unstable feature gate: the crate now needs a nightly toolchain
+#![feature(const_trait_impl)]
+
 pub mod polsarray;
 mod polutils;
 pub mod stark_verifier_circom;
@@ -7,6 +10,8 @@ pub mod stark_verifier_circom_bn128;
 pub mod traits;
 pub mod types;

+pub mod arch;
+
 mod compressor12;
 pub use compressor12::*;

@@ -19,6 +24,7 @@ mod poseidon_bn128;
 mod poseidon_bn128_constants;
 mod poseidon_bn128_constants_opt;
 pub mod poseidon_bn128_opt;
+mod poseidon_constants_avx;
 mod poseidon_constants_opt;
 pub mod poseidon_opt;

diff --git a/starky/src/linearhash.rs b/starky/src/linearhash.rs
index 8eef3416..26465564 100644
--- a/starky/src/linearhash.rs
+++ b/starky/src/linearhash.rs
@@ -1,5 +1,8 @@
 #![allow(non_snake_case)]
+#[cfg(target_feature = "avx2")] // pick the AVX2 `Poseidon` when avx2 is enabled ...
+use crate::arch::x86_64::avx2_poseidon_gl::Poseidon;
 use crate::errors::Result;
+#[cfg(not(target_feature = "avx2"))] // ... otherwise keep the existing `poseidon_opt` implementation
 use crate::poseidon_opt::Poseidon;
 use crate::traits::MTNodeType;
 use crate::ElementDigest;
diff --git a/starky/src/merklehash.rs b/starky/src/merklehash.rs
index fdbb0737..12b0f43f 100644
--- a/starky/src/merklehash.rs
+++ b/starky/src/merklehash.rs
@@ -1,9 +1,12 @@
 #![allow(dead_code)]
+#[cfg(target_feature = "avx2")]
+use crate::arch::x86_64::avx2_poseidon_gl::Poseidon;
 use
crate::constant::{get_max_workers, MAX_OPS_PER_THREAD, MIN_OPS_PER_THREAD};
 use crate::digest::ElementDigest;
 use crate::errors::{EigenError, Result};
 use crate::f3g::F3G;
 use crate::linearhash::LinearHash;
+#[cfg(not(target_feature = "avx2"))]
 use crate::poseidon_opt::Poseidon;
 use crate::traits::MTNodeType;
 use crate::traits::MerkleTree;
@@ -276,6 +279,7 @@ mod tests {
     use crate::traits::MTNodeType;
     use crate::traits::MerkleTree;
     use plonky::field_gl::Fr as FGL;
+    use std::time::Instant;

     #[test]
     fn test_merklehash_gl_simple() {
@@ -289,11 +293,13 @@ mod tests {
                 cols[i * n_pols + j] = FGL::from((i + j * 1000) as u64);
             }
         }
-
+        let start = Instant::now();
         let mut tree = MerkleTreeGL::new();
         tree.merkelize(cols, n_pols, n).unwrap();
         let (v, mp) = tree.get_group_proof(idx).unwrap();
         let root = tree.root();
+        let duration = start.elapsed();
+        println!("time: {:?}", duration);
         let re = root.as_elements();
         let expected = vec![
             FGL::from(11508832812350783315u64),
diff --git a/starky/src/poseidon_constants_avx.rs b/starky/src/poseidon_constants_avx.rs
new file mode 100644
index 00000000..ae066e99
--- /dev/null
+++ b/starky/src/poseidon_constants_avx.rs
@@ -0,0 +1,823 @@
+/// Returns four tables of raw `u64` Poseidon constants.
+///
+/// The tuple is `(c, m, p, s)`; every entry is a plain `u64` word:
+///
+/// * `c` holds 118 words,
+/// * `m` holds 144 words (written below as 9 rows of 16),
+/// * `p` holds 144 words,
+/// * `s` holds 528 words in 24-word groups; each group starts with `0x19` and
+///   has `0x0` at offset 12.
+///
+/// NOTE(review): presumably consumed by `load_constants_avx2` to build the `C`, `M`,
+/// `P` and `S` tables indexed by the AVX2 `hash`; the loader is not in this hunk - confirm.
+#[allow(clippy::type_complexity)]
+pub fn constants() -> (Vec<u64>, Vec<u64>, Vec<u64>, Vec<u64>) {
+    let c_str: Vec<u64> = vec![
+        0xb585f766f2144405,
+        0x7746a55f43921ad7,
+        0xb2fb0d31cee799b4,
+        0xf6760a4803427d7,
+        0xe10d666650f4e012,
+        0x8cae14cb07d09bf1,
+        0xd438539c95f63e9f,
+        0xef781c7ce35b4c3d,
+        0xcdc4a239b0c44426,
+        0x277fa208bf337bff,
+        0xe17653a29da578a1,
+        0xc54302f225db2c76,
+        0xac6c9c2b4418dd61,
+        0xe0888eb1e8a01286,
+        0x813dbe952b98904e,
+        0xcc3033609c9cf175,
+        0x72cebc82a59c0f82,
+        0x8150d8525753e741,
+        0xb1122c74b268d66e,
+        0x7c6ddd482375aa2,
+        0xa4dd6f1ef49fb6af,
+        0xd33b0d5b4f7ccfe5,
+        0xc523112247209124,
+        0x464804200134c32d,
+        0xcd09dea180de4f2c,
+        0xadb069225c93e4e6,
+        0xbf01209b8a7c8534,
+        0xb1eb37d319913823,
+        0xdadf943b8d3e5a0d,
+        0x6d15f3cb7a3520ba,
+        0xf07af62b134ef181,
+        0x568355076c6b0de6,
+        0x31ca4bf93cab68b8,
+        0xfbad37a125735ba,
+        0x9d3a9caaf1ac9e0a,
+        0x4f265810f020c095,
+        0x6a84c9524e81a8bc,
+        0x68ba410537925c79,
+        0x422604631b34b07a,
+        0x28e3a001f62f8290,
+        0x3adfdccb8f734d41,
+        0x73503e539baec66a,
+        0xe8c1fd0142d9849c,
+        0xe204ac13660546c5,
+        0x8e2bb3ea97a40c53,
+        0xac2800d1bf56548c,
+        0x9494dca005d180d0,
+        0xf36e1d066383ef53,
+        0x8aa35b97a0e03c04,
+        0xcf42a59addbd1f0c,
+        0xa43ace89f8fdbd79,
+        0x37585d8c243870c,
+        0x4ab94ee3e26596fe,
+        0xcee3abbb50d57b23,
+        0xac91a7101a5ec55b,
+        0x9173aa8462280d2d,
+        0xaec1ca46ccb95105,
+        0x57b2f2845db61e4a,
+        0x95704158500c90c6,
+        0x66e023b0e6c9df5f,
+        0x315f63f4fec360ba,
+        0xf3009795713abcf1,
+        0xf4decc3fb00765ee,
+        0x32620ac918682d50,
+        0x49717d63a5fc742e,
+        0x153516f22014ea2d,
+        0xcc316380a2761fe4,
+        0x2e49b3f7076d203d,
+        0x44ac3e9bf0a2dc89,
+        0x49d1e388d8e35c,
+        0x53ec867cb39989fa,
+        0xd2c9bcc8d65f5a62,
+        0xc0cc930ee8540455,
+        0x40651e0872505e8,
+        0x168973b2ebafbe6c,
+        0x9c7eecb3b40581c2,
+        0x389473bcdfca97a2,
+        0xb1cb0b3abe9753ad,
+        0x41afceccffdb18e6,
+        0x7bf841e237ccd6c9,
+        0x6082a3f101fb888,
+        0x8c1a39196f4163cc,
+        0xb56664760c1c9476,
+        0x2a02ac020d1eb5a3,
+        0x6a9d48e8aa83605d,
+        0x8a0d2f5c4c9c51b2,
+        0x75fc65575b284ad4,
+        0xadaedf7d1ce2a8dd,
+        0x235bc889cc83968e,
+        0xa8c30cf1781738f5,
+        0x546b2a846753bcf8,
+        0x9b68e8c06c04bd25,
+        0x3fdf80794ebb443b,
+        0x92ca132a9bec5a45,
+        0x76133eecfd9bd1ff,
+        0x3fb0fd5381054812,
+        0xf15925978dbd52ff,
+        0x2ee289ac37f0e879,
+        0xd8af8654e9a2e659,
+        0x8595bbd7f34c5e8a,
+        0x206ddbf781e47b2,
+        0xe101a767854a2f97,
+        0xf4d4f0a01072c996,
+        0x197aec2894aab642,
+        0x8d0c3911220db49b,
+        0xa62a8bad609227ca,
+        0x1e4813a7e7b9cbce,
+        0x6b547528731244eb,
+        0xd08e48512bfea84e,
+        0xb2920c88d3885857,
+        0x1f0cd5d7a309fcc2,
+        0x99a0ea0842fdb4fb,
+        0xc227210554b6c53d,
+        0x70e5269708f6f3a9,
+        0xbe8f71c8c98bb3bd,
+        0xf96fb39adc4baaf6,
+        0x7f9a7555c60fc6c7,
+        0xccaa5446d71fe6a5,
+    ];
+    let m_str: Vec<u64> = vec![
+        0x19, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x14, 0x11, 0xf, 0x29,
+        0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x2, 0x1c,
+        0xd, 0xd, 0x27, 0x12, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27,
+        0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0xd, 0x27, 0x12, 0x22,
+        0x14, 0x11, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0xf,
+        0x29, 0x10, 0x2, 0x1c, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x2,
+        0x2, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x10, 0x2, 0x1c, 0xd,
+        0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x12,
+        0x22, 0x14, 0x11, 0xf, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x11,
+    ];
+    let p_str: Vec<u64> = vec![
+        0x19,
+        0xf,
+        0x29,
+        0x10,
+        0x2,
+        0x1c,
+        0xd,
+        0xd,
+        0x27,
+        0x12,
+        0x22,
+        0x14,
+        0x78566230aa7cc5d0,
+        0x817bd8a7869ed1b5,
+        0xd267254bea1097f4,
+        0x60c33ebd1e023f0a,
+        0xa89ef32ae1462322,
+        0x6250f5f176d483e7,
+        0xe16a6c1dee3ba347,
+        0xec9730136b7c2c05,
+        0x3cf7c3a39d94c236,
+        0xb4707207455f57e3,
+        0xaadb39e83e76a9e0,
+        0x32f8ae916e567d39,
+        0xdbf23e50005e7f24,
+        0x819f2c14a8366b1f,
+        0x2dc10fce3233f443,
+        0xdb6945a20d277091,
+        0x77c1a153e73659e8,
+        0xaad1255d46e78f07,
+        0x13d316e45539aef4,
+        0xe1ecc5c21eec0646,
+        0x9e62c7d7b000cb0b,
+        0x8e1de42b665c6706,
+        0xcd9bf0bd292c5fda,
+        0xaadb39e83e76a9e0,
+        0xb4a02c5c826d523e,
+        0x7a5cf5b7b922e946,
+        0xfa9db0de2d852e7a,
+        0x383dd77e07998487,
+        0x2aec981be4b62ed5,
+        0x8a00c7c83c762584,
+        0x577e0472764f061d,
+        0x956d3c8b5528e064,
+        0xe202be7ad7265af6,
+        0xee7b04568203481,
+        0x8e1de42b665c6706,
+        0xb4707207455f57e3,
+        0x466d8f66a8f9fed5,
+        0x727eca45c8d7bb71,
+        0xde2a0516f8c9d943,
+        0xe04ea1957ad8305c,
+        0xb70fb5f2b4f1f85f,
+        0xc734f3829ed30b0c,
+        0x226a4dcf5db3316d,
+        0x6df1d31fa84398f4,
+        0x82178371fa5fff69,
+        0xe202be7ad7265af6,
+        0x9e62c7d7b000cb0b,
+        0x3cf7c3a39d94c236,
+        0x68da2264f65ec3e,
+        0x605a82c52b5ad2f1,
+        0xe6fdf23648931b99,
+        0xd499fcbf63fbd266,
+        0x7c66d474cd2087cb,
+        0xb1a0132288b1619b,
+        0x3373035a3ca3dac6,
+        0xf4898a1a3554ee49,
+        0x6df1d31fa84398f4,
+        0x956d3c8b5528e064,
+        0xe1ecc5c21eec0646,
+        0xec9730136b7c2c05,
+        0xb59f9ff0ac6d5d78,
+        0x59ccc4d5184bc93a,
+        0x3743057c07a5dbfa,
+        0x462269e4b04620a5,
+        0x39302966be7df654,
+        0x88685b4f0798dfd1,
+        0x441f3a3747b5adb7,
+        0x3373035a3ca3dac6,
+        0x226a4dcf5db3316d,
+        0x577e0472764f061d,
+        0x13d316e45539aef4,
+        0xe16a6c1dee3ba347,
+        0xcfb03c902d447551,
+        0x66c8bab2096cfd38,
+        0xa6fdb8ebccc51667,
+        0x63c9679d8572a867,
+        0xb827c807875511c0,
+        0xfc02e869e21b72f8,
+        0x88685b4f0798dfd1,
+        0xb1a0132288b1619b,
+        0xc734f3829ed30b0c,
+        0x8a00c7c83c762584,
+        0xaad1255d46e78f07,
+        0x6250f5f176d483e7,
+        0x2044ce14eaf8f5d9,
+        0xeb4c0ce280c3e935,
+        0x2c4916605e3dea58,
+        0x81c44e9699915693,
+        0xa4daffb3ffd0e78f,
+        0xb827c807875511c0,
+        0x39302966be7df654,
+        0x7c66d474cd2087cb,
+        0xb70fb5f2b4f1f85f,
+        0x2aec981be4b62ed5,
+        0x77c1a153e73659e8,
+        0xa89ef32ae1462322,
+        0xfb9373c8481e0f0d,
+        0x17f9202c16676b2f,
+        0xe95c10ae32e05085,
+        0x62ecbe05e02433fc,
+        0x81c44e9699915693,
+        0x63c9679d8572a867,
+        0x462269e4b04620a5,
+        0xd499fcbf63fbd266,
+        0xe04ea1957ad8305c,
+        0x383dd77e07998487,
+        0xdb6945a20d277091,
+        0x60c33ebd1e023f0a,
+        0x72af70cdcb99214f,
+        0x9b6e5164ed35d878,
+        0x97f9b7d2cfc2ade5,
+        0xe95c10ae32e05085,
+        0x2c4916605e3dea58,
+        0xa6fdb8ebccc51667,
+        0x3743057c07a5dbfa,
+        0xe6fdf23648931b99,
+        0xde2a0516f8c9d943,
+        0xfa9db0de2d852e7a,
+        0x2dc10fce3233f443,
+        0xd267254bea1097f4,
+        0xe3ef40eacc6ff78d,
+        0x6fadc9347faeee81,
+        0x9b6e5164ed35d878,
+        0x17f9202c16676b2f,
+        0xeb4c0ce280c3e935,
+        0x66c8bab2096cfd38,
+        0x59ccc4d5184bc93a,
+        0x605a82c52b5ad2f1,
+        0x727eca45c8d7bb71,
+        0x7a5cf5b7b922e946,
+        0x819f2c14a8366b1f,
+        0x817bd8a7869ed1b5,
+    ];
+    let s_str: Vec<u64> = vec![
+        0x19,
+        0x3d999c961b7c63b0,
+        0x814e82efcd172529,
+        0x2421e5d236704588,
+        0x887af7d4dd482328,
+        0xa5e9c291f6119b27,
+        0xbdc52b2676a4b4aa,
+        0x64832009d29bcf57,
+        0x9c4155174a552cc,
+        0x463f9ee03d290810,
+        0xc810936e64982542,
+        0x43b1c289f7bc3ac,
+        0x0,
+        0x94877900674181c3,
+        0xc6c67cc37a2a2bbd,
+        0xd667c2055387940f,
+        0xba63a63e94b5ff0,
+        0x99460cc41b8f079f,
+        0x7ff02375ed524bb3,
+        0xea0870b47a8caf0e,
+        0xabcad82633b7bc9d,
+        0x3b8d135261052241,
+        0xfb4515f5e5b0d539,
+        0x3ee8011c2b37f77c,
+        0x19,
+        0x673655aae8be5a8b,
+        0xd510fe714f39fa10,
+        0x2c68a099b51c9e73,
+        0xa667bfa9aa96999d,
+        0x4d67e72f063e2108,
+        0xf84dde3e6acda179,
+        0x40f9cc8c08f80981,
+        0x5ead032050097142,
+        0x6591b02092d671bb,
+        0xe18c71963dd1b7,
+        0x8a21bcd24a14218a,
+        0x0,
+        0xadef3740e71c726,
+        0xa37bf67c6f986559,
+        0xc6b16f7ed4fa1b00,
+        0x6a065da88d8bfc3c,
+        0x4cabc0916844b46f,
+        0x407faac0f02e78d1,
+        0x7a786d9cf0852cf,
+        0x42433fb6949a629a,
+        0x891682a147ce43b0,
+        0x26cfd58e7b003b55,
+        0x2bbf0ed7b657acb3,
+        0x19,
+        0x202800f4addbdc87,
+        0xe4b5bdb1cc3504ff,
+        0xbe32b32a825596e7,
+        0x8e0f68c5dc223b9a,
+        0x58022d9e1c256ce3,
+        0x584d29227aa073ac,
+        0x8b9352ad04bef9e7,
+        0xaead42a3f445ecbf,
+        0x3c667a1d833a3cca,
+        0xda6f61838efa1ffe,
+        0xe8f749470bd7c446,
+        0x0,
+        0x481ac7746b159c67,
+        0xe367de32f108e278,
+        0x73f260087ad28bec,
+        0x5cfc82216bc1bdca,
+        0xcaccc870a2663a0e,
+        0xdb69cd7b4298c45d,
+        0x7bc9e0c57243e62d,
+        0x3cc51c5d368693ae,
+        0x366b4e8cc068895b,
+        0x2bd18715cdabbca4,
+        0xa752061c4f33b8cf,
+        0x19,
+        0xc5b85bab9e5b3869,
+        0x45245258aec51cf7,
+        0x16e6b8e68b931830,
+        0xe2ae0f051418112c,
+        0x470e26a0093a65b,
+        0x6bef71973a8146ed,
+        0x119265be51812daf,
+        0xb0be7356254bea2e,
+        0x8584defff7589bd7,
+        0x3c5fe4aeb1fb52ba,
+        0x9e7cd88acf543a5e,
+        0x0,
+        0xb22d2432b72d5098,
+        0x9e18a487f44d2fe4,
+        0x4b39e14ce22abd3c,
+        0x9e77fde2eb315e0d,
+        0xca5e0385fe67014d,
+        0xc2cb99bf1b6bddb,
+        0x99ec1cd2a4460bfe,
+        0x8577a815a2ff843f,
+        0x7d80a6b4fd6518a5,
+        0xeb6c67123eab62cb,
+        0x8f7851650eca21a5,
+        0x19,
+        0x179be4bba87f0a8c,
+        0xacf63d95d8887355,
+        0x6696670196b0074f,
+        0xd99ddf1fe75085f9,
+        0xc2597881fef0283b,
+        0xcf48395ee6c54f14,
+        0x15226a8e4cd8d3b6,
+        0xc053297389af5d3b,
+        0x2c08893f0d1580e2,
+        0xed3cbcff6fcc5ba,
+        0xc82f510ecf81f6d0,
+        0x0,
+        0x11ba9a1b81718c2a,
+        0x9f7d798a3323410c,
+        0xa821855c8c1cf5e5,
+        0x535e8d6fac0031b2,
+        0x404e7c751b634320,
+        0xa729353f6e55d354,
+        0x4db97d92e58bb831,
+        0xb53926c27897bf7d,
+        0x965040d52fe115c5,
+        0x9565fa41ebd31fd7,
+        0xaae4438c877ea8f4,
+        0x19,
+        0x94b06183acb715cc,
+        0x500392ed0d431137,
+        0x861cc95ad5c86323,
+        0x5830a443f86c4ac,
+        0x3b68225874a20a7c,
+        0x10b3309838e236fb,
+        0x9b77fc8bcd559e2c,
+        0xbdecf5e0cb9cb213,
+        0x30276f1221ace5fa,
+        0x7935dd342764a144,
+        0xeac6db520bb03708,
+        0x0,
+        0x37f4e36af6073c6e,
+        0x4edc0918210800e9,
+        0xc44998e99eae4188,
+        0x9f4310d05d068338,
+        0x9ec7fe4350680f29,
+        0xc5b2c1fdc0b50874,
+        0xa01920c5ef8b2ebe,
+        0x59fa6f8bd91d58ba,
+        0x8bfc9eb89b515a82,
+        0xbe86a7a2555ae775,
+        0xcbb8bbaa3810babf,
+        0x19,
+        0x7186a80551025f8f,
+        0x622247557e9b5371,
+        0xc4cbe326d1ad9742,
+        0x55f1523ac6a23ea2,
+        0xa13dfe77a3d52f53,
+        0xe30750b6301c0452,
+        0x8bd488070a3a32b,
+        0xcd800caef5b72ae3,
+        0x83329c90f04233ce,
+        0xb5b99e6664a0a3ee,
+        0x6b0731849e200a7f,
+        0x0,
+        0x577f9a9e7ee3f9c2,
+        0x88c522b949ace7b1,
+        0x82f07007c8b72106,
+        0x8283d37c6675b50e,
+        0x98b074d9bbac1123,
+        0x75c56fb7758317c1,
+        0xfed24e206052bc72,
+        0x26d7c3d1bc07dae5,
+        0xf88c5e441e28dbb4,
+        0x4fe27f9f96615270,
+        0x514d4ba49c2b14fe,
+        0x19,
+        0xec3fabc192b01799,
+        0x382b38cee8ee5375,
+        0x3bfb6c3f0e616572,
+        0x514abd0cf6c7bc86,
+        0x47521b1361dcc546,
+        0x178093843f863d14,
+        0xad1003c5d28918e7,
+        0x738450e42495bc81,
+        0xaf947c59af5e4047,
+        0x4653fb0685084ef2,
+        0x57fde2062ae35bf,
+        0x0,
+        0xf02a3ac068ee110b,
+        0xa3630dafb8ae2d7,
+        0xce0dc874eaf9b55c,
+        0x9a95f6cff5b55c7e,
+        0x626d76abfed00c7b,
+        0xa0c1cf1251c204ad,
+        0xdaebd3006321052c,
+        0x3d4bd48b625a8065,
+        0x7f1e584e071f6ed2,
+        0x720574f0501caed3,
+        0xe3260ba93d23540a,
+        0x19,
+        0xe376678d843ce55e,
+        0x66f3860d7514e7fc,
+        0x7817f3dfff8b4ffa,
+        0x3929624a9def725b,
+        0x126ca37f215a80a,
+        0xfce2f5d02762a303,
+        0x1bc927375febbad7,
+        0x85b481e5243f60bf,
+        0x2d3c5f42a39c91a0,
+        0x811719919351ae8,
+        0xf669de0add993131,
+        0x0,
+        0xab1cbd41d8c1e335,
+        0x9322ed4c0bc2df01,
+        0x51c3c0983d4284e5,
+        0x94178e291145c231,
+        0xfd0f1a973d6b2085,
+        0xd427ad96e2b39719,
+        0x8a52437fecaac06b,
+        0xdc20ee4b8c4c9a80,
+        0xa2c98e9549da2100,
+        0x1603fe12613db5b6,
+        0xe174929433c5505,
+        0x19,
+        0x7de38bae084da92d,
+        0x5b848442237e8a9b,
+        0xf6c705da84d57310,
+        0x31e6a4bdb6a49017,
+        0x889489706e5c5c0f,
+        0xe4a205459692a1b,
+        0xbac3fa75ee26f299,
+        0x5f5894f4057d755e,
+        0xb0dc3ecd724bb076,
+        0x5e34d8554a6452ba,
+        0x4f78fd8c1fdcc5f,
+        0x0,
+        0x3d4eab2b8ef5f796,
+        0xcfff421583896e22,
+        0x4143cb32d39ac3d9,
+        0x22365051b78a5b65,
+        0x6f7fd010d027c9b6,
+        0xd9dd36fba77522ab,
+        0xa44cf1cb33e37165,
+        0x3fc83d3038c86417,
+        0xc4588d418e88d270,
+        0xce1320f10ab80fe2,
+        0xdb5eadbbec18de5d,
+        0x19,
+        0x4dd19c38779512ea,
+        0xdb79ba02704620e9,
+        0x92a29a3675a5d2be,
+        0xd5177029fe495166,
+        0xd32b3298a13330c1,
+        0x251c4a3eb2c5f8fd,
+        0xe1c48b26e0d98825,
+        0x3301d3362a4ffccb,
+        0x9bb6c88de8cd178,
+        0xdc05b676564f538a,
+        0x60192d883e473fee,
+        0x0,
+        0x1183dfce7c454afd,
+        0x21cea4aa3d3ed949,
+        0xfce6f70303f2304,
+        0x19557d34b55551be,
+        0x4c56f689afc5bbc9,
+        0xa1e920844334f944,
+        0xbad66d423d2ec861,
+        0xf318c785dc9e0479,
+        0x99e2032e765ddd81,
+        0x400ccc9906d66f45,
+        0xe1197454db2e0dd9,
+        0x19,
+        0x16b9774801ac44a0,
+        0x3cb8411e786d3c8e,
+        0xa86e9cf505072491,
+        0x178928152e109ae,
+        0x5317b905a6e1ab7b,
+        0xda20b3be7f53d59f,
+        0xcb97dedecebee9ad,
+        0x4bd545218c59f58d,
+        0x77dc8d856c05a44a,
+        0x87948589e4f243fd,
+        0x7e5217af969952c2,
+        0x0,
+        0x84d1ecc4d53d2ff1,
+        0xd8af8b9ceb4e11b6,
+        0x335856bb527b52f4,
+        0xc756f17fb59be595,
+        0xc0654e4ea5553a78,
+        0x9e9a46b61f2ea942,
+        0x14fc8b5b3b809127,
+        0xd7009f0f103be413,
+        0x3e0ee7b7a9fb4601,
+        0xa74e888922085ed7,
+        0xe80a7cde3d4ac526,
+        0x19,
+        0xbc58987d06a84e4d,
+        0xb5d420244c9cae3,
+        0xa3c4711b938c02c0,
+        0x3aace640a3e03990,
+        0x865a0f3249aacd8a,
+        0x8d00b2a7dbed06c7,
+        0x6eacb905beb7e2f8,
+        0x45322b216ec3ec7,
+        0xeb9de00d594828e6,
+        0x88c5f20df9e5c26,
+        0xf555f4112b19781f,
+        0x0,
+        0x238aa6daa612186d,
+        0x9137a5c630bad4b4,
+        0xc7db3817870c5eda,
+        0x217e4f04e5718dc9,
+        0xcae814e2817bd99d,
+        0xe3292e7ab770a8ba,
+        0x7bb36ef70b6b9482,
+        0x3c7835fb85bca2d3,
+        0xfe2cdf8ee3c25e86,
+        0x61b3915ad7274b20,
+        0xeab75ca7c918e4ef,
+        0x19,
+        0xa8cedbff1813d3a7,
+        0x50dcaee0fd27d164,
+        0xf1cb02417e23bd82,
+        0xfaf322786e2abe8b,
+        0x937a4315beb5d9b6,
+        0x1b18992921a11d85,
+        0x7d66c4368b3c497b,
+        0xe7946317a6b4e99,
+        0xbe4430134182978b,
+        0x3771e82493ab262d,
+        0xa671690d8095ce82,
+        0x0,
+        0xd6e15ffc055e154e,
+        0xec67881f381a32bf,
+        0xfbb1196092bf409c,
+        0xdc9d2e07830ba226,
+        0x698ef3245ff7988,
+        0x194fae2974f8b576,
+        0x7a5d9bea6ca4910e,
+        0x7aebfea95ccdd1c9,
+        0xf9bd38a67d5f0e86,
+        0xfa65539de65492d8,
+        0xf0dfcbe7653ff787,
+        0x19,
+        0xb035585f6e929d9d,
+        0xba1579c7e219b954,
+        0xcb201cf846db4ba3,
+        0x287bf9177372cf45,
+        0xa350e4f61147d0a6,
+        0xd5d0ecfb50bcff99,
+        0x2e166aa6c776ed21,
+        0xe1e66c991990e282,
+        0x662b329b01e7bb38,
+        0x8aa674b36144d9a9,
+        0xcbabf78f97f95e65,
+        0x0,
+        0xbd87ad390420258,
+        0xad8617bca9e33c8,
+        0xc00ad377a1e2666,
+        0xac6fc58b3f0518f,
+        0xc0cc8a892cc4173,
+        0xc210accb117bc21,
+        0xb73630dbb46ca18,
+        0xc8be4920cbd4a54,
+        0xbfe877a21be1690,
+        0xae790559b0ded81,
+        0xbf50db2f8d6ce31,
+        0x19,
+        0xeec24b15a06b53fe,
+        0xc8a7aa07c5633533,
+        0xefe9c6fa4311ad51,
+        0xb9173f13977109a1,
+        0x69ce43c9cc94aedc,
+        0xecf623c9cd118815,
+        0x28625def198c33c7,
+        0xccfc5f7de5c3636a,
+        0xf5e6c40f1621c299,
+        0xcec0e58c34cb64b1,
+        0xa868ea113387939f,
+        0x0,
+        0xcf29427ff7c58,
+        0xbd9b3cf49eec8,
+        0xd1dc8aa81fb26,
+        0xbc792d5c394ef,
+        0xd2ae0b2266453,
+        0xd413f12c496c1,
+        0xc84128cfed618,
+        0xdb5ebd48fc0d4,
+        0xd1b77326dcb90,
+        0xbeb0ccc145421,
+        0xd10e5b22b11d1,
+        0x19,
+        0xd8dddbdc5ce4ef45,
+        0xacfc51de8131458c,
+        0x146bb3c0fe499ac0,
+        0x9e65309f15943903,
+        0x80d0ad980773aa70,
+        0xf97817d4ddbf0607,
+        0xe4626620a75ba276,
+        0xdfdc7fd6fc74f66,
+        0xf464864ad6f2bb93,
+        0x2d55e52a5d44414,
+        0xdd8de62487c40925,
+        0x0,
+        0xe24c99adad8,
+        0xcf389ed4bc8,
+        0xe580cbf6966,
+        0xcde5fd7e04f,
+        0xe63628041b3,
+        0xe7e81a87361,
+        0xdabe78f6d98,
+        0xefb14cac554,
+        0xe5574743b10,
+        0xd05709f42c1,
+        0xe4690c96af1,
+        0x19,
+        0xc15acf44759545a3,
+        0xcbfdcf39869719d4,
+        0x33f62042e2f80225,
+        0x2599c5ead81d8fa3,
+        0xb306cb6c1d7c8d0,
+        0x658c80d3df3729b1,
+        0xe8d1b2b21b41429c,
+        0xa1b67f09d4b3ccb8,
+        0xe1adf8b84437180,
+        0xd593a5e584af47b,
+        0xa023d94c56e151c7,
+        0x0,
+        0xf7157bc98,
+        0xe3006d948,
+        0xfa65811e6,
+        0xe0d127e2f,
+        0xfc18bfe53,
+        0xfd002d901,
+        0xeed6461d8,
+        0x1068562754,
+        0xfa0236f50,
+        0xe3af13ee1,
+        0xfa460f6d1,
+        0x19,
+        0x49026cc3a4afc5a6,
+        0xe06dff00ab25b91b,
+        0xab38c561e8850ff,
+        0x92c3c8275e105eeb,
+        0xb65256e546889bd0,
+        0x3c0468236ea142f6,
+        0xee61766b889e18f2,
+        0xa206f41b12c30415,
+        0x2fe9d756c9f12d1,
+        0xe9633210630cbf12,
+        0x1ffea9fe85a0b0b1,
+        0x0,
+        0x11131738,
+        0xf56d588,
+        0x11050f86,
+        0xf848f4f,
+        0x111527d3,
+        0x114369a1,
+        0x106f2f38,
+        0x11e2ca94,
+        0x110a29f0,
+        0xfa9f5c1,
+        0x10f625d1,
+        0x19,
+        0x81d1ae8cc50240f3,
+        0xf4c77a079a4607d7,
+        0xed446b2315e3efc1,
+        0xb0a6b70915178c3,
+        0xb11ff3e089f15d9a,
+        0x1d4dba0b7ae9cc18,
+        0x65d74e2f43b48d05,
+        0xa2df8c6b8ae0804a,
+        0xa4e6f0a8c33348a6,
+        0xc0a26efc7be5669b,
+        0xa6b6582c547d0d60,
+        0x0,
+        0x11f718,
+        0x10b6c8,
+        0x134a96,
+        0x10cf7f,
+        0x124d03,
+        0x13f8a1,
+        0x117c58,
+        0x132c94,
+        0x134fc0,
+        0x10a091,
+        0x128961,
+        0x19,
+        0x84afc741f1c13213,
+        0x2f8f43734fc906f3,
+        0xde682d72da0a02d9,
+        0xbb005236adb9ef2,
+        0x5bdf35c10a8b5624,
+        0x739a8a343950010,
+        0x52f515f44785cfbc,
+        0xcbaf4e5d82856c60,
+        0xac9ea09074e3e150,
+        0x8f0fa011a2035fb0,
+        0x1a37905d8450904a,
+        0x0,
+        0x1300,
+        0x1750,
+        0x114e,
+        0x131f,
+        0x167b,
+        0x1371,
+        0x1230,
+        0x182c,
+        0x1368,
+        0xf31,
+        0x15c9,
+        0x19,
+        0x3abeb80def61cc85,
+        0x9d19c9dd4eac4133,
+        0x75a652d9641a985,
+        0x9daf69ae1b67e667,
+        0x364f71da77920a18,
+        0x50bd769f745c95b1,
+        0xf223d1180dbbf3fc,
+        0x2f885e584e04aa99,
+        0xb69a0fa70aea684a,
+        0x9584acaa6e062a0,
+        0xbc051640145b19b,
+        0x0,
+        0x14,
+        0x22,
+        0x12,
+        0x27,
+        0xd,
+        0xd,
+        0x1c,
+        0x2,
+        0x10,
+        0x29,
+        0xf,
+    ];
+    (c_str, m_str, p_str, s_str)
+}
diff --git a/test/stark_aggregation.sh b/test/stark_aggregation.sh
index 2a3f45e3..79220c26 100755
--- a/test/stark_aggregation.sh
+++ b/test/stark_aggregation.sh
@@ -1,8 +1,10 @@
-#!/bin/zsh
+#!/bin/bash
 set -ex

-## build
-cargo build --release
+# ## build
+# cargo build --release
+## build with avx2 feature
+RUSTFLAGS="-C target-feature=+avx2" cargo build --release

 export NODE_OPTIONS="--max-old-space-size=81920"