From 27fc512548a5ab6861af143140de2adce5686680 Mon Sep 17 00:00:00 2001 From: ibmp33 <51358300+ibmp33@users.noreply.github.com> Date: Mon, 18 Dec 2023 08:55:51 +0800 Subject: [PATCH] chore: avx512 acceleration (#169) * chore: avx512 acceleration --------- Co-authored-by: eigmax --- algebraic/Cargo.toml | 1 + algebraic/src/arch/x86_64/avx512_field_gl.rs | 24 +- algebraic/src/lib.rs | 1 + starky/Cargo.toml | 4 + starky/README.md | 7 + starky/benches/merklehash.rs | 4 +- starky/src/arch/x86_64/avx2_poseidon_gl.rs | 2 +- starky/src/arch/x86_64/avx512_poseidon_gl.rs | 588 ++++++ starky/src/constant.rs | 43 +- starky/src/lib.rs | 4 +- starky/src/linearhash.rs | 226 ++- starky/src/merklehash.rs | 197 +- ...ants_avx.rs => poseidon_constants_avx2.rs} | 0 starky/src/poseidon_constants_avx512.rs | 1589 +++++++++++++++++ test/stark_aggregation.sh | 3 + 15 files changed, 2664 insertions(+), 29 deletions(-) rename starky/src/{poseidon_constants_avx.rs => poseidon_constants_avx2.rs} (100%) create mode 100644 starky/src/poseidon_constants_avx512.rs diff --git a/algebraic/Cargo.toml b/algebraic/Cargo.toml index 3ecc1c1b..22d82f7f 100644 --- a/algebraic/Cargo.toml +++ b/algebraic/Cargo.toml @@ -48,3 +48,4 @@ wasm-bindgen-test = "0.3" [features] default = ["franklin-crypto/multicore", "wasmer/default"] wasm = ["wasmer/js-default"] +avx512 = [] \ No newline at end of file diff --git a/algebraic/src/arch/x86_64/avx512_field_gl.rs b/algebraic/src/arch/x86_64/avx512_field_gl.rs index 84d93dbf..ed204e94 100644 --- a/algebraic/src/arch/x86_64/avx512_field_gl.rs +++ b/algebraic/src/arch/x86_64/avx512_field_gl.rs @@ -2,7 +2,7 @@ //! https://github.com/0xPolygonZero/plonky2/blob/main/field/src/arch/x86_64/avx512_goldilocks_field.rs //! //! How to build/run/test: -//! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --release +//! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --features "avx512" --release use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; use crate::packed::PackedField; @@ -27,14 +27,22 @@ const WIDTH: usize = 8; impl Avx512GoldilocksField { #[inline] - fn new(x: __m512i) -> Self { + pub fn new(x: __m512i) -> Self { unsafe { transmute(x) } } #[inline] - fn get(&self) -> __m512i { + pub fn get(&self) -> __m512i { unsafe { transmute(*self) } } #[inline] + pub fn interleave2(x: __m512i, y: __m512i) -> (__m512i, __m512i) { + unsafe { interleave2(x, y) } + } + #[inline] + pub fn reduce(x: __m512i, y: __m512i) -> Avx512GoldilocksField { + Self::new(unsafe { reduce128((x, y)) }) + } + #[inline] pub fn square(&self) -> Avx512GoldilocksField { Self::new(unsafe { square(self.get()) }) } @@ -271,7 +279,8 @@ unsafe fn sub_no_double_overflow_64_64(x: __m512i, y: __m512i) -> __m512i { #[inline] unsafe fn add(x: __m512i, y: __m512i) -> __m512i { - add_no_double_overflow_64_64(x, canonicalize(y)) + let res_s = add_no_double_overflow_64_64(x, canonicalize(y)); + canonicalize(res_s) } #[inline] @@ -356,7 +365,8 @@ unsafe fn reduce128(x: (__m512i, __m512i)) -> __m512i { let hi_hi0 = _mm512_srli_epi64::<32>(hi0); let lo1 = sub_no_double_overflow_64_64(lo0, hi_hi0); let t1 = _mm512_mul_epu32(hi0, EPSILON); - let lo2 = add_no_double_overflow_64_64(lo1, t1); + let _lo2 = add_no_double_overflow_64_64(lo1, t1); + let lo2 = canonicalize(_lo2); lo2 } @@ -412,7 +422,7 @@ mod tests { fn test_vals_a() -> [GoldilocksField; 8] { [ - GoldilocksField([14479013849828404771u64]), + GoldilocksField([18446744069414584320u64]), GoldilocksField([9087029921428221768u64]), GoldilocksField([2441288194761790662u64]), GoldilocksField([5646033492608483824u64]), @@ -424,7 +434,7 @@ mod tests { } fn test_vals_b() -> [GoldilocksField; 8] { [ - GoldilocksField([17891926589593242302u64]), + GoldilocksField([18446744069414584320u64]), GoldilocksField([11009798273260028228u64]), GoldilocksField([2028722748960791447u64]), GoldilocksField([7929433601095175579u64]), diff --git a/algebraic/src/lib.rs b/algebraic/src/lib.rs index a16fd6ef..23aa3619 100644 --- a/algebraic/src/lib.rs +++ b/algebraic/src/lib.rs @@ -1,4 +1,5 @@ #![allow(clippy::unit_arg)] +#![cfg_attr(feature = "avx512", feature(stdsimd))] #[macro_use] extern crate serde; diff --git a/starky/Cargo.toml b/starky/Cargo.toml index b129fc24..54fe23df 100644 --- a/starky/Cargo.toml +++ b/starky/Cargo.toml @@ -60,3 +60,7 @@ harness = false [[bench]] name = "poseidon" harness = false + +[features] +default = [] +avx512 = ["algebraic/avx512"] \ No newline at end of file diff --git a/starky/README.md b/starky/README.md index 7f6900dc..c1084593 100644 --- a/starky/README.md +++ b/starky/README.md @@ -102,8 +102,15 @@ Because the nExtBits reaches up to 24 so this step would be very slow. Consider ``` cargo bench --bench merklehash -- --profile-time=5 + +# or with AVX enabled like this + +RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo bench --features avx512 -- merklehash +RUSTFLAGS="-C target-feature=+avx2" cargo bench -- merklehash + ``` + * https://www.jibbow.com/posts/criterion-flamegraphs/ diff --git a/starky/benches/merklehash.rs b/starky/benches/merklehash.rs index f31cbbdb..e051f871 100644 --- a/starky/benches/merklehash.rs +++ b/starky/benches/merklehash.rs @@ -2,7 +2,7 @@ use criterion::*; use plonky::field_gl::Fr as FGL; use rayon::prelude::*; -use starky::merklehash_bn128::MerkleTreeBN128; +use starky::merklehash::MerkleTreeGL; use starky::traits::MerkleTree; mod perf; @@ -12,7 +12,7 @@ fn run_merklehash(pols: Vec) { let n_pols = 10; let now = std::time::Instant::now(); - let mut tree: MerkleTreeBN128 = MerkleTree::new(); + let mut tree: MerkleTreeGL = MerkleTree::new(); tree.merkelize(pols, n_pols, n).unwrap(); log::trace!("time cost: {}", now.elapsed().as_secs()); let (group_elements, mp) = tree.get_group_proof(idx).unwrap(); diff --git a/starky/src/arch/x86_64/avx2_poseidon_gl.rs b/starky/src/arch/x86_64/avx2_poseidon_gl.rs index 184e1879..fc520f0e 100644 --- a/starky/src/arch/x86_64/avx2_poseidon_gl.rs +++ b/starky/src/arch/x86_64/avx2_poseidon_gl.rs @@ -1,6 +1,6 @@ #![allow(non_snake_case)] use crate::constant::POSEIDON_CONSTANTS_OPT_AVX2; -use crate::poseidon_constants_avx as constants; +use crate::poseidon_constants_avx2 as constants; use algebraic::arch::x86_64::avx2_field_gl::Avx2GoldilocksField; use algebraic::packed::PackedField; use core::arch::x86_64::*; diff --git a/starky/src/arch/x86_64/avx512_poseidon_gl.rs b/starky/src/arch/x86_64/avx512_poseidon_gl.rs index 8b137891..c5061958 100644 --- a/starky/src/arch/x86_64/avx512_poseidon_gl.rs +++ b/starky/src/arch/x86_64/avx512_poseidon_gl.rs @@ -1 +1,589 @@ +#![allow(non_snake_case)] +use crate::constant::POSEIDON_CONSTANTS_OPT_AVX512; +use crate::poseidon_constants_avx512 as constants; +use algebraic::arch::x86_64::avx512_field_gl::Avx512GoldilocksField; +use algebraic::packed::PackedField; +use core::arch::x86_64::*; +// use core::mem; +use plonky::field_gl::Fr as FGL; +use plonky::field_gl::FrRepr; +//use plonky::Field; +use plonky::PrimeField; +#[derive(Debug)] +pub struct ConstantsAvx512 { + pub c: Vec, + pub m: Vec, + pub p: Vec, + pub s: Vec, + pub n_rounds_f: usize, + pub n_rounds_p: usize, +} + +pub fn load_constants_avx512() -> ConstantsAvx512 { + let (c_str, m_str, p_str, s_str) = constants::constants(); + let mut c: Vec = Vec::new(); + for v1 in c_str { + c.push(FrRepr([v1])); + } + let mut m: Vec = Vec::new(); + for v1 in m_str { + m.push(FrRepr([v1])); + } + + let mut p: Vec = Vec::new(); + for v1 in p_str { + p.push(FrRepr([v1])); + } + + let mut s: Vec = Vec::new(); + for v1 in s_str { + s.push(FrRepr([v1])); + } + + ConstantsAvx512 { + c, + m, + p, + s, + n_rounds_f: 8, + n_rounds_p: 22, + } +} + +pub struct Poseidon; + +impl Default for Poseidon { + fn default() -> Self { + Self::new() + } +} + +#[inline(always)] +unsafe fn spmv_avx512_4x12( + r: &mut Avx512GoldilocksField, + st0: Avx512GoldilocksField, + st1: Avx512GoldilocksField, + st2: Avx512GoldilocksField, + m: &[FrRepr], +) { + let m = Avx512GoldilocksField::pack_slice(m); + *r = (st0 * m[0]) + (st1 * m[1]) + (st2 * m[2]); +} + +impl Poseidon { + pub fn new() -> Poseidon { + Self {} + } + + // #[inline(always)] + // unsafe fn _extract_u64s_from_m512i(value: __m512i) -> [u64; 8] { + // mem::transmute(value) + // } + + #[inline(always)] + fn pow7(x: &mut Avx512GoldilocksField) { + let aux = *x; + *x = x.square(); + *x *= aux; + *x = x.square(); + *x *= aux; + } + + #[inline(always)] + fn pow7_triple( + st0: &mut Avx512GoldilocksField, + st1: &mut Avx512GoldilocksField, + st2: &mut Avx512GoldilocksField, + ) { + let aux0 = *st0; + let aux1 = *st1; + let aux2 = *st2; + *st0 = st0.square(); + *st1 = st1.square(); + *st2 = st2.square(); + *st0 *= aux0; + *st1 *= aux1; + *st2 *= aux2; + *st0 = st0.square(); + *st1 = st1.square(); + *st2 = st2.square(); + *st0 *= aux0; + *st1 *= aux1; + *st2 *= aux2; + } + + #[inline(always)] + fn add_avx512( + st0: &mut Avx512GoldilocksField, + st1: &mut Avx512GoldilocksField, + st2: &mut Avx512GoldilocksField, + c: &[FrRepr], + ) { + let c = Avx512GoldilocksField::pack_slice(c); + *st0 = *st0 + c[0]; + *st1 = *st1 + c[1]; + *st2 = *st2 + c[2]; + } + + #[inline(always)] + fn mult_add_avx512( + st0: &mut Avx512GoldilocksField, + st1: &mut Avx512GoldilocksField, + st2: &mut Avx512GoldilocksField, + s0: Avx512GoldilocksField, + s: &[FrRepr], + ) { + let s = Avx512GoldilocksField::pack_slice(s); + *st0 = *st0 + s[0] * s0; + *st1 = *st1 + s[1] * s0; + *st2 = *st2 + s[2] * s0; + } + + #[inline(always)] + unsafe fn mmult_avx512( + st0: &mut Avx512GoldilocksField, + st1: &mut Avx512GoldilocksField, + st2: &mut Avx512GoldilocksField, + p: &[FrRepr], + ) { + let mut tmp0 = Avx512GoldilocksField::ZEROS; + let mut tmp1 = Avx512GoldilocksField::ZEROS; + let mut tmp2 = Avx512GoldilocksField::ZEROS; + Self::mmult_avx512_4x12(&mut tmp0, *st0, *st1, *st2, &p[0..96]); + Self::mmult_avx512_4x12(&mut tmp1, *st0, *st1, *st2, &p[96..192]); + Self::mmult_avx512_4x12(&mut tmp2, *st0, *st1, *st2, &p[192..288]); + *st0 = tmp0; + *st1 = tmp1; + *st2 = tmp2; + } + + // Dense matrix-vector product + #[inline(always)] + unsafe fn mmult_avx512_4x12( + tmp: &mut Avx512GoldilocksField, + st0: Avx512GoldilocksField, + st1: Avx512GoldilocksField, + st2: Avx512GoldilocksField, + m: &[FrRepr], + ) { + let mut r0 = Avx512GoldilocksField::ZEROS; + let mut r1 = Avx512GoldilocksField::ZEROS; + let mut r2 = Avx512GoldilocksField::ZEROS; + let mut r3 = Avx512GoldilocksField::ZEROS; + spmv_avx512_4x12(&mut r0, st0, st1, st2, &m[0..24]); + spmv_avx512_4x12(&mut r1, st0, st1, st2, &m[24..48]); + spmv_avx512_4x12(&mut r2, st0, st1, st2, &m[48..72]); + spmv_avx512_4x12(&mut r3, st0, st1, st2, &m[72..96]); + // Transpose: transform de 4x4 matrix stored in rows r0...r3 to the columns c0...c3 + let (t0, t2) = Avx512GoldilocksField::interleave2(r0.get(), r2.get()); + let (t1, t3) = Avx512GoldilocksField::interleave2(r1.get(), r3.get()); + let c0 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpacklo_pd( + _mm512_castsi512_pd(t0), + _mm512_castsi512_pd(t1), + ))); + let c1 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpackhi_pd( + _mm512_castsi512_pd(t0), + _mm512_castsi512_pd(t1), + ))); + let c2 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpacklo_pd( + _mm512_castsi512_pd(t2), + _mm512_castsi512_pd(t3), + ))); + let c3 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpackhi_pd( + _mm512_castsi512_pd(t2), + _mm512_castsi512_pd(t3), + ))); + // Add columns to obtain result + *tmp = c0 + c1 + c2 + c3; + } + + #[inline(always)] + unsafe fn mmult_avx512_8( + st0: &mut Avx512GoldilocksField, + st1: &mut Avx512GoldilocksField, + st2: &mut Avx512GoldilocksField, + m: &[FrRepr], + ) { + let mut tmp0 = Avx512GoldilocksField::ZEROS; + let mut tmp1 = Avx512GoldilocksField::ZEROS; + let mut tmp2 = Avx512GoldilocksField::ZEROS; + Self::mmult_avx512_4x12_8(&mut tmp0, *st0, *st1, *st2, &m[0..96]); + Self::mmult_avx512_4x12_8(&mut tmp1, *st0, *st1, *st2, &m[96..192]); + Self::mmult_avx512_4x12_8(&mut tmp2, *st0, *st1, *st2, &m[192..288]); + *st0 = tmp0; + *st1 = tmp1; + *st2 = tmp2; + } + + // Dense matrix-vector product + #[inline(always)] + unsafe fn mmult_avx512_4x12_8( + tmp: &mut Avx512GoldilocksField, + st0: Avx512GoldilocksField, + st1: Avx512GoldilocksField, + st2: Avx512GoldilocksField, + m: &[FrRepr], + ) { + let mut r0 = Avx512GoldilocksField::ZEROS; + let mut r1 = Avx512GoldilocksField::ZEROS; + let mut r2 = Avx512GoldilocksField::ZEROS; + let mut r3 = Avx512GoldilocksField::ZEROS; + Self::spmv_avx512_4x12_8(&mut r0, st0, st1, st2, &m[0..24]); + Self::spmv_avx512_4x12_8(&mut r1, st0, st1, st2, &m[24..48]); + Self::spmv_avx512_4x12_8(&mut r2, st0, st1, st2, &m[48..72]); + Self::spmv_avx512_4x12_8(&mut r3, st0, st1, st2, &m[72..96]); + // Transpose: transform de 4x4 matrix stored in rows r0...r3 to the columns c0...c3 + let (t0, t2) = Avx512GoldilocksField::interleave2(r0.get(), r2.get()); + let (t1, t3) = Avx512GoldilocksField::interleave2(r1.get(), r3.get()); + let c0 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpacklo_pd( + _mm512_castsi512_pd(t0), + _mm512_castsi512_pd(t1), + ))); + let c1 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpackhi_pd( + _mm512_castsi512_pd(t0), + _mm512_castsi512_pd(t1), + ))); + let c2 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpacklo_pd( + _mm512_castsi512_pd(t2), + _mm512_castsi512_pd(t3), + ))); + let c3 = Avx512GoldilocksField::new(_mm512_castpd_si512(_mm512_unpackhi_pd( + _mm512_castsi512_pd(t2), + _mm512_castsi512_pd(t3), + ))); + // Add columns to obtain result + *tmp = c0 + c1 + c2 + c3; + } + + #[inline(always)] + unsafe fn spmv_avx512_4x12_8( + r: &mut Avx512GoldilocksField, + st0: Avx512GoldilocksField, + st1: Avx512GoldilocksField, + st2: Avx512GoldilocksField, + m: &[FrRepr], + ) { + let m = Avx512GoldilocksField::pack_slice(&m); + let mut c0_h = Avx512GoldilocksField::ZEROS; + let mut c0_l = Avx512GoldilocksField::ZEROS; + let mut c1_h = Avx512GoldilocksField::ZEROS; + let mut c1_l = Avx512GoldilocksField::ZEROS; + let mut c2_h = Avx512GoldilocksField::ZEROS; + let mut c2_l = Avx512GoldilocksField::ZEROS; + Self::mult_avx512_72(&mut c0_h, &mut c0_l, st0, m[0]); + Self::mult_avx512_72(&mut c1_h, &mut c1_l, st1, m[1]); + Self::mult_avx512_72(&mut c2_h, &mut c2_l, st2, m[2]); + let c_h = c0_h + c1_h + c2_h; + let c_l = c0_l + c1_l + c2_l; + *r = Avx512GoldilocksField::reduce(c_h.get(), c_l.get()) + } + + #[inline(always)] + unsafe fn mult_avx512_72( + c_h: &mut Avx512GoldilocksField, + c_l: &mut Avx512GoldilocksField, + a: Avx512GoldilocksField, + b: Avx512GoldilocksField, + ) { + // Obtain a_h in the lower 32 bits + let a_h = _mm512_castps_si512(_mm512_movehdup_ps(_mm512_castsi512_ps(a.get()))); + + // c = (a_h+a_l)*(b_l)=a_h*b_l+a_l*b_l=c_hl+c_ll + // note: _mm512_mul_epu32 uses only the lower 32bits of each chunk so a=a_l and b=b_l + let c_hl = _mm512_mul_epu32(a_h, b.get()); + let c_ll = _mm512_mul_epu32(a.get(), b.get()); + + // Bignum addition + // Ranges: c_hl[95:32], c_ll[63:0] + // parts that intersect must be added + + // LOW PART: + // 1: r0 = c_hl + c_ll_h + // does not overflow: c_hl <= (2^32-1)*(2^8-1)< 2^40 + // c_ll_h <= 2^32-1 + // c_hl + c_ll_h <= 2^41 + let c_ll_h = _mm512_srli_epi64(c_ll, 32); + let r0 = _mm512_add_epi64(c_hl, c_ll_h); + + // 2: c_l = r0_l | c_ll_l + const LO_32_BITS_MASK: __mmask16 = 0xAAAA; + let r0_l = _mm512_castps_si512(_mm512_moveldup_ps(_mm512_castsi512_ps(r0))); + *c_l = Avx512GoldilocksField::new(_mm512_mask_blend_epi32(LO_32_BITS_MASK, c_ll, r0_l)); + // HIGH PART: c_h = r0_h + *c_h = Avx512GoldilocksField::new(_mm512_srli_epi64(r0, 32)); + } + + pub fn hash(&self, inp: &Vec, init_state: &[FGL], out: usize) -> Result, String> { + unsafe { self.hash_inner(inp, init_state, out) } + } + + unsafe fn hash_inner( + &self, + inp: &Vec, + init_state: &[FGL], + out: usize, + ) -> Result, String> { + if inp.len() != 16 { + return Err(format!("Wrong inputs length {} != 16", inp.len(),)); + } + if init_state.len() != 8 { + return Err(format!("Capacity inputs length {} != 8", init_state.len(),)); + } + let t = 24; + let n_rounds_f = POSEIDON_CONSTANTS_OPT_AVX512.n_rounds_f; + let n_rounds_p = POSEIDON_CONSTANTS_OPT_AVX512.n_rounds_p; + let C = &POSEIDON_CONSTANTS_OPT_AVX512.c; + let S = &POSEIDON_CONSTANTS_OPT_AVX512.s; + let M = &POSEIDON_CONSTANTS_OPT_AVX512.m; + let P = &POSEIDON_CONSTANTS_OPT_AVX512.p; + + let mut _state = vec![FGL::ZERO; t]; + _state[0..16].clone_from_slice(inp); + _state[16..].clone_from_slice(init_state); + + let state: Vec<_> = _state.iter().map(|x| x.into_repr()).collect(); + let mut state_vec = state.to_vec(); + let st = Avx512GoldilocksField::pack_slice_mut(&mut state_vec); + let mut st0 = st[0]; + let mut st1 = st[1]; + let mut st2 = st[2]; + Self::add_avx512(&mut st0, &mut st1, &mut st2, &C[0..t]); + + for r in 0..(n_rounds_f / 2 - 1) { + Self::pow7_triple(&mut st0, &mut st1, &mut st2); + Self::add_avx512( + &mut st0, + &mut st1, + &mut st2, + &C[(r + 1) * t..((r + 1) * t + t)], + ); + Self::mmult_avx512_8(&mut st0, &mut st1, &mut st2, &M[0..288]); + } + + Self::pow7_triple(&mut st0, &mut st1, &mut st2); + Self::add_avx512(&mut st0, &mut st1, &mut st2, &C[96..120]); + Self::mmult_avx512(&mut st0, &mut st1, &mut st2, &P[0..288]); + + for r in 0..n_rounds_p { + let st0_slice = st0.as_slice_mut(); + let mut s_arr = { + [ + st0_slice[0], + FrRepr([0]), + FrRepr([0]), + FrRepr([0]), + st0_slice[4], + FrRepr([0]), + FrRepr([0]), + FrRepr([0]), + ] + }; + let mut _st0 = Avx512GoldilocksField::from_slice_mut(&mut s_arr); + + Self::pow7(&mut _st0); + let c_arr = { + [ + C[(4 + 1) * t + r], + FrRepr([0]), + FrRepr([0]), + FrRepr([0]), + C[(4 + 1) * t + r], + FrRepr([0]), + FrRepr([0]), + FrRepr([0]), + ] + }; + let c = Avx512GoldilocksField::from_slice(&c_arr); + *_st0 = *_st0 + *c; + let st0_slice = st0.as_slice_mut(); + st0_slice[0] = _st0.as_slice_mut()[0]; + st0_slice[4] = _st0.as_slice_mut()[4]; + + let mut tmp = Avx512GoldilocksField::ZEROS; + spmv_avx512_4x12(&mut tmp, st0, st1, st2, &S[t * 2 * r..(t * 2 * r + t)]); + let tmp_slice = tmp.as_slice_mut(); + let sum0 = FGL::from_repr(tmp_slice[0]).unwrap() + + FGL::from_repr(tmp_slice[1]).unwrap() + + FGL::from_repr(tmp_slice[2]).unwrap() + + FGL::from_repr(tmp_slice[3]).unwrap(); + let sum1 = FGL::from_repr(tmp_slice[4]).unwrap() + + FGL::from_repr(tmp_slice[5]).unwrap() + + FGL::from_repr(tmp_slice[6]).unwrap() + + FGL::from_repr(tmp_slice[7]).unwrap(); + + let tmp_arr = { + [ + _st0.as_slice_mut()[0], + _st0.as_slice_mut()[0], + _st0.as_slice_mut()[0], + _st0.as_slice_mut()[0], + _st0.as_slice_mut()[4], + _st0.as_slice_mut()[4], + _st0.as_slice_mut()[4], + _st0.as_slice_mut()[4], + ] + }; + let s0 = Avx512GoldilocksField::from_slice(&tmp_arr); + Self::mult_add_avx512( + &mut st0, + &mut st1, + &mut st2, + *s0, + &S[(t * (2 * r + 1))..(t * (2 * r + 2))], + ); + + let st0_slice = st0.as_slice_mut(); + st0_slice[0] = sum0.into_repr(); + st0_slice[4] = sum1.into_repr(); + } + + for r in 0..(n_rounds_f / 2 - 1) { + Self::pow7_triple(&mut st0, &mut st1, &mut st2); + Self::add_avx512( + &mut st0, + &mut st1, + &mut st2, + &C[((n_rounds_f / 2 + 1) * t + n_rounds_p + r * t) + ..((n_rounds_f / 2 + 1) * t + n_rounds_p + r * t + t)], + ); + Self::mmult_avx512_8(&mut st0, &mut st1, &mut st2, &M[0..288]); + } + + Self::pow7_triple(&mut st0, &mut st1, &mut st2); + Self::mmult_avx512(&mut st0, &mut st1, &mut st2, &M[0..288]); + + let st0_slice = st0.as_slice(); + + let mut result_vec: Vec = Vec::new(); + result_vec.extend(st0_slice.iter().map(|&repr| FGL::from_repr(repr).unwrap())); + Ok(result_vec[..out].to_vec()) + } +} + +#[cfg(test)] +mod tests { + use crate::arch::x86_64::avx512_poseidon_gl::*; + use algebraic::arch::x86_64::avx512_field_gl::Avx512GoldilocksField; + use algebraic::packed::PackedField; + use plonky::field_gl::Fr as FGL; + use plonky::PrimeField; + use std::time::{Duration, Instant}; + + #[test] + fn test_poseidon_opt_hash_all_0_avx() { + let poseidon = Poseidon::new(); + let input = vec![FGL::ZERO; 16]; + let state = vec![FGL::ZERO; 8]; + + let start = Instant::now(); + let res = poseidon.hash(&input, &state, 8).unwrap(); + let hash_avx512_duration = start.elapsed(); + log::debug!("hash_avx512_duration_0: {:?}", hash_avx512_duration); + + let expected = vec![ + FGL::from(0x3c18a9786cb0b359u64), + FGL::from(0xc4055e3364a246c3u64), + FGL::from(0x7953db0ab48808f4u64), + FGL::from(0xc71603f33a1144cau64), + FGL::from(0x3c18a9786cb0b359u64), + FGL::from(0xc4055e3364a246c3u64), + FGL::from(0x7953db0ab48808f4u64), + FGL::from(0xc71603f33a1144cau64), + ]; + assert_eq!(res, expected); + } + + #[test] + fn test_poseidon_opt_hash_avx512() { + let poseidon = Poseidon::new(); + let input = vec![ + FGL::from(0u64), + FGL::from(1u64), + FGL::from(2u64), + FGL::from(3u64), + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::from(4u64), + FGL::from(5u64), + FGL::from(6u64), + FGL::from(7u64), + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + ]; + let state = vec![ + FGL::from(8u64), + FGL::from(9u64), + FGL::from(10u64), + FGL::from(11u64), + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + ]; + + let start = Instant::now(); + let res = poseidon.hash(&input, &state, 8).unwrap(); + let hash_avx512_duration = start.elapsed(); + log::debug!("hash_avx512_duration_0: {:?}", hash_avx512_duration); + + let expected = vec![ + FGL::from(0xd64e1e3efc5b8e9eu64), + FGL::from(0x53666633020aaa47u64), + FGL::from(0xd40285597c6a8825u64), + FGL::from(0x613a4f81e81231d2u64), + FGL::from(0x3c18a9786cb0b359u64), + FGL::from(0xc4055e3364a246c3u64), + FGL::from(0x7953db0ab48808f4u64), + FGL::from(0xc71603f33a1144cau64), + ]; + assert_eq!(res, expected); + } + #[test] + fn test_poseidon_opt_hash_1_11_avx512_average() { + let poseidon = Poseidon::new(); + let input = vec![ + FGL::from(0u64), + FGL::from(1u64), + FGL::from(2u64), + FGL::from(3u64), + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::from(4u64), + FGL::from(5u64), + FGL::from(6u64), + FGL::from(7u64), + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + ]; + let state = vec![ + FGL::from(8u64), + FGL::from(9u64), + FGL::from(10u64), + FGL::from(11u64), + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + FGL::ZERO, + ]; + let mut total_duration = Duration::new(0, 0); + let iterations = 100; + + for _ in 0..iterations { + let start = Instant::now(); + let _res = poseidon.hash(&input, &state, 4).unwrap(); + total_duration += start.elapsed(); + } + + let average_duration = total_duration / iterations; + log::debug!("Average hash_avx512_duration_1: {:?}", average_duration); + } +} diff --git a/starky/src/constant.rs b/starky/src/constant.rs index 56df8b42..b5a93583 100644 --- a/starky/src/constant.rs +++ b/starky/src/constant.rs @@ -1,6 +1,23 @@ #![allow(non_snake_case)] -#[cfg(target_feature = "avx2")] +#[cfg(all( + target_feature = "avx2", + not(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )) +))] use crate::arch::x86_64::avx2_poseidon_gl::{load_constants_avx2, ConstantsAvx2}; +#[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" +))] +use crate::arch::x86_64::avx512_poseidon_gl::{load_constants_avx512, ConstantsAvx512}; use crate::field_bls12381::Fr as Fr_bls12381; use crate::field_bn128::Fr as Fr_bn128; use crate::poseidon_bls12381::load_constants as load_constants_bls12381; @@ -68,13 +85,35 @@ lazy_static::lazy_static! { }; } -#[cfg(target_feature = "avx2")] +#[cfg(all( + target_feature = "avx2", + not(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )) +))] lazy_static::lazy_static! { pub static ref POSEIDON_CONSTANTS_OPT_AVX2: ConstantsAvx2 = { load_constants_avx2() }; } +#[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" +))] +lazy_static::lazy_static! { + pub static ref POSEIDON_CONSTANTS_OPT_AVX512: ConstantsAvx512 = { + load_constants_avx512() + }; +} + pub const MIN_OPS_PER_THREAD: usize = 1 << 12; pub const MAX_OPS_PER_THREAD: usize = 1 << 18; pub const GLOBAL_L1: &str = "Global.L1"; diff --git a/starky/src/lib.rs b/starky/src/lib.rs index b9d13604..8e9861b9 100644 --- a/starky/src/lib.rs +++ b/starky/src/lib.rs @@ -1,5 +1,6 @@ #![allow(clippy::needless_range_loop)] #![allow(dead_code)] +#![cfg_attr(feature = "avx512", feature(stdsimd))] pub mod polsarray; mod polutils; @@ -22,7 +23,8 @@ mod poseidon_bn128; mod poseidon_bn128_constants; mod poseidon_bn128_constants_opt; pub mod poseidon_bn128_opt; -mod poseidon_constants_avx; +mod poseidon_constants_avx2; +mod poseidon_constants_avx512; mod poseidon_constants_opt; pub mod poseidon_opt; diff --git a/starky/src/linearhash.rs b/starky/src/linearhash.rs index 26465564..6b76c688 100644 --- a/starky/src/linearhash.rs +++ b/starky/src/linearhash.rs @@ -1,12 +1,37 @@ #![allow(non_snake_case)] -#[cfg(target_feature = "avx2")] +#[cfg(all( + target_feature = "avx2", + not(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )) +))] use crate::arch::x86_64::avx2_poseidon_gl::Poseidon; +#[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" +))] +use crate::arch::x86_64::avx512_poseidon_gl::Poseidon; use crate::errors::Result; -#[cfg(not(target_feature = "avx2"))] +#[cfg(not(any( + target_feature = "avx2", + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" +)))] use crate::poseidon_opt::Poseidon; use crate::traits::MTNodeType; use crate::ElementDigest; use plonky::field_gl::Fr as FGL; +use rayon::prelude::*; #[derive(Default)] pub struct LinearHash { @@ -17,21 +42,37 @@ impl LinearHash { pub fn new() -> Self { LinearHash { h: Poseidon::new() } } - + #[cfg(not(any( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )))] pub fn hash_element_matrix( &self, vals: &[Vec], batch_size: usize, ) -> Result> { - let mut flatvals: Vec = vec![]; - for col in vals.iter() { - for elem in col.iter() { - flatvals.push(*elem); - } - } + let mut flatvals = vec![FGL::default(); vals.len() * vals[0].len()]; + + flatvals + .par_chunks_mut(vals[0].len()) + .zip(vals.par_iter()) + .for_each(|(flat_chunk, col)| { + flat_chunk.copy_from_slice(col); + }); + self.hash(&flatvals, batch_size) } + #[cfg(not(any( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )))] pub fn hash(&self, flatvals: &[FGL], batch_size: usize) -> Result> { let mut bs = batch_size; if bs == 0 { @@ -68,6 +109,13 @@ impl LinearHash { } } + #[cfg(not(any( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )))] pub fn _hash(&self, flatvals: &[FGL]) -> Result> { let mut st = [FGL::ZERO; 4]; if flatvals.len() <= 4 { @@ -95,6 +143,166 @@ impl LinearHash { } Ok(ElementDigest::<4>::new(&st)) } + + #[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + ))] + pub fn hash_element_matrix( + &self, + vals: &[Vec], + batch_size: usize, + ) -> Result> { + let mut flatvals = vec![FGL::default(); vals.len() * vals[0].len()]; + + flatvals + .par_chunks_mut(vals[0].len()) + .zip(vals.par_iter()) + .for_each(|(flat_chunk, col)| { + flat_chunk.copy_from_slice(col); + }); + + let flatvals_1: Vec = [flatvals.clone(), flatvals.clone()].concat(); + + let hash_result = self.hash(&flatvals_1, batch_size).unwrap()[0]; + Ok(hash_result) + } + + #[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + ))] + pub fn hash(&self, flatvals: &[FGL], batch_size: usize) -> Result<[ElementDigest<4>; 2]> { + let mid = flatvals.len() / 2; + let flatvals0 = &flatvals[..mid]; + let flatvals1 = &flatvals[mid..]; + + let mut bs = batch_size; + if bs == 0 { + bs = core::cmp::max(8, (mid + 3) / 4); + } + + let mut st0 = [FGL::ZERO; 4]; + let mut st1 = [FGL::ZERO; 4]; + if mid <= 4 { + for (i, v) in flatvals0.iter().enumerate() { + st0[i] = *v; + } + for (i, v) in flatvals1.iter().enumerate() { + st1[i] = *v; + } + return Ok([ElementDigest::<4>::new(&st0), ElementDigest::<4>::new(&st1)]); + } + + let hsz = (mid + bs - 1) / bs; + let mut hashes: Vec = vec![FGL::ZERO; hsz * 4 * 2]; + // NOTE flatsvals.len <= hashes.len + hashes + .chunks_mut(8) + .zip(flatvals0.chunks(bs)) + .zip(flatvals1.chunks(bs)) + .for_each(|((outs, chunk0), chunk1)| { + let mut inps = Vec::new(); + inps.extend_from_slice(chunk0); + inps.extend_from_slice(chunk1); + let hash_result = self._hash(inps.as_slice()).unwrap(); + outs.copy_from_slice(&hash_result); + }); + + if hashes.len() <= 8 { + let mid = hashes.len() / 2; + for (i, &v) in hashes.iter().take(mid).enumerate() { + st0[i % 4] = v; + } + for (i, &v) in hashes.iter().skip(mid).enumerate() { + st1[i % 4] = v; + } + return Ok([ElementDigest::<4>::new(&st0), ElementDigest::<4>::new(&st1)]); + } else { + let mut hash: Vec = Vec::with_capacity(hashes.len()); + for chunk in hashes.chunks(8) { + let (first_half, _) = chunk.split_at(4); + hash.extend_from_slice(first_half); + } + for chunk in hashes.chunks(8) { + let (_, second_half) = chunk.split_at(4); + hash.extend_from_slice(second_half); + } + let tmp = self._hash(&hash).unwrap(); + return Ok([ + ElementDigest::<4>::new(&tmp[0..4]), + ElementDigest::<4>::new(&tmp[4..8]), + ]); + } + } + + #[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + ))] + pub fn _hash(&self, flatvals: &[FGL]) -> Result<[FGL; 8]> { + let mid = flatvals.len() / 2; + let flatvals0 = &flatvals[..mid]; + let flatvals1 = &flatvals[mid..]; + let mut st0 = [FGL::ZERO; 4]; + let mut st1 = [FGL::ZERO; 4]; + if mid <= 4 { + for (i, v) in flatvals0.iter().enumerate() { + st0[i] = *v; + } + for (i, v) in flatvals1.iter().enumerate() { + st1[i] = *v; + } + let result = [ + st0[0], st0[1], st0[2], st0[3], st1[0], st1[1], st1[2], st1[3], + ]; + return Ok(result); + } + let mut count = 0; + let mut st = [FGL::ZERO; 8]; + let mut inhashes: Vec = vec![]; + + for v in flatvals0.iter() { + inhashes.push(*v); + if inhashes.len() == 8 { + let start = count * 8; + let mid = start + 4; + let end = start + 8; + let first_half = &flatvals1[start..mid]; + inhashes.splice(4..4, first_half.iter().cloned()); + let second_half = &flatvals1[mid..end]; + inhashes.extend_from_slice(second_half); + let t = self.h.hash(&inhashes, &st, 8).unwrap(); + st.copy_from_slice(&t); + inhashes.clear(); + count += 1; + } + } + + if !inhashes.is_empty() { + while inhashes.len() < 8 { + inhashes.push(FGL::ZERO); + } + inhashes.extend_from_slice(&flatvals1[count * 8..]); + while inhashes.len() < 16 { + inhashes.push(FGL::ZERO); + } + let middle_chunk = inhashes.splice(4..8, vec![]).collect::>(); + inhashes.splice(8..8, middle_chunk.iter().cloned()); + let t = self.h.hash(&inhashes, &st, 8).unwrap(); + st.copy_from_slice(&t); + } + Ok(st) + } } #[cfg(test)] diff --git a/starky/src/merklehash.rs b/starky/src/merklehash.rs index 12b0f43f..dfb22f02 100644 --- a/starky/src/merklehash.rs +++ b/starky/src/merklehash.rs @@ -1,12 +1,30 @@ #![allow(dead_code)] -#[cfg(target_feature = "avx2")] +#[cfg(all( + target_feature = "avx2", + not(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )) +))] use crate::arch::x86_64::avx2_poseidon_gl::Poseidon; use crate::constant::{get_max_workers, MAX_OPS_PER_THREAD, MIN_OPS_PER_THREAD}; use crate::digest::ElementDigest; use crate::errors::{EigenError, Result}; use crate::f3g::F3G; use crate::linearhash::LinearHash; -#[cfg(not(target_feature = "avx2"))] +#[cfg(any( + not(target_feature = "avx2"), + all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + ) +))] use crate::poseidon_opt::Poseidon; use crate::traits::MTNodeType; use crate::traits::MerkleTree; @@ -82,6 +100,13 @@ impl MerkleTreeGL { Ok(()) } + #[cfg(not(any( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )))] fn do_merklize_level( &self, buff_in: &[ElementDigest<4>], @@ -107,6 +132,54 @@ impl MerkleTreeGL { Ok(buff_out64) } + #[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + ))] + fn do_merklize_level( + &self, + buff_in: &[ElementDigest<4>], + _st_i: usize, + _st_n: usize, + ) -> Result>> { + log::trace!( + "merklizing GL hash start.... {}/{}, buff size {}", + _st_i, + _st_n, + buff_in.len() + ); + let n_ops = buff_in.len() / 4; + let mut buff_out64: Vec> = + vec![ElementDigest::<4>::default(); buff_in.len() / 2]; + let process = |chunk: &[ElementDigest<4>], four: &mut [FGL; 16]| { + for (j, item) in chunk.iter().enumerate() { + let one: &[FGL] = item.as_elements(); + four[j * 4..(j + 1) * 4].copy_from_slice(one); + } + self.h.hash(four, 0).unwrap() + }; + + let mut four = [FGL::ZERO; 16]; + if n_ops == 0 { + let hash_result = process(&buff_in[..2], &mut four); + buff_out64[0] = hash_result[0]; + } else { + for i in 0..n_ops { + let hash_result = process(&buff_in[i * 4..i * 4 + 4], &mut four); + buff_out64[i * 2] = hash_result[0]; + buff_out64[i * 2 + 1] = hash_result[1]; + } + if buff_in.len() % 4 != 0 { + let hash_result = process(&buff_in[buff_in.len() - 2..], &mut four); + buff_out64[n_ops * 2] = hash_result[0]; + } + } + Ok(buff_out64) + } + fn merkle_calculate_root_from_proof( &self, mp: &[Vec], @@ -136,6 +209,13 @@ impl MerkleTreeGL { self.merkle_calculate_root_from_proof(mp, next_idx, &next_value, offset + 1) } + #[cfg(not(any( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )))] fn calculate_root_from_group_proof( &self, mp: &[Vec], @@ -145,6 +225,26 @@ impl MerkleTreeGL { let h = self.h.hash(vals, 0)?; self.merkle_calculate_root_from_proof(mp, idx, &h, 0) } + + #[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + ))] + fn calculate_root_from_group_proof( + &self, + mp: &[Vec], + idx: usize, + vals: &[FGL], + ) -> Result> { + let mut vals_0: Vec = Vec::with_capacity(vals.len() * 2); + vals_0.extend_from_slice(vals); + vals_0.extend_from_slice(vals); + let h = self.h.hash(&vals_0, 0)?; + self.merkle_calculate_root_from_proof(mp, idx, &h[0], 0) + } } impl MerkleTree for MerkleTreeGL { @@ -175,6 +275,13 @@ impl MerkleTree for MerkleTreeGL { }); } + #[cfg(not(any( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )))] fn merkelize(&mut self, buff: Vec, width: usize, height: usize) -> Result<()> { let max_workers = get_max_workers(); @@ -234,6 +341,87 @@ impl MerkleTree for MerkleTreeGL { Ok(()) } + #[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + ))] + fn merkelize(&mut self, buff: Vec, width: usize, height: usize) -> Result<()> { + let max_workers = get_max_workers(); + + let mut n_per_thread_f = (height - 1) / max_workers + 1; + + let div = core::cmp::max(width / 8, 1); + let max_corrected = MAX_OPS_PER_THREAD / div; + let min_corrected = MIN_OPS_PER_THREAD / div; + + if n_per_thread_f > max_corrected { + n_per_thread_f = max_corrected; + } + if n_per_thread_f < min_corrected { + n_per_thread_f = min_corrected; + } + + let mut nodes = vec![Self::MTNode::default(); get_n_nodes(height)]; + let now = Instant::now(); + + if !buff.is_empty() { + nodes + .par_chunks_mut(n_per_thread_f) + .zip(buff.par_chunks(n_per_thread_f * width)) + .for_each(|(out, bb)| { + let cur_n = bb.len() / width / 2; + (0..cur_n).for_each(|j| { + let batch = &bb[(j * width * 2)..((j + 1) * width * 2)]; + let hash_result = self.h.hash(batch, 0).unwrap(); + let index = j * 2; + if index < out.len() && index + 1 < out.len() { + out[index] = hash_result[0]; + out[index + 1] = hash_result[1]; + } + }); + if bb.len() % (width * 2) != 0 { + let remaining = &bb[cur_n * width * 2..]; + let mut batch = vec![FGL::ZERO; width * 2]; + batch[..remaining.len()].copy_from_slice(remaining); + batch[remaining.len()..].copy_from_slice(remaining); + let hash_result = self.h.hash(&batch, 0).unwrap(); + out[cur_n * 2] = hash_result[0]; + } + }); + } + + log::trace!("linearhash time cost: {}", now.elapsed().as_secs_f64()); + + // merklize level + self.nodes = nodes; + self.elements = buff; + self.width = width; + self.height = height; + + let mut n64: usize = height; + let mut next_n64: usize = (n64 - 1) / 2 + 1; + let mut p_in: usize = 0; + let mut p_out: usize = p_in + next_n64 * 2; + while n64 > 1 { + let now = Instant::now(); + self.merklize_level(p_in, next_n64, p_out)?; + log::trace!( + "merklize_level {} time cost: {}", + next_n64, + now.elapsed().as_secs_f64() + ); + n64 = next_n64; + next_n64 = (n64 - 1) / 2 + 1; + p_in = p_out; + p_out = p_in + next_n64 * 2; + } + + Ok(()) + } + fn get_element(&self, idx: usize, sub_idx: usize) -> FGL { self.elements[self.width * idx + sub_idx] } @@ -279,7 +467,6 @@ mod tests { use crate::traits::MTNodeType; use crate::traits::MerkleTree; use plonky::field_gl::Fr as FGL; - use std::time::Instant; #[test] fn test_merklehash_gl_simple() { @@ -293,13 +480,10 @@ mod tests { cols[i * n_pols + j] = FGL::from((i + j * 1000) as u64); } } - let start = Instant::now(); let mut tree = MerkleTreeGL::new(); tree.merkelize(cols, n_pols, n).unwrap(); let (v, mp) = tree.get_group_proof(idx).unwrap(); let root = tree.root(); - let duration = start.elapsed(); - println!("time: {:?}", duration); let re = root.as_elements(); let expected = vec![ FGL::from(11508832812350783315u64), @@ -375,7 +559,6 @@ mod tests { pols[i * n_pols + j] = FGL::from((i + j * 1000) as u64); } } - let mut tree = MerkleTreeGL::new(); tree.merkelize(pols, n_pols, n).unwrap(); let (group_elements, mp) = tree.get_group_proof(idx).unwrap(); diff --git a/starky/src/poseidon_constants_avx.rs b/starky/src/poseidon_constants_avx2.rs similarity index 100% rename from starky/src/poseidon_constants_avx.rs rename to starky/src/poseidon_constants_avx2.rs diff --git a/starky/src/poseidon_constants_avx512.rs b/starky/src/poseidon_constants_avx512.rs new file mode 100644 index 00000000..adb5656c --- /dev/null +++ b/starky/src/poseidon_constants_avx512.rs @@ -0,0 +1,1589 @@ +#[allow(clippy::type_complexity)] +pub fn constants() -> (Vec, Vec, Vec, Vec) { + let c_str: Vec = vec![ + 0xb585f766f2144405, + 0x7746a55f43921ad7, + 0xb2fb0d31cee799b4, + 0xf6760a4803427d7, + 0xb585f766f2144405, + 0x7746a55f43921ad7, + 0xb2fb0d31cee799b4, + 0xf6760a4803427d7, + 0xe10d666650f4e012, + 0x8cae14cb07d09bf1, + 0xd438539c95f63e9f, + 0xef781c7ce35b4c3d, + 0xe10d666650f4e012, + 0x8cae14cb07d09bf1, + 0xd438539c95f63e9f, + 0xef781c7ce35b4c3d, + 0xcdc4a239b0c44426, + 0x277fa208bf337bff, + 0xe17653a29da578a1, + 0xc54302f225db2c76, + 0xcdc4a239b0c44426, + 0x277fa208bf337bff, + 0xe17653a29da578a1, + 0xc54302f225db2c76, + 0xac6c9c2b4418dd61, + 0xe0888eb1e8a01286, + 0x813dbe952b98904e, + 0xcc3033609c9cf175, + 0xac6c9c2b4418dd61, + 0xe0888eb1e8a01286, + 0x813dbe952b98904e, + 0xcc3033609c9cf175, + 0x72cebc82a59c0f82, + 0x8150d8525753e741, + 0xb1122c74b268d66e, + 0x7c6ddd482375aa2, + 0x72cebc82a59c0f82, + 0x8150d8525753e741, + 0xb1122c74b268d66e, + 0x7c6ddd482375aa2, + 0xa4dd6f1ef49fb6af, + 0xd33b0d5b4f7ccfe5, + 0xc523112247209124, + 0x464804200134c32d, + 0xa4dd6f1ef49fb6af, + 0xd33b0d5b4f7ccfe5, + 0xc523112247209124, + 0x464804200134c32d, + 0xcd09dea180de4f2c, + 0xadb069225c93e4e6, + 0xbf01209b8a7c8534, + 0xb1eb37d319913823, + 0xcd09dea180de4f2c, + 0xadb069225c93e4e6, + 0xbf01209b8a7c8534, + 0xb1eb37d319913823, + 0xdadf943b8d3e5a0d, + 0x6d15f3cb7a3520ba, + 0xf07af62b134ef181, + 0x568355076c6b0de6, + 0xdadf943b8d3e5a0d, + 0x6d15f3cb7a3520ba, + 0xf07af62b134ef181, + 0x568355076c6b0de6, + 0x31ca4bf93cab68b8, + 0xfbad37a125735ba, + 0x9d3a9caaf1ac9e0a, + 0x4f265810f020c095, + 0x31ca4bf93cab68b8, + 0xfbad37a125735ba, + 0x9d3a9caaf1ac9e0a, + 0x4f265810f020c095, + 0x6a84c9524e81a8bc, + 0x68ba410537925c79, + 0x422604631b34b07a, + 0x28e3a001f62f8290, + 0x6a84c9524e81a8bc, + 0x68ba410537925c79, + 0x422604631b34b07a, + 0x28e3a001f62f8290, + 0x3adfdccb8f734d41, + 0x73503e539baec66a, + 0xe8c1fd0142d9849c, + 0xe204ac13660546c5, + 0x3adfdccb8f734d41, + 0x73503e539baec66a, + 0xe8c1fd0142d9849c, + 0xe204ac13660546c5, + 0x8e2bb3ea97a40c53, + 0xac2800d1bf56548c, + 0x9494dca005d180d0, + 0xf36e1d066383ef53, + 0x8e2bb3ea97a40c53, + 0xac2800d1bf56548c, + 0x9494dca005d180d0, + 0xf36e1d066383ef53, + 0x8aa35b97a0e03c04, + 0xcf42a59addbd1f0c, + 0xa43ace89f8fdbd79, + 0x37585d8c243870c, + 0x8aa35b97a0e03c04, + 0xcf42a59addbd1f0c, + 0xa43ace89f8fdbd79, + 0x37585d8c243870c, + 0x4ab94ee3e26596fe, + 0xcee3abbb50d57b23, + 0xac91a7101a5ec55b, + 0x9173aa8462280d2d, + 0x4ab94ee3e26596fe, + 0xcee3abbb50d57b23, + 0xac91a7101a5ec55b, + 0x9173aa8462280d2d, + 0xaec1ca46ccb95105, + 0x57b2f2845db61e4a, + 0x95704158500c90c6, + 0x66e023b0e6c9df5f, + 0xaec1ca46ccb95105, + 0x57b2f2845db61e4a, + 0x95704158500c90c6, + 0x66e023b0e6c9df5f, + 0x315f63f4fec360ba, + 0xf3009795713abcf1, + 0xf4decc3fb00765ee, + 0x32620ac918682d50, + 0x49717d63a5fc742e, + 0x153516f22014ea2d, + 0xcc316380a2761fe4, + 0x2e49b3f7076d203d, + 0x44ac3e9bf0a2dc89, + 0x49d1e388d8e35c, + 0x53ec867cb39989fa, + 0xd2c9bcc8d65f5a62, + 0xc0cc930ee8540455, + 0x40651e0872505e8, + 0x168973b2ebafbe6c, + 0x9c7eecb3b40581c2, + 0x389473bcdfca97a2, + 0xb1cb0b3abe9753ad, + 0x41afceccffdb18e6, + 0x7bf841e237ccd6c9, + 0x6082a3f101fb888, + 0x8c1a39196f4163cc, + 0xb56664760c1c9476, + 0x2a02ac020d1eb5a3, + 0x6a9d48e8aa83605d, + 0x8a0d2f5c4c9c51b2, + 0xb56664760c1c9476, + 0x2a02ac020d1eb5a3, + 0x6a9d48e8aa83605d, + 0x8a0d2f5c4c9c51b2, + 0x75fc65575b284ad4, + 0xadaedf7d1ce2a8dd, + 0x235bc889cc83968e, + 0xa8c30cf1781738f5, + 0x75fc65575b284ad4, + 0xadaedf7d1ce2a8dd, + 0x235bc889cc83968e, + 0xa8c30cf1781738f5, + 0x546b2a846753bcf8, + 0x9b68e8c06c04bd25, + 0x3fdf80794ebb443b, + 0x92ca132a9bec5a45, + 0x546b2a846753bcf8, + 0x9b68e8c06c04bd25, + 0x3fdf80794ebb443b, + 0x92ca132a9bec5a45, + 0x76133eecfd9bd1ff, + 0x3fb0fd5381054812, + 0xf15925978dbd52ff, + 0x2ee289ac37f0e879, + 0x76133eecfd9bd1ff, + 0x3fb0fd5381054812, + 0xf15925978dbd52ff, + 0x2ee289ac37f0e879, + 0xd8af8654e9a2e659, + 0x8595bbd7f34c5e8a, + 0x206ddbf781e47b2, + 0xe101a767854a2f97, + 0xd8af8654e9a2e659, + 0x8595bbd7f34c5e8a, + 0x206ddbf781e47b2, + 0xe101a767854a2f97, + 0xf4d4f0a01072c996, + 0x197aec2894aab642, + 0x8d0c3911220db49b, + 0xa62a8bad609227ca, + 0xf4d4f0a01072c996, + 0x197aec2894aab642, + 0x8d0c3911220db49b, + 0xa62a8bad609227ca, + 0x1e4813a7e7b9cbce, + 0x6b547528731244eb, + 0xd08e48512bfea84e, + 0xb2920c88d3885857, + 0x1e4813a7e7b9cbce, + 0x6b547528731244eb, + 0xd08e48512bfea84e, + 0xb2920c88d3885857, + 0x1f0cd5d7a309fcc2, + 0x99a0ea0842fdb4fb, + 0xc227210554b6c53d, + 0x70e5269708f6f3a9, + 0x1f0cd5d7a309fcc2, + 0x99a0ea0842fdb4fb, + 0xc227210554b6c53d, + 0x70e5269708f6f3a9, + 0xbe8f71c8c98bb3bd, + 0xf96fb39adc4baaf6, + 0x7f9a7555c60fc6c7, + 0xccaa5446d71fe6a5, + 0xbe8f71c8c98bb3bd, + 0xf96fb39adc4baaf6, + 0x7f9a7555c60fc6c7, + 0xccaa5446d71fe6a5, + ]; + let m_str: Vec = vec![ + 0x19, 0xf, 0x29, 0x10, 0x19, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x2, 0x1c, 0xd, 0xd, + 0x27, 0x12, 0x22, 0x14, 0x27, 0x12, 0x22, 0x14, 0x14, 0x11, 0xf, 0x29, 0x14, 0x11, 0xf, + 0x29, 0x10, 0x2, 0x1c, 0xd, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0xd, 0x27, 0x12, + 0x22, 0x22, 0x14, 0x11, 0xf, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x2, 0x1c, 0x29, 0x10, 0x2, + 0x1c, 0xd, 0xd, 0x27, 0x12, 0xd, 0xd, 0x27, 0x12, 0x12, 0x22, 0x14, 0x11, 0x12, 0x22, 0x14, + 0x11, 0xf, 0x29, 0x10, 0x2, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x1c, 0xd, 0xd, + 0x27, 0x27, 0x12, 0x22, 0x14, 0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x11, 0xf, + 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x2, 0x1c, 0xd, 0xd, 0xd, 0x27, 0x12, 0x22, 0xd, 0x27, + 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x14, 0x11, 0xf, 0x29, 0x10, 0x2, 0x1c, 0xd, 0x10, 0x2, + 0x1c, 0xd, 0xd, 0xd, 0x27, 0x12, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x22, 0x14, + 0x11, 0xf, 0x29, 0x10, 0x2, 0x1c, 0x29, 0x10, 0x2, 0x1c, 0x1c, 0xd, 0xd, 0x27, 0x1c, 0xd, + 0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x2, 0xf, 0x29, + 0x10, 0x2, 0x2, 0x1c, 0xd, 0xd, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x27, 0x12, + 0x22, 0x14, 0x11, 0xf, 0x29, 0x10, 0x11, 0xf, 0x29, 0x10, 0x10, 0x2, 0x1c, 0xd, 0x10, 0x2, + 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x29, 0x14, 0x11, + 0xf, 0x29, 0x29, 0x10, 0x2, 0x1c, 0x29, 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x12, 0xd, 0xd, + 0x27, 0x12, 0x22, 0x14, 0x11, 0xf, 0x22, 0x14, 0x11, 0xf, 0xf, 0x29, 0x10, 0x2, 0xf, 0x29, + 0x10, 0x2, 0x1c, 0xd, 0xd, 0x27, 0x1c, 0xd, 0xd, 0x27, 0x12, 0x22, 0x14, 0x11, 0x12, 0x22, + 0x14, 0x11, + ]; + let p_str: Vec = vec![ + 0x19, + 0xf, + 0x29, + 0x10, + 0x19, + 0xf, + 0x29, + 0x10, + 0x2, + 0x1c, + 0xd, + 0xd, + 0x2, + 0x1c, + 0xd, + 0xd, + 0x27, + 0x12, + 0x22, + 0x14, + 0x27, + 0x12, + 0x22, + 0x14, + 0x78566230aa7cc5d0, + 0x817bd8a7869ed1b5, + 0xd267254bea1097f4, + 0x60c33ebd1e023f0a, + 0x78566230aa7cc5d0, + 0x817bd8a7869ed1b5, + 0xd267254bea1097f4, + 0x60c33ebd1e023f0a, + 0xa89ef32ae1462322, + 0x6250f5f176d483e7, + 0xe16a6c1dee3ba347, + 0xec9730136b7c2c05, + 0xa89ef32ae1462322, + 0x6250f5f176d483e7, + 0xe16a6c1dee3ba347, + 0xec9730136b7c2c05, + 0x3cf7c3a39d94c236, + 0xb4707207455f57e3, + 0xaadb39e83e76a9e0, + 0x32f8ae916e567d39, + 0x3cf7c3a39d94c236, + 0xb4707207455f57e3, + 0xaadb39e83e76a9e0, + 0x32f8ae916e567d39, + 0xdbf23e50005e7f24, + 0x819f2c14a8366b1f, + 0x2dc10fce3233f443, + 0xdb6945a20d277091, + 0xdbf23e50005e7f24, + 0x819f2c14a8366b1f, + 0x2dc10fce3233f443, + 0xdb6945a20d277091, + 0x77c1a153e73659e8, + 0xaad1255d46e78f07, + 0x13d316e45539aef4, + 0xe1ecc5c21eec0646, + 0x77c1a153e73659e8, + 0xaad1255d46e78f07, + 0x13d316e45539aef4, + 0xe1ecc5c21eec0646, + 0x9e62c7d7b000cb0b, + 0x8e1de42b665c6706, + 0xcd9bf0bd292c5fda, + 0xaadb39e83e76a9e0, + 0x9e62c7d7b000cb0b, + 0x8e1de42b665c6706, + 0xcd9bf0bd292c5fda, + 0xaadb39e83e76a9e0, + 0xb4a02c5c826d523e, + 0x7a5cf5b7b922e946, + 0xfa9db0de2d852e7a, + 0x383dd77e07998487, + 0xb4a02c5c826d523e, + 0x7a5cf5b7b922e946, + 0xfa9db0de2d852e7a, + 0x383dd77e07998487, + 0x2aec981be4b62ed5, + 0x8a00c7c83c762584, + 0x577e0472764f061d, + 0x956d3c8b5528e064, + 0x2aec981be4b62ed5, + 0x8a00c7c83c762584, + 0x577e0472764f061d, + 0x956d3c8b5528e064, + 0xe202be7ad7265af6, + 0xee7b04568203481, + 0x8e1de42b665c6706, + 0xb4707207455f57e3, + 0xe202be7ad7265af6, + 0xee7b04568203481, + 0x8e1de42b665c6706, + 0xb4707207455f57e3, + 0x466d8f66a8f9fed5, + 0x727eca45c8d7bb71, + 0xde2a0516f8c9d943, + 0xe04ea1957ad8305c, + 0x466d8f66a8f9fed5, + 0x727eca45c8d7bb71, + 0xde2a0516f8c9d943, + 0xe04ea1957ad8305c, + 0xb70fb5f2b4f1f85f, + 0xc734f3829ed30b0c, + 0x226a4dcf5db3316d, + 0x6df1d31fa84398f4, + 0xb70fb5f2b4f1f85f, + 0xc734f3829ed30b0c, + 0x226a4dcf5db3316d, + 0x6df1d31fa84398f4, + 0x82178371fa5fff69, + 0xe202be7ad7265af6, + 0x9e62c7d7b000cb0b, + 0x3cf7c3a39d94c236, + 0x82178371fa5fff69, + 0xe202be7ad7265af6, + 0x9e62c7d7b000cb0b, + 0x3cf7c3a39d94c236, + 0x68da2264f65ec3e, + 0x605a82c52b5ad2f1, + 0xe6fdf23648931b99, + 0xd499fcbf63fbd266, + 0x68da2264f65ec3e, + 0x605a82c52b5ad2f1, + 0xe6fdf23648931b99, + 0xd499fcbf63fbd266, + 0x7c66d474cd2087cb, + 0xb1a0132288b1619b, + 0x3373035a3ca3dac6, + 0xf4898a1a3554ee49, + 0x7c66d474cd2087cb, + 0xb1a0132288b1619b, + 0x3373035a3ca3dac6, + 0xf4898a1a3554ee49, + 0x6df1d31fa84398f4, + 0x956d3c8b5528e064, + 0xe1ecc5c21eec0646, + 0xec9730136b7c2c05, + 0x6df1d31fa84398f4, + 0x956d3c8b5528e064, + 0xe1ecc5c21eec0646, + 0xec9730136b7c2c05, + 0xb59f9ff0ac6d5d78, + 0x59ccc4d5184bc93a, + 0x3743057c07a5dbfa, + 0x462269e4b04620a5, + 0xb59f9ff0ac6d5d78, + 0x59ccc4d5184bc93a, + 0x3743057c07a5dbfa, + 0x462269e4b04620a5, + 0x39302966be7df654, + 0x88685b4f0798dfd1, + 0x441f3a3747b5adb7, + 0x3373035a3ca3dac6, + 0x39302966be7df654, + 0x88685b4f0798dfd1, + 0x441f3a3747b5adb7, + 0x3373035a3ca3dac6, + 0x226a4dcf5db3316d, + 0x577e0472764f061d, + 0x13d316e45539aef4, + 0xe16a6c1dee3ba347, + 0x226a4dcf5db3316d, + 0x577e0472764f061d, + 0x13d316e45539aef4, + 0xe16a6c1dee3ba347, + 0xcfb03c902d447551, + 0x66c8bab2096cfd38, + 0xa6fdb8ebccc51667, + 0x63c9679d8572a867, + 0xcfb03c902d447551, + 0x66c8bab2096cfd38, + 0xa6fdb8ebccc51667, + 0x63c9679d8572a867, + 0xb827c807875511c0, + 0xfc02e869e21b72f8, + 0x88685b4f0798dfd1, + 0xb1a0132288b1619b, + 0xb827c807875511c0, + 0xfc02e869e21b72f8, + 0x88685b4f0798dfd1, + 0xb1a0132288b1619b, + 0xc734f3829ed30b0c, + 0x8a00c7c83c762584, + 0xaad1255d46e78f07, + 0x6250f5f176d483e7, + 0xc734f3829ed30b0c, + 0x8a00c7c83c762584, + 0xaad1255d46e78f07, + 0x6250f5f176d483e7, + 0x2044ce14eaf8f5d9, + 0xeb4c0ce280c3e935, + 0x2c4916605e3dea58, + 0x81c44e9699915693, + 0x2044ce14eaf8f5d9, + 0xeb4c0ce280c3e935, + 0x2c4916605e3dea58, + 0x81c44e9699915693, + 0xa4daffb3ffd0e78f, + 0xb827c807875511c0, + 0x39302966be7df654, + 0x7c66d474cd2087cb, + 0xa4daffb3ffd0e78f, + 0xb827c807875511c0, + 0x39302966be7df654, + 0x7c66d474cd2087cb, + 0xb70fb5f2b4f1f85f, + 0x2aec981be4b62ed5, + 0x77c1a153e73659e8, + 0xa89ef32ae1462322, + 0xb70fb5f2b4f1f85f, + 0x2aec981be4b62ed5, + 0x77c1a153e73659e8, + 0xa89ef32ae1462322, + 0xfb9373c8481e0f0d, + 0x17f9202c16676b2f, + 0xe95c10ae32e05085, + 0x62ecbe05e02433fc, + 0xfb9373c8481e0f0d, + 0x17f9202c16676b2f, + 0xe95c10ae32e05085, + 0x62ecbe05e02433fc, + 0x81c44e9699915693, + 0x63c9679d8572a867, + 0x462269e4b04620a5, + 0xd499fcbf63fbd266, + 0x81c44e9699915693, + 0x63c9679d8572a867, + 0x462269e4b04620a5, + 0xd499fcbf63fbd266, + 0xe04ea1957ad8305c, + 0x383dd77e07998487, + 0xdb6945a20d277091, + 0x60c33ebd1e023f0a, + 0xe04ea1957ad8305c, + 0x383dd77e07998487, + 0xdb6945a20d277091, + 0x60c33ebd1e023f0a, + 0x72af70cdcb99214f, + 0x9b6e5164ed35d878, + 0x97f9b7d2cfc2ade5, + 0xe95c10ae32e05085, + 0x72af70cdcb99214f, + 0x9b6e5164ed35d878, + 0x97f9b7d2cfc2ade5, + 0xe95c10ae32e05085, + 0x2c4916605e3dea58, + 0xa6fdb8ebccc51667, + 0x3743057c07a5dbfa, + 0xe6fdf23648931b99, + 0x2c4916605e3dea58, + 0xa6fdb8ebccc51667, + 0x3743057c07a5dbfa, + 0xe6fdf23648931b99, + 0xde2a0516f8c9d943, + 0xfa9db0de2d852e7a, + 0x2dc10fce3233f443, + 0xd267254bea1097f4, + 0xde2a0516f8c9d943, + 0xfa9db0de2d852e7a, + 0x2dc10fce3233f443, + 0xd267254bea1097f4, + 0xe3ef40eacc6ff78d, + 0x6fadc9347faeee81, + 0x9b6e5164ed35d878, + 0x17f9202c16676b2f, + 0xe3ef40eacc6ff78d, + 0x6fadc9347faeee81, + 0x9b6e5164ed35d878, + 0x17f9202c16676b2f, + 0xeb4c0ce280c3e935, + 0x66c8bab2096cfd38, + 0x59ccc4d5184bc93a, + 0x605a82c52b5ad2f1, + 0xeb4c0ce280c3e935, + 0x66c8bab2096cfd38, + 0x59ccc4d5184bc93a, + 0x605a82c52b5ad2f1, + 0x727eca45c8d7bb71, + 0x7a5cf5b7b922e946, + 0x819f2c14a8366b1f, + 0x817bd8a7869ed1b5, + 0x727eca45c8d7bb71, + 0x7a5cf5b7b922e946, + 0x819f2c14a8366b1f, + 0x817bd8a7869ed1b5, + ]; + let s_str: Vec = vec![ + 0x19, + 0x3d999c961b7c63b0, + 0x814e82efcd172529, + 0x2421e5d236704588, + 0x19, + 0x3d999c961b7c63b0, + 0x814e82efcd172529, + 0x2421e5d236704588, + 0x887af7d4dd482328, + 0xa5e9c291f6119b27, + 0xbdc52b2676a4b4aa, + 0x64832009d29bcf57, + 0x887af7d4dd482328, + 0xa5e9c291f6119b27, + 0xbdc52b2676a4b4aa, + 0x64832009d29bcf57, + 0x9c4155174a552cc, + 0x463f9ee03d290810, + 0xc810936e64982542, + 0x43b1c289f7bc3ac, + 0x9c4155174a552cc, + 0x463f9ee03d290810, + 0xc810936e64982542, + 0x43b1c289f7bc3ac, + 0x0, + 0x94877900674181c3, + 0xc6c67cc37a2a2bbd, + 0xd667c2055387940f, + 0x0, + 0x94877900674181c3, + 0xc6c67cc37a2a2bbd, + 0xd667c2055387940f, + 0xba63a63e94b5ff0, + 0x99460cc41b8f079f, + 0x7ff02375ed524bb3, + 0xea0870b47a8caf0e, + 0xba63a63e94b5ff0, + 0x99460cc41b8f079f, + 0x7ff02375ed524bb3, + 0xea0870b47a8caf0e, + 0xabcad82633b7bc9d, + 0x3b8d135261052241, + 0xfb4515f5e5b0d539, + 0x3ee8011c2b37f77c, + 0xabcad82633b7bc9d, + 0x3b8d135261052241, + 0xfb4515f5e5b0d539, + 0x3ee8011c2b37f77c, + 0x19, + 0x673655aae8be5a8b, + 0xd510fe714f39fa10, + 0x2c68a099b51c9e73, + 0x19, + 0x673655aae8be5a8b, + 0xd510fe714f39fa10, + 0x2c68a099b51c9e73, + 0xa667bfa9aa96999d, + 0x4d67e72f063e2108, + 0xf84dde3e6acda179, + 0x40f9cc8c08f80981, + 0xa667bfa9aa96999d, + 0x4d67e72f063e2108, + 0xf84dde3e6acda179, + 0x40f9cc8c08f80981, + 0x5ead032050097142, + 0x6591b02092d671bb, + 0xe18c71963dd1b7, + 0x8a21bcd24a14218a, + 0x5ead032050097142, + 0x6591b02092d671bb, + 0xe18c71963dd1b7, + 0x8a21bcd24a14218a, + 0x0, + 0xadef3740e71c726, + 0xa37bf67c6f986559, + 0xc6b16f7ed4fa1b00, + 0x0, + 0xadef3740e71c726, + 0xa37bf67c6f986559, + 0xc6b16f7ed4fa1b00, + 0x6a065da88d8bfc3c, + 0x4cabc0916844b46f, + 0x407faac0f02e78d1, + 0x7a786d9cf0852cf, + 0x6a065da88d8bfc3c, + 0x4cabc0916844b46f, + 0x407faac0f02e78d1, + 0x7a786d9cf0852cf, + 0x42433fb6949a629a, + 0x891682a147ce43b0, + 0x26cfd58e7b003b55, + 0x2bbf0ed7b657acb3, + 0x42433fb6949a629a, + 0x891682a147ce43b0, + 0x26cfd58e7b003b55, + 0x2bbf0ed7b657acb3, + 0x19, + 0x202800f4addbdc87, + 0xe4b5bdb1cc3504ff, + 0xbe32b32a825596e7, + 0x19, + 0x202800f4addbdc87, + 0xe4b5bdb1cc3504ff, + 0xbe32b32a825596e7, + 0x8e0f68c5dc223b9a, + 0x58022d9e1c256ce3, + 0x584d29227aa073ac, + 0x8b9352ad04bef9e7, + 0x8e0f68c5dc223b9a, + 0x58022d9e1c256ce3, + 0x584d29227aa073ac, + 0x8b9352ad04bef9e7, + 0xaead42a3f445ecbf, + 0x3c667a1d833a3cca, + 0xda6f61838efa1ffe, + 0xe8f749470bd7c446, + 0xaead42a3f445ecbf, + 0x3c667a1d833a3cca, + 0xda6f61838efa1ffe, + 0xe8f749470bd7c446, + 0x0, + 0x481ac7746b159c67, + 0xe367de32f108e278, + 0x73f260087ad28bec, + 0x0, + 0x481ac7746b159c67, + 0xe367de32f108e278, + 0x73f260087ad28bec, + 0x5cfc82216bc1bdca, + 0xcaccc870a2663a0e, + 0xdb69cd7b4298c45d, + 0x7bc9e0c57243e62d, + 0x5cfc82216bc1bdca, + 0xcaccc870a2663a0e, + 0xdb69cd7b4298c45d, + 0x7bc9e0c57243e62d, + 0x3cc51c5d368693ae, + 0x366b4e8cc068895b, + 0x2bd18715cdabbca4, + 0xa752061c4f33b8cf, + 0x3cc51c5d368693ae, + 0x366b4e8cc068895b, + 0x2bd18715cdabbca4, + 0xa752061c4f33b8cf, + 0x19, + 0xc5b85bab9e5b3869, + 0x45245258aec51cf7, + 0x16e6b8e68b931830, + 0x19, + 0xc5b85bab9e5b3869, + 0x45245258aec51cf7, + 0x16e6b8e68b931830, + 0xe2ae0f051418112c, + 0x470e26a0093a65b, + 0x6bef71973a8146ed, + 0x119265be51812daf, + 0xe2ae0f051418112c, + 0x470e26a0093a65b, + 0x6bef71973a8146ed, + 0x119265be51812daf, + 0xb0be7356254bea2e, + 0x8584defff7589bd7, + 0x3c5fe4aeb1fb52ba, + 0x9e7cd88acf543a5e, + 0xb0be7356254bea2e, + 0x8584defff7589bd7, + 0x3c5fe4aeb1fb52ba, + 0x9e7cd88acf543a5e, + 0x0, + 0xb22d2432b72d5098, + 0x9e18a487f44d2fe4, + 0x4b39e14ce22abd3c, + 0x0, + 0xb22d2432b72d5098, + 0x9e18a487f44d2fe4, + 0x4b39e14ce22abd3c, + 0x9e77fde2eb315e0d, + 0xca5e0385fe67014d, + 0xc2cb99bf1b6bddb, + 0x99ec1cd2a4460bfe, + 0x9e77fde2eb315e0d, + 0xca5e0385fe67014d, + 0xc2cb99bf1b6bddb, + 0x99ec1cd2a4460bfe, + 0x8577a815a2ff843f, + 0x7d80a6b4fd6518a5, + 0xeb6c67123eab62cb, + 0x8f7851650eca21a5, + 0x8577a815a2ff843f, + 0x7d80a6b4fd6518a5, + 0xeb6c67123eab62cb, + 0x8f7851650eca21a5, + 0x19, + 0x179be4bba87f0a8c, + 0xacf63d95d8887355, + 0x6696670196b0074f, + 0x19, + 0x179be4bba87f0a8c, + 0xacf63d95d8887355, + 0x6696670196b0074f, + 0xd99ddf1fe75085f9, + 0xc2597881fef0283b, + 0xcf48395ee6c54f14, + 0x15226a8e4cd8d3b6, + 0xd99ddf1fe75085f9, + 0xc2597881fef0283b, + 0xcf48395ee6c54f14, + 0x15226a8e4cd8d3b6, + 0xc053297389af5d3b, + 0x2c08893f0d1580e2, + 0xed3cbcff6fcc5ba, + 0xc82f510ecf81f6d0, + 0xc053297389af5d3b, + 0x2c08893f0d1580e2, + 0xed3cbcff6fcc5ba, + 0xc82f510ecf81f6d0, + 0x0, + 0x11ba9a1b81718c2a, + 0x9f7d798a3323410c, + 0xa821855c8c1cf5e5, + 0x0, + 0x11ba9a1b81718c2a, + 0x9f7d798a3323410c, + 0xa821855c8c1cf5e5, + 0x535e8d6fac0031b2, + 0x404e7c751b634320, + 0xa729353f6e55d354, + 0x4db97d92e58bb831, + 0x535e8d6fac0031b2, + 0x404e7c751b634320, + 0xa729353f6e55d354, + 0x4db97d92e58bb831, + 0xb53926c27897bf7d, + 0x965040d52fe115c5, + 0x9565fa41ebd31fd7, + 0xaae4438c877ea8f4, + 0xb53926c27897bf7d, + 0x965040d52fe115c5, + 0x9565fa41ebd31fd7, + 0xaae4438c877ea8f4, + 0x19, + 0x94b06183acb715cc, + 0x500392ed0d431137, + 0x861cc95ad5c86323, + 0x19, + 0x94b06183acb715cc, + 0x500392ed0d431137, + 0x861cc95ad5c86323, + 0x5830a443f86c4ac, + 0x3b68225874a20a7c, + 0x10b3309838e236fb, + 0x9b77fc8bcd559e2c, + 0x5830a443f86c4ac, + 0x3b68225874a20a7c, + 0x10b3309838e236fb, + 0x9b77fc8bcd559e2c, + 0xbdecf5e0cb9cb213, + 0x30276f1221ace5fa, + 0x7935dd342764a144, + 0xeac6db520bb03708, + 0xbdecf5e0cb9cb213, + 0x30276f1221ace5fa, + 0x7935dd342764a144, + 0xeac6db520bb03708, + 0x0, + 0x37f4e36af6073c6e, + 0x4edc0918210800e9, + 0xc44998e99eae4188, + 0x0, + 0x37f4e36af6073c6e, + 0x4edc0918210800e9, + 0xc44998e99eae4188, + 0x9f4310d05d068338, + 0x9ec7fe4350680f29, + 0xc5b2c1fdc0b50874, + 0xa01920c5ef8b2ebe, + 0x9f4310d05d068338, + 0x9ec7fe4350680f29, + 0xc5b2c1fdc0b50874, + 0xa01920c5ef8b2ebe, + 0x59fa6f8bd91d58ba, + 0x8bfc9eb89b515a82, + 0xbe86a7a2555ae775, + 0xcbb8bbaa3810babf, + 0x59fa6f8bd91d58ba, + 0x8bfc9eb89b515a82, + 0xbe86a7a2555ae775, + 0xcbb8bbaa3810babf, + 0x19, + 0x7186a80551025f8f, + 0x622247557e9b5371, + 0xc4cbe326d1ad9742, + 0x19, + 0x7186a80551025f8f, + 0x622247557e9b5371, + 0xc4cbe326d1ad9742, + 0x55f1523ac6a23ea2, + 0xa13dfe77a3d52f53, + 0xe30750b6301c0452, + 0x8bd488070a3a32b, + 0x55f1523ac6a23ea2, + 0xa13dfe77a3d52f53, + 0xe30750b6301c0452, + 0x8bd488070a3a32b, + 0xcd800caef5b72ae3, + 0x83329c90f04233ce, + 0xb5b99e6664a0a3ee, + 0x6b0731849e200a7f, + 0xcd800caef5b72ae3, + 0x83329c90f04233ce, + 0xb5b99e6664a0a3ee, + 0x6b0731849e200a7f, + 0x0, + 0x577f9a9e7ee3f9c2, + 0x88c522b949ace7b1, + 0x82f07007c8b72106, + 0x0, + 0x577f9a9e7ee3f9c2, + 0x88c522b949ace7b1, + 0x82f07007c8b72106, + 0x8283d37c6675b50e, + 0x98b074d9bbac1123, + 0x75c56fb7758317c1, + 0xfed24e206052bc72, + 0x8283d37c6675b50e, + 0x98b074d9bbac1123, + 0x75c56fb7758317c1, + 0xfed24e206052bc72, + 0x26d7c3d1bc07dae5, + 0xf88c5e441e28dbb4, + 0x4fe27f9f96615270, + 0x514d4ba49c2b14fe, + 0x26d7c3d1bc07dae5, + 0xf88c5e441e28dbb4, + 0x4fe27f9f96615270, + 0x514d4ba49c2b14fe, + 0x19, + 0xec3fabc192b01799, + 0x382b38cee8ee5375, + 0x3bfb6c3f0e616572, + 0x19, + 0xec3fabc192b01799, + 0x382b38cee8ee5375, + 0x3bfb6c3f0e616572, + 0x514abd0cf6c7bc86, + 0x47521b1361dcc546, + 0x178093843f863d14, + 0xad1003c5d28918e7, + 0x514abd0cf6c7bc86, + 0x47521b1361dcc546, + 0x178093843f863d14, + 0xad1003c5d28918e7, + 0x738450e42495bc81, + 0xaf947c59af5e4047, + 0x4653fb0685084ef2, + 0x57fde2062ae35bf, + 0x738450e42495bc81, + 0xaf947c59af5e4047, + 0x4653fb0685084ef2, + 0x57fde2062ae35bf, + 0x0, + 0xf02a3ac068ee110b, + 0xa3630dafb8ae2d7, + 0xce0dc874eaf9b55c, + 0x0, + 0xf02a3ac068ee110b, + 0xa3630dafb8ae2d7, + 0xce0dc874eaf9b55c, + 0x9a95f6cff5b55c7e, + 0x626d76abfed00c7b, + 0xa0c1cf1251c204ad, + 0xdaebd3006321052c, + 0x9a95f6cff5b55c7e, + 0x626d76abfed00c7b, + 0xa0c1cf1251c204ad, + 0xdaebd3006321052c, + 0x3d4bd48b625a8065, + 0x7f1e584e071f6ed2, + 0x720574f0501caed3, + 0xe3260ba93d23540a, + 0x3d4bd48b625a8065, + 0x7f1e584e071f6ed2, + 0x720574f0501caed3, + 0xe3260ba93d23540a, + 0x19, + 0xe376678d843ce55e, + 0x66f3860d7514e7fc, + 0x7817f3dfff8b4ffa, + 0x19, + 0xe376678d843ce55e, + 0x66f3860d7514e7fc, + 0x7817f3dfff8b4ffa, + 0x3929624a9def725b, + 0x126ca37f215a80a, + 0xfce2f5d02762a303, + 0x1bc927375febbad7, + 0x3929624a9def725b, + 0x126ca37f215a80a, + 0xfce2f5d02762a303, + 0x1bc927375febbad7, + 0x85b481e5243f60bf, + 0x2d3c5f42a39c91a0, + 0x811719919351ae8, + 0xf669de0add993131, + 0x85b481e5243f60bf, + 0x2d3c5f42a39c91a0, + 0x811719919351ae8, + 0xf669de0add993131, + 0x0, + 0xab1cbd41d8c1e335, + 0x9322ed4c0bc2df01, + 0x51c3c0983d4284e5, + 0x0, + 0xab1cbd41d8c1e335, + 0x9322ed4c0bc2df01, + 0x51c3c0983d4284e5, + 0x94178e291145c231, + 0xfd0f1a973d6b2085, + 0xd427ad96e2b39719, + 0x8a52437fecaac06b, + 0x94178e291145c231, + 0xfd0f1a973d6b2085, + 0xd427ad96e2b39719, + 0x8a52437fecaac06b, + 0xdc20ee4b8c4c9a80, + 0xa2c98e9549da2100, + 0x1603fe12613db5b6, + 0xe174929433c5505, + 0xdc20ee4b8c4c9a80, + 0xa2c98e9549da2100, + 0x1603fe12613db5b6, + 0xe174929433c5505, + 0x19, + 0x7de38bae084da92d, + 0x5b848442237e8a9b, + 0xf6c705da84d57310, + 0x19, + 0x7de38bae084da92d, + 0x5b848442237e8a9b, + 0xf6c705da84d57310, + 0x31e6a4bdb6a49017, + 0x889489706e5c5c0f, + 0xe4a205459692a1b, + 0xbac3fa75ee26f299, + 0x31e6a4bdb6a49017, + 0x889489706e5c5c0f, + 0xe4a205459692a1b, + 0xbac3fa75ee26f299, + 0x5f5894f4057d755e, + 0xb0dc3ecd724bb076, + 0x5e34d8554a6452ba, + 0x4f78fd8c1fdcc5f, + 0x5f5894f4057d755e, + 0xb0dc3ecd724bb076, + 0x5e34d8554a6452ba, + 0x4f78fd8c1fdcc5f, + 0x0, + 0x3d4eab2b8ef5f796, + 0xcfff421583896e22, + 0x4143cb32d39ac3d9, + 0x0, + 0x3d4eab2b8ef5f796, + 0xcfff421583896e22, + 0x4143cb32d39ac3d9, + 0x22365051b78a5b65, + 0x6f7fd010d027c9b6, + 0xd9dd36fba77522ab, + 0xa44cf1cb33e37165, + 0x22365051b78a5b65, + 0x6f7fd010d027c9b6, + 0xd9dd36fba77522ab, + 0xa44cf1cb33e37165, + 0x3fc83d3038c86417, + 0xc4588d418e88d270, + 0xce1320f10ab80fe2, + 0xdb5eadbbec18de5d, + 0x3fc83d3038c86417, + 0xc4588d418e88d270, + 0xce1320f10ab80fe2, + 0xdb5eadbbec18de5d, + 0x19, + 0x4dd19c38779512ea, + 0xdb79ba02704620e9, + 0x92a29a3675a5d2be, + 0x19, + 0x4dd19c38779512ea, + 0xdb79ba02704620e9, + 0x92a29a3675a5d2be, + 0xd5177029fe495166, + 0xd32b3298a13330c1, + 0x251c4a3eb2c5f8fd, + 0xe1c48b26e0d98825, + 0xd5177029fe495166, + 0xd32b3298a13330c1, + 0x251c4a3eb2c5f8fd, + 0xe1c48b26e0d98825, + 0x3301d3362a4ffccb, + 0x9bb6c88de8cd178, + 0xdc05b676564f538a, + 0x60192d883e473fee, + 0x3301d3362a4ffccb, + 0x9bb6c88de8cd178, + 0xdc05b676564f538a, + 0x60192d883e473fee, + 0x0, + 0x1183dfce7c454afd, + 0x21cea4aa3d3ed949, + 0xfce6f70303f2304, + 0x0, + 0x1183dfce7c454afd, + 0x21cea4aa3d3ed949, + 0xfce6f70303f2304, + 0x19557d34b55551be, + 0x4c56f689afc5bbc9, + 0xa1e920844334f944, + 0xbad66d423d2ec861, + 0x19557d34b55551be, + 0x4c56f689afc5bbc9, + 0xa1e920844334f944, + 0xbad66d423d2ec861, + 0xf318c785dc9e0479, + 0x99e2032e765ddd81, + 0x400ccc9906d66f45, + 0xe1197454db2e0dd9, + 0xf318c785dc9e0479, + 0x99e2032e765ddd81, + 0x400ccc9906d66f45, + 0xe1197454db2e0dd9, + 0x19, + 0x16b9774801ac44a0, + 0x3cb8411e786d3c8e, + 0xa86e9cf505072491, + 0x19, + 0x16b9774801ac44a0, + 0x3cb8411e786d3c8e, + 0xa86e9cf505072491, + 0x178928152e109ae, + 0x5317b905a6e1ab7b, + 0xda20b3be7f53d59f, + 0xcb97dedecebee9ad, + 0x178928152e109ae, + 0x5317b905a6e1ab7b, + 0xda20b3be7f53d59f, + 0xcb97dedecebee9ad, + 0x4bd545218c59f58d, + 0x77dc8d856c05a44a, + 0x87948589e4f243fd, + 0x7e5217af969952c2, + 0x4bd545218c59f58d, + 0x77dc8d856c05a44a, + 0x87948589e4f243fd, + 0x7e5217af969952c2, + 0x0, + 0x84d1ecc4d53d2ff1, + 0xd8af8b9ceb4e11b6, + 0x335856bb527b52f4, + 0x0, + 0x84d1ecc4d53d2ff1, + 0xd8af8b9ceb4e11b6, + 0x335856bb527b52f4, + 0xc756f17fb59be595, + 0xc0654e4ea5553a78, + 0x9e9a46b61f2ea942, + 0x14fc8b5b3b809127, + 0xc756f17fb59be595, + 0xc0654e4ea5553a78, + 0x9e9a46b61f2ea942, + 0x14fc8b5b3b809127, + 0xd7009f0f103be413, + 0x3e0ee7b7a9fb4601, + 0xa74e888922085ed7, + 0xe80a7cde3d4ac526, + 0xd7009f0f103be413, + 0x3e0ee7b7a9fb4601, + 0xa74e888922085ed7, + 0xe80a7cde3d4ac526, + 0x19, + 0xbc58987d06a84e4d, + 0xb5d420244c9cae3, + 0xa3c4711b938c02c0, + 0x19, + 0xbc58987d06a84e4d, + 0xb5d420244c9cae3, + 0xa3c4711b938c02c0, + 0x3aace640a3e03990, + 0x865a0f3249aacd8a, + 0x8d00b2a7dbed06c7, + 0x6eacb905beb7e2f8, + 0x3aace640a3e03990, + 0x865a0f3249aacd8a, + 0x8d00b2a7dbed06c7, + 0x6eacb905beb7e2f8, + 0x45322b216ec3ec7, + 0xeb9de00d594828e6, + 0x88c5f20df9e5c26, + 0xf555f4112b19781f, + 0x45322b216ec3ec7, + 0xeb9de00d594828e6, + 0x88c5f20df9e5c26, + 0xf555f4112b19781f, + 0x0, + 0x238aa6daa612186d, + 0x9137a5c630bad4b4, + 0xc7db3817870c5eda, + 0x0, + 0x238aa6daa612186d, + 0x9137a5c630bad4b4, + 0xc7db3817870c5eda, + 0x217e4f04e5718dc9, + 0xcae814e2817bd99d, + 0xe3292e7ab770a8ba, + 0x7bb36ef70b6b9482, + 0x217e4f04e5718dc9, + 0xcae814e2817bd99d, + 0xe3292e7ab770a8ba, + 0x7bb36ef70b6b9482, + 0x3c7835fb85bca2d3, + 0xfe2cdf8ee3c25e86, + 0x61b3915ad7274b20, + 0xeab75ca7c918e4ef, + 0x3c7835fb85bca2d3, + 0xfe2cdf8ee3c25e86, + 0x61b3915ad7274b20, + 0xeab75ca7c918e4ef, + 0x19, + 0xa8cedbff1813d3a7, + 0x50dcaee0fd27d164, + 0xf1cb02417e23bd82, + 0x19, + 0xa8cedbff1813d3a7, + 0x50dcaee0fd27d164, + 0xf1cb02417e23bd82, + 0xfaf322786e2abe8b, + 0x937a4315beb5d9b6, + 0x1b18992921a11d85, + 0x7d66c4368b3c497b, + 0xfaf322786e2abe8b, + 0x937a4315beb5d9b6, + 0x1b18992921a11d85, + 0x7d66c4368b3c497b, + 0xe7946317a6b4e99, + 0xbe4430134182978b, + 0x3771e82493ab262d, + 0xa671690d8095ce82, + 0xe7946317a6b4e99, + 0xbe4430134182978b, + 0x3771e82493ab262d, + 0xa671690d8095ce82, + 0x0, + 0xd6e15ffc055e154e, + 0xec67881f381a32bf, + 0xfbb1196092bf409c, + 0x0, + 0xd6e15ffc055e154e, + 0xec67881f381a32bf, + 0xfbb1196092bf409c, + 0xdc9d2e07830ba226, + 0x698ef3245ff7988, + 0x194fae2974f8b576, + 0x7a5d9bea6ca4910e, + 0xdc9d2e07830ba226, + 0x698ef3245ff7988, + 0x194fae2974f8b576, + 0x7a5d9bea6ca4910e, + 0x7aebfea95ccdd1c9, + 0xf9bd38a67d5f0e86, + 0xfa65539de65492d8, + 0xf0dfcbe7653ff787, + 0x7aebfea95ccdd1c9, + 0xf9bd38a67d5f0e86, + 0xfa65539de65492d8, + 0xf0dfcbe7653ff787, + 0x19, + 0xb035585f6e929d9d, + 0xba1579c7e219b954, + 0xcb201cf846db4ba3, + 0x19, + 0xb035585f6e929d9d, + 0xba1579c7e219b954, + 0xcb201cf846db4ba3, + 0x287bf9177372cf45, + 0xa350e4f61147d0a6, + 0xd5d0ecfb50bcff99, + 0x2e166aa6c776ed21, + 0x287bf9177372cf45, + 0xa350e4f61147d0a6, + 0xd5d0ecfb50bcff99, + 0x2e166aa6c776ed21, + 0xe1e66c991990e282, + 0x662b329b01e7bb38, + 0x8aa674b36144d9a9, + 0xcbabf78f97f95e65, + 0xe1e66c991990e282, + 0x662b329b01e7bb38, + 0x8aa674b36144d9a9, + 0xcbabf78f97f95e65, + 0x0, + 0xbd87ad390420258, + 0xad8617bca9e33c8, + 0xc00ad377a1e2666, + 0x0, + 0xbd87ad390420258, + 0xad8617bca9e33c8, + 0xc00ad377a1e2666, + 0xac6fc58b3f0518f, + 0xc0cc8a892cc4173, + 0xc210accb117bc21, + 0xb73630dbb46ca18, + 0xac6fc58b3f0518f, + 0xc0cc8a892cc4173, + 0xc210accb117bc21, + 0xb73630dbb46ca18, + 0xc8be4920cbd4a54, + 0xbfe877a21be1690, + 0xae790559b0ded81, + 0xbf50db2f8d6ce31, + 0xc8be4920cbd4a54, + 0xbfe877a21be1690, + 0xae790559b0ded81, + 0xbf50db2f8d6ce31, + 0x19, + 0xeec24b15a06b53fe, + 0xc8a7aa07c5633533, + 0xefe9c6fa4311ad51, + 0x19, + 0xeec24b15a06b53fe, + 0xc8a7aa07c5633533, + 0xefe9c6fa4311ad51, + 0xb9173f13977109a1, + 0x69ce43c9cc94aedc, + 0xecf623c9cd118815, + 0x28625def198c33c7, + 0xb9173f13977109a1, + 0x69ce43c9cc94aedc, + 0xecf623c9cd118815, + 0x28625def198c33c7, + 0xccfc5f7de5c3636a, + 0xf5e6c40f1621c299, + 0xcec0e58c34cb64b1, + 0xa868ea113387939f, + 0xccfc5f7de5c3636a, + 0xf5e6c40f1621c299, + 0xcec0e58c34cb64b1, + 0xa868ea113387939f, + 0x0, + 0xcf29427ff7c58, + 0xbd9b3cf49eec8, + 0xd1dc8aa81fb26, + 0x0, + 0xcf29427ff7c58, + 0xbd9b3cf49eec8, + 0xd1dc8aa81fb26, + 0xbc792d5c394ef, + 0xd2ae0b2266453, + 0xd413f12c496c1, + 0xc84128cfed618, + 0xbc792d5c394ef, + 0xd2ae0b2266453, + 0xd413f12c496c1, + 0xc84128cfed618, + 0xdb5ebd48fc0d4, + 0xd1b77326dcb90, + 0xbeb0ccc145421, + 0xd10e5b22b11d1, + 0xdb5ebd48fc0d4, + 0xd1b77326dcb90, + 0xbeb0ccc145421, + 0xd10e5b22b11d1, + 0x19, + 0xd8dddbdc5ce4ef45, + 0xacfc51de8131458c, + 0x146bb3c0fe499ac0, + 0x19, + 0xd8dddbdc5ce4ef45, + 0xacfc51de8131458c, + 0x146bb3c0fe499ac0, + 0x9e65309f15943903, + 0x80d0ad980773aa70, + 0xf97817d4ddbf0607, + 0xe4626620a75ba276, + 0x9e65309f15943903, + 0x80d0ad980773aa70, + 0xf97817d4ddbf0607, + 0xe4626620a75ba276, + 0xdfdc7fd6fc74f66, + 0xf464864ad6f2bb93, + 0x2d55e52a5d44414, + 0xdd8de62487c40925, + 0xdfdc7fd6fc74f66, + 0xf464864ad6f2bb93, + 0x2d55e52a5d44414, + 0xdd8de62487c40925, + 0x0, + 0xe24c99adad8, + 0xcf389ed4bc8, + 0xe580cbf6966, + 0x0, + 0xe24c99adad8, + 0xcf389ed4bc8, + 0xe580cbf6966, + 0xcde5fd7e04f, + 0xe63628041b3, + 0xe7e81a87361, + 0xdabe78f6d98, + 0xcde5fd7e04f, + 0xe63628041b3, + 0xe7e81a87361, + 0xdabe78f6d98, + 0xefb14cac554, + 0xe5574743b10, + 0xd05709f42c1, + 0xe4690c96af1, + 0xefb14cac554, + 0xe5574743b10, + 0xd05709f42c1, + 0xe4690c96af1, + 0x19, + 0xc15acf44759545a3, + 0xcbfdcf39869719d4, + 0x33f62042e2f80225, + 0x19, + 0xc15acf44759545a3, + 0xcbfdcf39869719d4, + 0x33f62042e2f80225, + 0x2599c5ead81d8fa3, + 0xb306cb6c1d7c8d0, + 0x658c80d3df3729b1, + 0xe8d1b2b21b41429c, + 0x2599c5ead81d8fa3, + 0xb306cb6c1d7c8d0, + 0x658c80d3df3729b1, + 0xe8d1b2b21b41429c, + 0xa1b67f09d4b3ccb8, + 0xe1adf8b84437180, + 0xd593a5e584af47b, + 0xa023d94c56e151c7, + 0xa1b67f09d4b3ccb8, + 0xe1adf8b84437180, + 0xd593a5e584af47b, + 0xa023d94c56e151c7, + 0x0, + 0xf7157bc98, + 0xe3006d948, + 0xfa65811e6, + 0x0, + 0xf7157bc98, + 0xe3006d948, + 0xfa65811e6, + 0xe0d127e2f, + 0xfc18bfe53, + 0xfd002d901, + 0xeed6461d8, + 0xe0d127e2f, + 0xfc18bfe53, + 0xfd002d901, + 0xeed6461d8, + 0x1068562754, + 0xfa0236f50, + 0xe3af13ee1, + 0xfa460f6d1, + 0x1068562754, + 0xfa0236f50, + 0xe3af13ee1, + 0xfa460f6d1, + 0x19, + 0x49026cc3a4afc5a6, + 0xe06dff00ab25b91b, + 0xab38c561e8850ff, + 0x19, + 0x49026cc3a4afc5a6, + 0xe06dff00ab25b91b, + 0xab38c561e8850ff, + 0x92c3c8275e105eeb, + 0xb65256e546889bd0, + 0x3c0468236ea142f6, + 0xee61766b889e18f2, + 0x92c3c8275e105eeb, + 0xb65256e546889bd0, + 0x3c0468236ea142f6, + 0xee61766b889e18f2, + 0xa206f41b12c30415, + 0x2fe9d756c9f12d1, + 0xe9633210630cbf12, + 0x1ffea9fe85a0b0b1, + 0xa206f41b12c30415, + 0x2fe9d756c9f12d1, + 0xe9633210630cbf12, + 0x1ffea9fe85a0b0b1, + 0x0, + 0x11131738, + 0xf56d588, + 0x11050f86, + 0x0, + 0x11131738, + 0xf56d588, + 0x11050f86, + 0xf848f4f, + 0x111527d3, + 0x114369a1, + 0x106f2f38, + 0xf848f4f, + 0x111527d3, + 0x114369a1, + 0x106f2f38, + 0x11e2ca94, + 0x110a29f0, + 0xfa9f5c1, + 0x10f625d1, + 0x11e2ca94, + 0x110a29f0, + 0xfa9f5c1, + 0x10f625d1, + 0x19, + 0x81d1ae8cc50240f3, + 0xf4c77a079a4607d7, + 0xed446b2315e3efc1, + 0x19, + 0x81d1ae8cc50240f3, + 0xf4c77a079a4607d7, + 0xed446b2315e3efc1, + 0xb0a6b70915178c3, + 0xb11ff3e089f15d9a, + 0x1d4dba0b7ae9cc18, + 0x65d74e2f43b48d05, + 0xb0a6b70915178c3, + 0xb11ff3e089f15d9a, + 0x1d4dba0b7ae9cc18, + 0x65d74e2f43b48d05, + 0xa2df8c6b8ae0804a, + 0xa4e6f0a8c33348a6, + 0xc0a26efc7be5669b, + 0xa6b6582c547d0d60, + 0xa2df8c6b8ae0804a, + 0xa4e6f0a8c33348a6, + 0xc0a26efc7be5669b, + 0xa6b6582c547d0d60, + 0x0, + 0x11f718, + 0x10b6c8, + 0x134a96, + 0x0, + 0x11f718, + 0x10b6c8, + 0x134a96, + 0x10cf7f, + 0x124d03, + 0x13f8a1, + 0x117c58, + 0x10cf7f, + 0x124d03, + 0x13f8a1, + 0x117c58, + 0x132c94, + 0x134fc0, + 0x10a091, + 0x128961, + 0x132c94, + 0x134fc0, + 0x10a091, + 0x128961, + 0x19, + 0x84afc741f1c13213, + 0x2f8f43734fc906f3, + 0xde682d72da0a02d9, + 0x19, + 0x84afc741f1c13213, + 0x2f8f43734fc906f3, + 0xde682d72da0a02d9, + 0xbb005236adb9ef2, + 0x5bdf35c10a8b5624, + 0x739a8a343950010, + 0x52f515f44785cfbc, + 0xbb005236adb9ef2, + 0x5bdf35c10a8b5624, + 0x739a8a343950010, + 0x52f515f44785cfbc, + 0xcbaf4e5d82856c60, + 0xac9ea09074e3e150, + 0x8f0fa011a2035fb0, + 0x1a37905d8450904a, + 0xcbaf4e5d82856c60, + 0xac9ea09074e3e150, + 0x8f0fa011a2035fb0, + 0x1a37905d8450904a, + 0x0, + 0x1300, + 0x1750, + 0x114e, + 0x0, + 0x1300, + 0x1750, + 0x114e, + 0x131f, + 0x167b, + 0x1371, + 0x1230, + 0x131f, + 0x167b, + 0x1371, + 0x1230, + 0x182c, + 0x1368, + 0xf31, + 0x15c9, + 0x182c, + 0x1368, + 0xf31, + 0x15c9, + 0x19, + 0x3abeb80def61cc85, + 0x9d19c9dd4eac4133, + 0x75a652d9641a985, + 0x19, + 0x3abeb80def61cc85, + 0x9d19c9dd4eac4133, + 0x75a652d9641a985, + 0x9daf69ae1b67e667, + 0x364f71da77920a18, + 0x50bd769f745c95b1, + 0xf223d1180dbbf3fc, + 0x9daf69ae1b67e667, + 0x364f71da77920a18, + 0x50bd769f745c95b1, + 0xf223d1180dbbf3fc, + 0x2f885e584e04aa99, + 0xb69a0fa70aea684a, + 0x9584acaa6e062a0, + 0xbc051640145b19b, + 0x2f885e584e04aa99, + 0xb69a0fa70aea684a, + 0x9584acaa6e062a0, + 0xbc051640145b19b, + 0x0, + 0x14, + 0x22, + 0x12, + 0x0, + 0x14, + 0x22, + 0x12, + 0x27, + 0xd, + 0xd, + 0x1c, + 0x27, + 0xd, + 0xd, + 0x1c, + 0x2, + 0x10, + 0x29, + 0xf, + 0x2, + 0x10, + 0x29, + 0xf, + ]; + (c_str, m_str, p_str, s_str) +} diff --git a/test/stark_aggregation.sh b/test/stark_aggregation.sh index a1226d00..3522ddf1 100755 --- a/test/stark_aggregation.sh +++ b/test/stark_aggregation.sh @@ -5,6 +5,9 @@ set -ex if [ "x${USE_AVX2}" = "xyes" ]; then # build with avx2 feature RUSTFLAGS="-C target-feature=+avx2" cargo build --release +elif [ "x${USE_AVX512}" = "xyes" ]; then + # build with avx512 feature + RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --features avx512 --release else cargo build --release fi