diff --git a/algebraic/src/arch/x86_64/avx2_field_gl.rs b/algebraic/src/arch/x86_64/avx2_field_gl.rs index 03fc5d5d..a7aebabb 100644 --- a/algebraic/src/arch/x86_64/avx2_field_gl.rs +++ b/algebraic/src/arch/x86_64/avx2_field_gl.rs @@ -6,12 +6,12 @@ //! use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; +use crate::packed::PackedField; use core::arch::x86_64::*; use core::fmt; use core::fmt::{Debug, Formatter}; use core::mem::transmute; use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}; -// use crate::packed::PackedField; /// AVX2 Goldilocks Field /// @@ -24,8 +24,6 @@ use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAss #[repr(transparent)] pub struct Avx2GoldilocksField(pub [GoldilocksField; 4]); -const WIDTH: usize = 4; - impl Avx2GoldilocksField { #[inline] pub fn new(x: __m256i) -> Self { @@ -35,30 +33,36 @@ impl Avx2GoldilocksField { pub fn get(&self) -> __m256i { unsafe { transmute(*self) } } - // } - // unsafe impl PackedField for Avx2GoldilocksField { #[inline] - pub fn from_slice(slice: &[GoldilocksField]) -> &Self { - assert_eq!(slice.len(), WIDTH); + pub fn square(&self) -> Avx2GoldilocksField { + Self::new(unsafe { square(self.get()) }) + } +} + +unsafe impl PackedField for Avx2GoldilocksField { + const WIDTH: usize = 4; + type Scalar = GoldilocksField; + const ZEROS: Self = Self([GoldilocksField([0]); 4]); + const ONES: Self = Self([GoldilocksField([1]); 4]); + + #[inline] + fn from_slice(slice: &[GoldilocksField]) -> &Self { + assert_eq!(slice.len(), Self::WIDTH); unsafe { &*slice.as_ptr().cast() } } #[inline] - pub fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { - assert_eq!(slice.len(), WIDTH); + fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { + assert_eq!(slice.len(), Self::WIDTH); unsafe { &mut *slice.as_mut_ptr().cast() } } #[inline] - pub fn as_slice(&self) -> &[GoldilocksField] { + fn as_slice(&self) -> &[GoldilocksField] { &self.0[..] } #[inline] - pub fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { + fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { &mut self.0[..] } - #[inline] - pub fn square(&self) -> Avx2GoldilocksField { - Self::new(unsafe { square(self.get()) }) - } #[inline] fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) { @@ -117,7 +121,7 @@ impl Debug for Avx2GoldilocksField { impl Default for Avx2GoldilocksField { #[inline] fn default() -> Self { - Self([GoldilocksField::from(0); 4]) + Self::ZEROS } } @@ -503,8 +507,8 @@ mod tests { use super::Avx2GoldilocksField; use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; + use crate::packed::PackedField; use std::time::Instant; - // use crate::packed::PackedField; fn test_vals_a() -> [GoldilocksField; 4] { [ diff --git a/algebraic/src/arch/x86_64/avx512_field_gl.rs b/algebraic/src/arch/x86_64/avx512_field_gl.rs index fe9e8942..84d93dbf 100644 --- a/algebraic/src/arch/x86_64/avx512_field_gl.rs +++ b/algebraic/src/arch/x86_64/avx512_field_gl.rs @@ -5,6 +5,7 @@ //! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --release use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; +use crate::packed::PackedField; use core::arch::x86_64::*; use core::fmt; use core::fmt::{Debug, Formatter}; @@ -34,27 +35,36 @@ impl Avx512GoldilocksField { unsafe { transmute(*self) } } #[inline] - pub fn from_slice(slice: &[GoldilocksField]) -> &Self { + pub fn square(&self) -> Avx512GoldilocksField { + Self::new(unsafe { square(self.get()) }) + } +} + +unsafe impl PackedField for Avx512GoldilocksField { + const WIDTH: usize = 8; + + type Scalar = GoldilocksField; + + const ZEROS: Self = Self([GoldilocksField([0]); 8]); + const ONES: Self = Self([GoldilocksField([1]); 8]); + #[inline] + fn from_slice(slice: &[GoldilocksField]) -> &Self { assert_eq!(slice.len(), WIDTH); unsafe { &*slice.as_ptr().cast() } } #[inline] - pub fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { + fn from_slice_mut(slice: &mut [GoldilocksField]) -> &mut Self { assert_eq!(slice.len(), WIDTH); unsafe { &mut *slice.as_mut_ptr().cast() } } #[inline] - pub fn as_slice(&self) -> &[GoldilocksField] { + fn as_slice(&self) -> &[GoldilocksField] { &self.0[..] } #[inline] - pub fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { + fn as_slice_mut(&mut self) -> &mut [GoldilocksField] { &mut self.0[..] } - #[inline] - pub fn square(&self) -> Avx512GoldilocksField { - Self::new(unsafe { square(self.get()) }) - } #[inline] fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) { @@ -114,7 +124,7 @@ impl Debug for Avx512GoldilocksField { impl Default for Avx512GoldilocksField { #[inline] fn default() -> Self { - Self([GoldilocksField::from(0); 8]) + Self::ZEROS } } @@ -397,6 +407,7 @@ mod tests { use super::Avx512GoldilocksField; use crate::ff::*; use crate::field_gl::{Fr, FrRepr as GoldilocksField}; + use crate::packed::PackedField; use std::time::Instant; fn test_vals_a() -> [GoldilocksField; 8] { diff --git a/algebraic/src/lib.rs b/algebraic/src/lib.rs index 5806fec3..80b8d24a 100644 --- a/algebraic/src/lib.rs +++ b/algebraic/src/lib.rs @@ -1,6 +1,6 @@ #![allow(clippy::unit_arg)] - -// #![feature(stdsimd)] +#![feature(stdsimd)] +#![feature(const_trait_impl)] #[macro_use] extern crate serde; @@ -13,7 +13,7 @@ extern crate num_bigint; extern crate num_traits; extern crate rand; -// pub mod arch; +pub mod arch; pub mod circom_circuit; pub mod errors; @@ -24,13 +24,15 @@ pub mod witness; pub mod utils; +pub mod packable; +pub mod packed; + pub use bellman_ce::pairing::ff; pub use ff::*; pub use franklin_crypto::bellman as bellman_ce; #[cfg(test)] mod field_gl_test; -// mod packed; #[cfg(target_arch = "wasm32")] extern crate wasm_bindgen; diff --git a/algebraic/src/packable.rs b/algebraic/src/packable.rs index c6f8cd36..6aa4e72b 100644 --- a/algebraic/src/packable.rs +++ b/algebraic/src/packable.rs @@ -1,15 +1,18 @@ +use crate::ff::PrimeFieldRepr; use crate::packed::PackedField; -use crate::types::Field; /// Points us to the default packing for a particular field. There may me multiple choices of /// PackedField for a particular Field (e.g. every Field is also a PackedField), but this is the /// recommended one. The recommended packing varies by target_arch and target_feature. -pub trait Packable: Field { +pub trait Packable: PrimeFieldRepr { type Packing: PackedField; } -impl Packable for F { - default type Packing = Self; +impl Packable for F +where + F: PrimeFieldRepr + PackedField, +{ + type Packing = Self; } #[cfg(all( @@ -23,8 +26,8 @@ impl Packable for F { target_feature = "avx512vl" )) ))] -impl Packable for crate::goldilocks_field::GoldilocksField { - type Packing = crate::arch::x86_64::avx2_goldilocks_field::Avx2GoldilocksField; +impl Packable for crate::field_gl::FrRepr { + type Packing = crate::arch::x86_64::avx2_field_gl::Avx2GoldilocksField; } #[cfg(all( @@ -35,6 +38,6 @@ impl Packable for crate::goldilocks_field::GoldilocksField { target_feature = "avx512f", target_feature = "avx512vl" ))] -impl Packable for crate::goldilocks_field::GoldilocksField { - type Packing = crate::arch::x86_64::avx512_goldilocks_field::Avx512GoldilocksField; +impl Packable for crate::field_gl::FrRepr { + type Packing = crate::arch::x86_64::avx512_field_gl::Avx512GoldilocksField; } diff --git a/algebraic/src/packed.rs b/algebraic/src/packed.rs index 0236962d..e4cebb2b 100644 --- a/algebraic/src/packed.rs +++ b/algebraic/src/packed.rs @@ -1,11 +1,9 @@ use core::fmt::Debug; -use core::iter::{Product, Sum}; +// use core::iter::{Product, Sum}; +use crate::ff::PrimeFieldRepr; use core::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Sub, SubAssign}; use core::slice; -// use crate::ops::Square; -use crate::ff::*; - /// # Safety /// - WIDTH is assumed to be a power of 2. /// - If P implements PackedField then P must be castable to/from [P::Scalar; P::WIDTH] without UB. @@ -27,20 +25,20 @@ pub unsafe trait PackedField: + MulAssign // + Square + Neg - + Product + // + Product + Send + Sub + Sub + SubAssign + SubAssign - + Sum + // + Sum + Sync where Self::Scalar: Add, Self::Scalar: Mul, Self::Scalar: Sub, { - type Scalar: Field; + type Scalar: PrimeFieldRepr; const WIDTH: usize; const ZEROS: Self; @@ -93,35 +91,4 @@ where unsafe { slice::from_raw_parts_mut(buf_ptr, n) } } - fn doubles(&self) -> Self { - *self * Self::Scalar::TWO - } -} - -unsafe impl PackedField for F { - type Scalar = Self; - - const WIDTH: usize = 1; - const ZEROS: Self = F::ZERO; - const ONES: Self = F::ONE; - - fn from_slice(slice: &[Self::Scalar]) -> &Self { - &slice[0] - } - fn from_slice_mut(slice: &mut [Self::Scalar]) -> &mut Self { - &mut slice[0] - } - fn as_slice(&self) -> &[Self::Scalar] { - slice::from_ref(self) - } - fn as_slice_mut(&mut self) -> &mut [Self::Scalar] { - slice::from_mut(self) - } - - fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) { - match block_len { - 1 => (*self, other), - _ => panic!("unsupported block length"), - } - } } diff --git a/starky/src/arch/mod.rs b/starky/src/arch/mod.rs new file mode 100644 index 00000000..832557ef --- /dev/null +++ b/starky/src/arch/mod.rs @@ -0,0 +1,2 @@ +#[cfg(target_arch = "x86_64")] +pub mod x86_64; diff --git a/starky/src/arch/x86_64/avx2_poseidon_gl.rs b/starky/src/arch/x86_64/avx2_poseidon_gl.rs new file mode 100644 index 00000000..099b9b4e --- /dev/null +++ b/starky/src/arch/x86_64/avx2_poseidon_gl.rs @@ -0,0 +1,357 @@ +#![allow(non_snake_case)] +use crate::constant::POSEIDON_CONSTANTS_OPT; +use crate::poseidon_constants_opt as constants; +use algebraic::arch::x86_64::avx2_field_gl::Avx2GoldilocksField; +use core::arch::x86_64::*; +use plonky::field_gl::Fr as FGL; +use plonky::Field; +use plonky::PrimeField; + +#[derive(Debug)] +pub struct Constants { + pub c: Vec, + pub m: Vec>, + pub p: Vec>, + pub s: Vec, + pub n_rounds_f: usize, + pub n_rounds_p: usize, +} + +pub fn load_constants() -> Constants { + let (c_str, m_str, p_str, s_str) = constants::constants(); + let mut c: Vec = Vec::new(); + for v1 in c_str { + c.push(FGL::from(v1)); + } + let mut m: Vec> = Vec::new(); + for v1 in m_str { + let mut mi: Vec = Vec::new(); + for v2 in v1 { + mi.push(FGL::from(v2)); + } + m.push(mi); + } + + let mut p: Vec> = Vec::new(); + for v1 in p_str { + let mut mi: Vec = Vec::new(); + for v2 in v1 { + mi.push(FGL::from(v2)); + } + p.push(mi); + } + + let mut s: Vec = Vec::new(); + for v1 in s_str { + s.push(FGL::from(v1)); + } + + Constants { + c, + m, + p, + s, + n_rounds_f: 8, + n_rounds_p: 22, + } +} + +pub struct Poseidon; + +impl Default for Poseidon { + fn default() -> Self { + Self::new() + } +} + +impl Poseidon { + pub fn new() -> Poseidon { + Self {} + } + + #[inline(always)] + fn pow7(x: &mut FGL) { + let aux = *x; + x.square(); + x.mul_assign(&aux); + x.square(); + x.mul_assign(&aux); + } + + #[inline(always)] + fn pow7_avx2(x: &mut Avx2GoldilocksField) { + let aux = *x; + *x = x.square(); + *x *= aux; + *x = x.square(); + *x *= aux; + } + + pub fn hash(&self, inp: &Vec, init_state: &[FGL], out: usize) -> Result, String> { + self.hash_inner(inp, init_state, out) + } + + fn hash_inner( + &self, + inp: &Vec, + init_state: &[FGL], + out: usize, + ) -> Result, String> { + if inp.len() != 8 { + return Err(format!("Wrong inputs length {} != 8", inp.len(),)); + } + + let t = 12; + let n_rounds_f = POSEIDON_CONSTANTS_OPT.n_rounds_f; + let n_rounds_p = POSEIDON_CONSTANTS_OPT.n_rounds_p; + let C = &POSEIDON_CONSTANTS_OPT.c; + let S = &POSEIDON_CONSTANTS_OPT.s; + let M = &POSEIDON_CONSTANTS_OPT.m; + let P = &POSEIDON_CONSTANTS_OPT.p; + + let mut state = vec![FGL::ZERO; t]; + if init_state.len() != 4 { + return Err(format!("Capacity inputs length {} != 4", init_state.len(),)); + } + + state[0..8].clone_from_slice(inp); + state[8..].clone_from_slice(init_state); + + state + .iter_mut() + .enumerate() + .for_each(|(i, a)| a.add_assign(&C[i])); + + let mut tmp_state = vec![FGL::ZERO; t]; + for r in 0..(n_rounds_f / 2 - 1) { + // state.iter_mut().for_each(Self::pow7); + state.chunks_exact_mut(4).for_each(|chunk| { + let mut field_chunk = Avx2GoldilocksField([ + chunk[0].into_repr(), + chunk[1].into_repr(), + chunk[2].into_repr(), + chunk[3].into_repr(), + ]); + + Self::pow7_avx2(&mut field_chunk); + + for (i, field) in field_chunk.0.iter().enumerate() { + chunk[i] = FGL::from_repr(*field).unwrap(); + } + }); + state.iter_mut().enumerate().for_each(|(i, a)| { + a.add_assign(&C[(r + 1) * t + i]); + }); + + let sz = state.len(); + tmp_state.iter_mut().enumerate().for_each(|(i, out)| { + let mut acc = FGL::ZERO; + for j in 0..sz { + let mut tmp = M[j][i]; + tmp.mul_assign(&state[j]); + acc.add_assign(&tmp); + } + *out = acc; + }); + state + .iter_mut() + .zip(tmp_state.iter()) + .for_each(|(out, inp)| { + *out = *inp; + }); + } + + // state.iter_mut().for_each(Self::pow7); + state.chunks_exact_mut(4).for_each(|chunk| { + let mut field_chunk = Avx2GoldilocksField([ + chunk[0].into_repr(), + chunk[1].into_repr(), + chunk[2].into_repr(), + chunk[3].into_repr(), + ]); + + Self::pow7_avx2(&mut field_chunk); + + for (i, field) in field_chunk.0.iter().enumerate() { + chunk[i] = FGL::from_repr(*field).unwrap(); + } + }); + state.iter_mut().enumerate().for_each(|(i, a)| { + a.add_assign(&C[(n_rounds_f / 2 - 1 + 1) * t + i]); + }); //opt + + let sz = state.len(); + tmp_state.iter_mut().enumerate().for_each(|(i, out)| { + let mut acc = FGL::ZERO; + for j in 0..sz { + let mut tmp = P[j][i]; + tmp.mul_assign(&state[j]); + acc.add_assign(&tmp); + } + *out = acc; + }); + state + .iter_mut() + .zip(tmp_state.iter()) + .for_each(|(out, inp)| { + *out = *inp; + }); + + for r in 0..n_rounds_p { + Self::pow7(&mut state[0]); + state[0].add_assign(&C[(n_rounds_f / 2 + 1) * t + r]); + + let sz = state.len(); + let mut s0 = FGL::ZERO; + for j in 0..sz { + let mut tmp = S[(t * 2 - 1) * r + j]; + tmp.mul_assign(&state[j]); + s0.add_assign(&tmp); + } + + for k in 1..t { + let mut tmp = S[(t * 2 - 1) * r + t + k - 1]; + tmp.mul_assign(&state[0]); + state[k].add_assign(&tmp); + } + + state[0] = s0; + } + + for r in 0..(n_rounds_f / 2 - 1) { + // state.iter_mut().for_each(Self::pow7); + state.chunks_exact_mut(4).for_each(|chunk| { + let mut field_chunk = Avx2GoldilocksField([ + chunk[0].into_repr(), + chunk[1].into_repr(), + chunk[2].into_repr(), + chunk[3].into_repr(), + ]); + + Self::pow7_avx2(&mut field_chunk); + + for (i, field) in field_chunk.0.iter().enumerate() { + chunk[i] = FGL::from_repr(*field).unwrap(); + } + }); + state.iter_mut().enumerate().for_each(|(i, a)| { + a.add_assign(&C[(n_rounds_f / 2 + 1) * t + n_rounds_p + r * t + i]); + }); + + let sz = state.len(); + tmp_state.iter_mut().enumerate().for_each(|(i, out)| { + let mut acc = FGL::ZERO; + for j in 0..sz { + let mut tmp = M[j][i]; + tmp.mul_assign(&state[j]); + acc.add_assign(&tmp); + } + *out = acc; + }); + state + .iter_mut() + .zip(tmp_state.iter()) + .for_each(|(out, inp)| { + *out = *inp; + }); + } + + // state.iter_mut().for_each(Self::pow7); + state.chunks_exact_mut(4).for_each(|chunk| { + let mut field_chunk = Avx2GoldilocksField([ + chunk[0].into_repr(), + chunk[1].into_repr(), + chunk[2].into_repr(), + chunk[3].into_repr(), + ]); + + Self::pow7_avx2(&mut field_chunk); + + for (i, field) in field_chunk.0.iter().enumerate() { + chunk[i] = FGL::from_repr(*field).unwrap(); + } + }); + let sz = state.len(); + tmp_state.iter_mut().enumerate().for_each(|(i, out)| { + let mut acc = FGL::ZERO; + for j in 0..sz { + let mut tmp = M[j][i]; + tmp.mul_assign(&state[j]); + acc.add_assign(&tmp); + } + *out = acc; + }); + state = tmp_state; + + Ok(state[0..out].to_vec()) + } +} + +#[cfg(test)] +mod tests { + use crate::arch::x86_64::avx2_poseidon_gl::*; + use algebraic::arch::x86_64::avx2_field_gl::Avx2GoldilocksField; + use algebraic::packed::PackedField; + use plonky::field_gl::Fr as FGL; + use plonky::PrimeField; + use rand::Rand; + + #[test] + fn test_pow7_avx2() { + let mut rng = rand::thread_rng(); + let x = FGL::rand(&mut rng); + let x7 = x * x * x * x * x * x * x; + let a_arr = [x.into_repr(), x.into_repr(), x.into_repr(), x.into_repr()]; + let packed_a = Avx2GoldilocksField::from_slice(&a_arr); + let mut x = *packed_a; + Poseidon::pow7_avx2(&mut x); + let arr_res = x.as_slice(); + assert_eq!(x7.into_repr(), arr_res[0]); + } + + #[test] + fn test_poseidon_opt_hash_all_0() { + let poseidon = Poseidon::new(); + let input = vec![FGL::ZERO; 8]; + let state = vec![FGL::ZERO; 4]; + let res = poseidon.hash(&input, &state, 4).unwrap(); + let expected = vec![ + FGL::from(0x3c18a9786cb0b359u64), + FGL::from(0xc4055e3364a246c3u64), + FGL::from(0x7953db0ab48808f4u64), + FGL::from(0xc71603f33a1144cau64), + ]; + assert_eq!(res, expected); + } + + #[test] + fn test_poseidon_opt_hash_1_11() { + let poseidon = Poseidon::new(); + let input = (0u64..8).map(FGL::from).collect::>(); + let state = (8u64..12).map(FGL::from).collect::>(); + let res = poseidon.hash(&input, &state, 4).unwrap(); + let expected = vec![ + FGL::from(0xd64e1e3efc5b8e9eu64), + FGL::from(0x53666633020aaa47u64), + FGL::from(0xd40285597c6a8825u64), + FGL::from(0x613a4f81e81231d2u64), + ]; + assert_eq!(res, expected); + } + + #[test] + fn test_poseidon_opt_hash_all_neg_1() { + let poseidon = Poseidon::new(); + let init = FGL::ZERO - FGL::ONE; + let input = vec![init; 8]; + let state = vec![init; 4]; + let res = poseidon.hash(&input, &state, 4).unwrap(); + let expected = vec![ + FGL::from(0xbe0085cfc57a8357u64), + FGL::from(0xd95af71847d05c09u64), + FGL::from(0xcf55a13d33c1c953u64), + FGL::from(0x95803a74f4530e82u64), + ]; + assert_eq!(res, expected); + } +} diff --git a/starky/src/arch/x86_64/avx512_poseidon_gl.rs b/starky/src/arch/x86_64/avx512_poseidon_gl.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/starky/src/arch/x86_64/avx512_poseidon_gl.rs @@ -0,0 +1 @@ + diff --git a/starky/src/arch/x86_64/mod.rs b/starky/src/arch/x86_64/mod.rs new file mode 100644 index 00000000..f3314f29 --- /dev/null +++ b/starky/src/arch/x86_64/mod.rs @@ -0,0 +1,20 @@ +#[cfg(all( + target_feature = "avx2", + not(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" + )) +))] +pub mod avx2_poseidon_gl; + +#[cfg(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl" +))] +pub mod avx512_poseidon_gl; diff --git a/starky/src/lib.rs b/starky/src/lib.rs index 078cc6dc..ed4a4848 100644 --- a/starky/src/lib.rs +++ b/starky/src/lib.rs @@ -1,5 +1,8 @@ #![allow(clippy::needless_range_loop)] #![allow(dead_code)] +#![feature(stdsimd)] +#![feature(const_trait_impl)] + pub mod polsarray; mod polutils; pub mod stark_verifier_circom; @@ -7,6 +10,8 @@ pub mod stark_verifier_circom_bn128; pub mod traits; pub mod types; +pub mod arch; + mod compressor12; pub use compressor12::*;