Skip to content

Commit

Permalink
chore: avx512 acceleration (#169)
Browse files Browse the repository at this point in the history
* chore: avx512 acceleration

---------

Co-authored-by: eigmax <[email protected]>
  • Loading branch information
ibmp33 and eigmax authored Dec 18, 2023
1 parent 3e0f3cf commit 27fc512
Show file tree
Hide file tree
Showing 15 changed files with 2,664 additions and 29 deletions.
1 change: 1 addition & 0 deletions algebraic/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ wasm-bindgen-test = "0.3"
[features]
default = ["franklin-crypto/multicore", "wasmer/default"]
wasm = ["wasmer/js-default"]
avx512 = []
24 changes: 17 additions & 7 deletions algebraic/src/arch/x86_64/avx512_field_gl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! https://github.com/0xPolygonZero/plonky2/blob/main/field/src/arch/x86_64/avx512_goldilocks_field.rs
//!
//! How to build/run/test:
//! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --release
//! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --features "avx512" --release
use crate::ff::*;
use crate::field_gl::{Fr, FrRepr as GoldilocksField};
use crate::packed::PackedField;
Expand All @@ -27,14 +27,22 @@ const WIDTH: usize = 8;

impl Avx512GoldilocksField {
#[inline]
fn new(x: __m512i) -> Self {
pub fn new(x: __m512i) -> Self {
unsafe { transmute(x) }
}
#[inline]
fn get(&self) -> __m512i {
pub fn get(&self) -> __m512i {
unsafe { transmute(*self) }
}
#[inline]
pub fn interleave2(x: __m512i, y: __m512i) -> (__m512i, __m512i) {
unsafe { interleave2(x, y) }
}
#[inline]
pub fn reduce(x: __m512i, y: __m512i) -> Avx512GoldilocksField {
Self::new(unsafe { reduce128((x, y)) })
}
#[inline]
pub fn square(&self) -> Avx512GoldilocksField {
Self::new(unsafe { square(self.get()) })
}
Expand Down Expand Up @@ -271,7 +279,8 @@ unsafe fn sub_no_double_overflow_64_64(x: __m512i, y: __m512i) -> __m512i {

#[inline]
unsafe fn add(x: __m512i, y: __m512i) -> __m512i {
add_no_double_overflow_64_64(x, canonicalize(y))
let res_s = add_no_double_overflow_64_64(x, canonicalize(y));
canonicalize(res_s)
}

#[inline]
Expand Down Expand Up @@ -356,7 +365,8 @@ unsafe fn reduce128(x: (__m512i, __m512i)) -> __m512i {
let hi_hi0 = _mm512_srli_epi64::<32>(hi0);
let lo1 = sub_no_double_overflow_64_64(lo0, hi_hi0);
let t1 = _mm512_mul_epu32(hi0, EPSILON);
let lo2 = add_no_double_overflow_64_64(lo1, t1);
let _lo2 = add_no_double_overflow_64_64(lo1, t1);
let lo2 = canonicalize(_lo2);
lo2
}

Expand Down Expand Up @@ -412,7 +422,7 @@ mod tests {

fn test_vals_a() -> [GoldilocksField; 8] {
[
GoldilocksField([14479013849828404771u64]),
GoldilocksField([18446744069414584320u64]),
GoldilocksField([9087029921428221768u64]),
GoldilocksField([2441288194761790662u64]),
GoldilocksField([5646033492608483824u64]),
Expand All @@ -424,7 +434,7 @@ mod tests {
}
fn test_vals_b() -> [GoldilocksField; 8] {
[
GoldilocksField([17891926589593242302u64]),
GoldilocksField([18446744069414584320u64]),
GoldilocksField([11009798273260028228u64]),
GoldilocksField([2028722748960791447u64]),
GoldilocksField([7929433601095175579u64]),
Expand Down
1 change: 1 addition & 0 deletions algebraic/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![allow(clippy::unit_arg)]
#![cfg_attr(feature = "avx512", feature(stdsimd))]

#[macro_use]
extern crate serde;
Expand Down
4 changes: 4 additions & 0 deletions starky/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,7 @@ harness = false
[[bench]]
name = "poseidon"
harness = false

[features]
default = []
avx512 = ["algebraic/avx512"]
7 changes: 7 additions & 0 deletions starky/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,15 @@ Because the nExtBits reaches up to 24 so this step would be very slow. Consider

```
cargo bench --bench merklehash -- --profile-time=5
# or with SIMD acceleration enabled: AVX-512 (requires the `avx512` feature) or AVX2, respectively
RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo bench --features avx512 -- merklehash
RUSTFLAGS="-C target-feature=+avx2" cargo bench -- merklehash
```


* https://www.jibbow.com/posts/criterion-flamegraphs/


4 changes: 2 additions & 2 deletions starky/benches/merklehash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use criterion::*;
use plonky::field_gl::Fr as FGL;
use rayon::prelude::*;
use starky::merklehash_bn128::MerkleTreeBN128;
use starky::merklehash::MerkleTreeGL;
use starky::traits::MerkleTree;
mod perf;

Expand All @@ -12,7 +12,7 @@ fn run_merklehash(pols: Vec<FGL>) {
let n_pols = 10;

let now = std::time::Instant::now();
let mut tree: MerkleTreeBN128 = MerkleTree::new();
let mut tree: MerkleTreeGL = MerkleTree::new();
tree.merkelize(pols, n_pols, n).unwrap();
log::trace!("time cost: {}", now.elapsed().as_secs());
let (group_elements, mp) = tree.get_group_proof(idx).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion starky/src/arch/x86_64/avx2_poseidon_gl.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![allow(non_snake_case)]
use crate::constant::POSEIDON_CONSTANTS_OPT_AVX2;
use crate::poseidon_constants_avx as constants;
use crate::poseidon_constants_avx2 as constants;
use algebraic::arch::x86_64::avx2_field_gl::Avx2GoldilocksField;
use algebraic::packed::PackedField;
use core::arch::x86_64::*;
Expand Down
Loading

0 comments on commit 27fc512

Please sign in to comment.