Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: avx512 acceleration #169

Merged
merged 8 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions algebraic/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ wasm-bindgen-test = "0.3"
[features]
default = ["franklin-crypto/multicore", "wasmer/default"]
wasm = ["wasmer/js-default"]
avx512 = []
24 changes: 17 additions & 7 deletions algebraic/src/arch/x86_64/avx512_field_gl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! https://github.com/0xPolygonZero/plonky2/blob/main/field/src/arch/x86_64/avx512_goldilocks_field.rs
//!
//! How to build/run/test:
//! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --release
//! RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo build --features "avx512" --release
use crate::ff::*;
use crate::field_gl::{Fr, FrRepr as GoldilocksField};
use crate::packed::PackedField;
Expand All @@ -27,14 +27,22 @@ const WIDTH: usize = 8;

impl Avx512GoldilocksField {
#[inline]
fn new(x: __m512i) -> Self {
pub fn new(x: __m512i) -> Self {
unsafe { transmute(x) }
}
#[inline]
fn get(&self) -> __m512i {
pub fn get(&self) -> __m512i {
unsafe { transmute(*self) }
}
/// Safe public entry point for the `interleave2` routine that is in scope
/// for this module (its definition is outside this view).
///
/// Both vectors are forwarded unchanged, and the pair produced by that
/// routine is returned as-is.
#[inline]
pub fn interleave2(x: __m512i, y: __m512i) -> (__m512i, __m512i) {
    // A bare `interleave2` path cannot resolve to this associated function,
    // so this is a forward to the free function rather than a recursive call.
    // NOTE(review): presumably only sound when the AVX-512 target features
    // named in the module header are enabled -- confirm.
    let (first, second) = unsafe { interleave2(x, y) };
    (first, second)
}
/// Builds a packed field value by passing two raw vectors through
/// `reduce128` and wrapping the resulting vector with [`Self::new`].
///
/// The arguments are handed to `reduce128` as the tuple `(x, y)`.
/// NOTE(review): presumably `x` and `y` are the two 64-bit halves of each
/// 128-bit lane value; which one is the high half is not visible here --
/// confirm against `reduce128`.
#[inline]
pub fn reduce(x: __m512i, y: __m512i) -> Avx512GoldilocksField {
    let reduced = unsafe { reduce128((x, y)) };
    Self::new(reduced)
}
/// Returns a new packed value obtained by applying the module-level
/// `square` routine to this value's underlying vector.
///
/// `self` is not modified; the raw vector is read with `get`, transformed,
/// and re-wrapped with [`Self::new`].
#[inline]
pub fn square(&self) -> Avx512GoldilocksField {
    let raw = self.get();
    // A bare `square` path resolves to the free function, not this method.
    let squared = unsafe { square(raw) };
    Self::new(squared)
}
Expand Down Expand Up @@ -271,7 +279,8 @@ unsafe fn sub_no_double_overflow_64_64(x: __m512i, y: __m512i) -> __m512i {

#[inline]
unsafe fn add(x: __m512i, y: __m512i) -> __m512i {
add_no_double_overflow_64_64(x, canonicalize(y))
let res_s = add_no_double_overflow_64_64(x, canonicalize(y));
canonicalize(res_s)
}

#[inline]
Expand Down Expand Up @@ -356,7 +365,8 @@ unsafe fn reduce128(x: (__m512i, __m512i)) -> __m512i {
let hi_hi0 = _mm512_srli_epi64::<32>(hi0);
let lo1 = sub_no_double_overflow_64_64(lo0, hi_hi0);
let t1 = _mm512_mul_epu32(hi0, EPSILON);
let lo2 = add_no_double_overflow_64_64(lo1, t1);
let _lo2 = add_no_double_overflow_64_64(lo1, t1);
let lo2 = canonicalize(_lo2);
lo2
}

Expand Down Expand Up @@ -412,7 +422,7 @@ mod tests {

fn test_vals_a() -> [GoldilocksField; 8] {
[
GoldilocksField([14479013849828404771u64]),
GoldilocksField([18446744069414584320u64]),
GoldilocksField([9087029921428221768u64]),
GoldilocksField([2441288194761790662u64]),
GoldilocksField([5646033492608483824u64]),
Expand All @@ -424,7 +434,7 @@ mod tests {
}
fn test_vals_b() -> [GoldilocksField; 8] {
[
GoldilocksField([17891926589593242302u64]),
GoldilocksField([18446744069414584320u64]),
GoldilocksField([11009798273260028228u64]),
GoldilocksField([2028722748960791447u64]),
GoldilocksField([7929433601095175579u64]),
Expand Down
1 change: 1 addition & 0 deletions algebraic/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![allow(clippy::unit_arg)]
#![cfg_attr(feature = "avx512", feature(stdsimd))]

#[macro_use]
extern crate serde;
Expand Down
4 changes: 4 additions & 0 deletions starky/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,7 @@ harness = false
[[bench]]
name = "poseidon"
harness = false

[features]
default = []
avx512 = ["algebraic/avx512"]
7 changes: 7 additions & 0 deletions starky/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,15 @@ Because the nExtBits reaches up to 24 so this step would be very slow. Consider

```
cargo bench --bench merklehash -- --profile-time=5

# or with SIMD acceleration enabled: AVX-512 (first command, needs the `avx512` feature) or AVX2 (second command)

RUSTFLAGS='-C target-feature=+avx512f,+avx512bw,+avx512cd,+avx512dq,+avx512vl' cargo bench --features avx512 -- merklehash
RUSTFLAGS="-C target-feature=+avx2" cargo bench -- merklehash

```


* https://www.jibbow.com/posts/criterion-flamegraphs/


4 changes: 2 additions & 2 deletions starky/benches/merklehash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use criterion::*;
use plonky::field_gl::Fr as FGL;
use rayon::prelude::*;
use starky::merklehash_bn128::MerkleTreeBN128;
use starky::merklehash::MerkleTreeGL;
use starky::traits::MerkleTree;
mod perf;

Expand All @@ -12,7 +12,7 @@ fn run_merklehash(pols: Vec<FGL>) {
let n_pols = 10;

let now = std::time::Instant::now();
let mut tree: MerkleTreeBN128 = MerkleTree::new();
let mut tree: MerkleTreeGL = MerkleTree::new();
tree.merkelize(pols, n_pols, n).unwrap();
log::trace!("time cost: {}", now.elapsed().as_secs());
let (group_elements, mp) = tree.get_group_proof(idx).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion starky/src/arch/x86_64/avx2_poseidon_gl.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![allow(non_snake_case)]
use crate::constant::POSEIDON_CONSTANTS_OPT_AVX2;
use crate::poseidon_constants_avx as constants;
use crate::poseidon_constants_avx2 as constants;
use algebraic::arch::x86_64::avx2_field_gl::Avx2GoldilocksField;
use algebraic::packed::PackedField;
use core::arch::x86_64::*;
Expand Down
Loading
Loading