diff --git a/.github/workflows/saffron.yml b/.github/workflows/saffron.yml index e0edf7cd13..947eb62d7f 100644 --- a/.github/workflows/saffron.yml +++ b/.github/workflows/saffron.yml @@ -35,6 +35,11 @@ jobs: run: | cargo build --release --bin saffron - - name: Run the saffron e2e encoding tests + - name: Run the saffron e2e encoding tests on small lorem file run: | - ./saffron/test-encoding.sh saffron/fixtures/lorem.txt \ No newline at end of file + ./saffron/test-encoding.sh saffron/fixtures/lorem.txt + + - name: Run the saffron e2e encoding on large random file + run: | + base64 /dev/urandom | head -c 100000000 | tr -dc "A-Za-z0-9 " | fold -w100 | head -n 1000000 > bigfile.txt + RUST_LOG=debug ./saffron/test-encoding.sh bigfile.txt \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index c8f212d0ee..8dacb7a941 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2605,13 +2605,20 @@ name = "saffron" version = "0.1.0" dependencies = [ "anyhow", + "ark-ec", "ark-ff", + "ark-poly", "ark-serialize", "ark-std", "clap 4.4.18", + "env_logger", + "log", "mina-curves", "o1-utils", + "once_cell", + "poly-commitment", "proptest", + "rayon", ] [[package]] diff --git a/saffron/Cargo.toml b/saffron/Cargo.toml index ef77f57cf7..7227e82ad2 100644 --- a/saffron/Cargo.toml +++ b/saffron/Cargo.toml @@ -18,13 +18,20 @@ path = "src/main.rs" [dependencies] anyhow = "1.0" +ark-ec.workspace = true ark-ff.workspace = true +ark-poly.workspace = true ark-serialize = { workspace = true, features = ["derive"]} clap = { workspace = true, features = ["derive"] } +env_logger.workspace = true +log.workspace = true mina-curves.workspace = true o1-utils.workspace = true +poly-commitment.workspace = true +rayon.workspace = true [dev-dependencies] ark-std.workspace = true -proptest.workspace = true \ No newline at end of file +proptest.workspace = true +once_cell.workspace = true \ No newline at end of file diff --git a/saffron/src/cli.rs b/saffron/src/cli.rs index 6f539136a3..615da9d651 100644 --- a/saffron/src/cli.rs +++ b/saffron/src/cli.rs @@ -1,6 +1,6 @@ use clap::{arg, Parser}; -#[derive(Parser, Debug, Clone)] +#[derive(Parser)] pub struct EncodeFileArgs { #[arg(long, short = 'i', value_name = "FILE", help = "input file")] pub input: String, @@ -14,7 +14,7 @@ pub struct EncodeFileArgs { pub output: String, } -#[derive(Parser, Debug, Clone)] +#[derive(Parser)] pub struct DecodeFileArgs { #[arg( long, @@ -28,7 +28,18 @@ pub struct DecodeFileArgs { pub output: String, } -#[derive(Parser, Debug, Clone)] +#[derive(Parser)] +pub struct CommitArgs { + #[arg( + long, + short = 'i', + value_name = "FILE", + help = "input file (encoded as field elements)" + )] + pub input: String, +} + +#[derive(Parser)] #[command( name = "saffron", version = "0.1", diff --git a/saffron/src/lib.rs b/saffron/src/lib.rs index 29994c6126..8017b38b8d 100644 --- a/saffron/src/lib.rs +++ b/saffron/src/lib.rs @@ -1,3 +1,2 @@ -pub mod serialization; - pub mod cli; +pub mod serialization; diff --git a/saffron/src/main.rs b/saffron/src/main.rs index a8dfa711e6..771c6dbc11 100644 --- a/saffron/src/main.rs +++ b/saffron/src/main.rs @@ -1,6 +1,8 @@ use anyhow::Result; +use ark_poly::{EvaluationDomain, Radix2EvaluationDomain}; use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; use clap::Parser; +use log::debug; use mina_curves::pasta::Fp; use saffron::{cli, serialization::FieldBlob}; use std::{ @@ -8,22 +10,28 @@ use std::{ io::{Read, Write}, }; +const SRS_SIZE: usize = 1 << 16; + fn decode_file(args: cli::DecodeFileArgs) -> Result<()> { + let domain = Radix2EvaluationDomain::new(SRS_SIZE).unwrap(); + debug!("Decoding file using domain of size {}", domain.size()); let mut file = File::open(args.input)?; let mut buf = Vec::new(); file.read_to_end(&mut buf)?; let blob: FieldBlob = FieldBlob::::deserialize_compressed(&buf[..])?; - let data = FieldBlob::::decode(blob); + let data = FieldBlob::::decode(domain, blob); let mut writer = File::create(args.output)?; writer.write_all(&data)?; Ok(()) } fn encode_file(args: cli::EncodeFileArgs) -> Result<()> { + let domain = Radix2EvaluationDomain::new(SRS_SIZE).unwrap(); + debug!("Encoding file using domain of size {}", domain.size()); let mut file = File::open(args.input)?; let mut buf = Vec::new(); file.read_to_end(&mut buf)?; - let blob = FieldBlob::::encode(&buf); + let blob = FieldBlob::::encode(domain, &buf); let mut bytes_to_write = Vec::with_capacity(buf.len()); blob.serialize_compressed(&mut bytes_to_write)?; let mut writer = File::create(args.output)?; @@ -32,6 +40,7 @@ fn encode_file(args: cli::EncodeFileArgs) -> Result<()> { } pub fn main() -> Result<()> { + env_logger::init(); let args = cli::Commands::parse(); match args { cli::Commands::Encode(args) => encode_file(args), diff --git a/saffron/src/serialization.rs b/saffron/src/serialization.rs index 292ccda8af..d40b70cdb9 100644 --- a/saffron/src/serialization.rs +++ b/saffron/src/serialization.rs @@ -1,9 +1,12 @@ -use ark_ff::{BigInteger, PrimeField}; +use ark_ff::{BigInteger, Field, PrimeField}; +use ark_poly::{univariate::DensePolynomial, EvaluationDomain, Evaluations}; use ark_serialize::{ CanonicalDeserialize, CanonicalSerialize, Compress, Read, SerializationError, Valid, Validate, Write, }; +use log::debug; use o1_utils::FieldHelpers; +use rayon::prelude::*; // For injectivity, you can only use this on inputs of length at most // 'F::MODULUS_BIT_SIZE / 8', e.g. for Vesta this is 31. @@ -17,82 +20,121 @@ fn decode(x: Fp) -> Vec { // A FieldBlob represents the encoding of a Vec as a Vec where F is a prime field. #[derive(Clone, Debug, PartialEq)] -pub struct FieldBlob { +pub struct FieldBlob { pub n_bytes: usize, - pub data: Vec, + pub domain_size: usize, + pub data: Vec>, } -impl CanonicalSerialize for FieldBlob { +impl CanonicalSerialize for FieldBlob { fn serialize_with_mode( &self, mut writer: W, mode: Compress, ) -> Result<(), SerializationError> { self.n_bytes.serialize_with_mode(&mut writer, mode)?; + self.domain_size.serialize_with_mode(&mut writer, mode)?; self.data.serialize_with_mode(&mut writer, mode)?; Ok(()) } fn serialized_size(&self, mode: Compress) -> usize { - self.n_bytes.serialized_size(mode) + self.data.serialized_size(mode) + self.n_bytes.serialized_size(mode) + + self.domain_size.serialized_size(mode) + + self.data.serialized_size(mode) } } -impl Valid for FieldBlob { +impl Valid for FieldBlob { fn check(&self) -> Result<(), SerializationError> { self.n_bytes.check()?; + self.domain_size.check()?; self.data.check()?; Ok(()) } } -impl CanonicalDeserialize for FieldBlob { +impl CanonicalDeserialize for FieldBlob { fn deserialize_with_mode( mut reader: R, compress: Compress, validate: Validate, ) -> Result { let n_bytes = usize::deserialize_with_mode(&mut reader, compress, validate)?; - let data = Vec::::deserialize_with_mode(&mut reader, compress, validate)?; - Ok(Self { n_bytes, data }) + let domain_size = usize::deserialize_with_mode(&mut reader, compress, validate)?; + let data = + Vec::>::deserialize_with_mode(&mut reader, compress, validate)?; + Ok(Self { + n_bytes, + domain_size, + data, + }) } } impl FieldBlob { - // Encode a bytestring as a list of field elements. - pub fn encode(bytes: &[u8]) -> FieldBlob { + // Encode a bytestring as a list of polynomials in coefficient form. + pub fn encode>(domain: D, bytes: &[u8]) -> FieldBlob { let n = (F::MODULUS_BIT_SIZE / 8) as usize; - let data = bytes + let domain_size = domain.size(); + + let field_elements = bytes .chunks(n) .map(|chunk| { let mut bytes = vec![0u8; n]; bytes[..chunk.len()].copy_from_slice(chunk); encode(&bytes) }) - .collect::>(); + .collect::>(); + + let data: Vec> = field_elements + .par_chunks(domain_size) + .map(|chunk| Evaluations::from_vec_and_domain(chunk.to_vec(), domain).interpolate()) + .collect(); + + debug!( + "Encoded {} bytes into {} polynomials", + bytes.len(), + data.len() + ); + FieldBlob { n_bytes: bytes.len(), + domain_size, data, } } - // Decode a list of field elements as a bytestring. - pub fn decode(blob: FieldBlob) -> Vec { + // Decode a list of polynomials (in coefficient form) as a bytestring. + pub fn decode>(domain: D, blob: FieldBlob) -> Vec { let n = (F::MODULUS_BIT_SIZE / 8) as usize; let m = F::size_in_bytes(); - blob.data - .into_iter() - .flat_map(|x| decode(x).as_slice()[(m - n)..m].to_vec()) - .take(blob.n_bytes) - .collect() + + let bytes: Vec = blob + .data + .into_par_iter() + .flat_map(|p: DensePolynomial| { + let evals = p.evaluate_over_domain(domain).evals; + + // Convert evaluations to bytes + evals + .into_par_iter() + .flat_map(|x| decode(x).as_slice()[(m - n)..m].to_vec()) + .collect::>() + }) + .collect::>(); + + bytes.into_iter().take(blob.n_bytes).collect() } } #[cfg(test)] mod tests { use super::*; + use ark_poly::Radix2EvaluationDomain; use ark_std::UniformRand; use mina_curves::pasta::Fp; + use once_cell::sync::Lazy; use proptest::prelude::*; // Check that [u8] -> Fp -> [u8] is the identity function. @@ -117,17 +159,23 @@ mod tests { } } + static DOMAIN: Lazy> = Lazy::new(|| { + const SRS_SIZE: usize = 1 << 16; + Radix2EvaluationDomain::new(SRS_SIZE).unwrap() + }); + // check that Vec -> FieldBlob -> Vec is the identity function proptest! { + #![proptest_config(ProptestConfig::with_cases(20))] #[test] fn test_round_trip_blob_encoding( xs in any::>()) - { let blob = FieldBlob::::encode(&xs); + { let blob = FieldBlob::::encode(*DOMAIN, &xs); let mut buf = Vec::new(); blob.serialize_compressed(&mut buf).unwrap(); let a = FieldBlob::::deserialize_compressed(&buf[..]).unwrap(); // check that ark-serialize is behaving as expected prop_assert_eq!(blob.clone(), a); - let ys = FieldBlob::::decode(blob); + let ys = FieldBlob::::decode(*DOMAIN, blob); // check that we get the byte blob back again prop_assert_eq!(xs,ys); }