Skip to content

Commit

Permalink
En/Decode to polynomials in coefficient form. Add large file randomized test
Browse files Browse the repository at this point in the history
  • Loading branch information
martyall committed Jan 22, 2025
1 parent 22e114a commit 139508a
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 32 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/saffron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ jobs:
run: |
cargo build --release --bin saffron
- name: Run the saffron e2e encoding tests
- name: Run the saffron e2e encoding tests on small lorem file
run: |
./saffron/test-encoding.sh saffron/fixtures/lorem.txt
./saffron/test-encoding.sh saffron/fixtures/lorem.txt
- name: Run the saffron e2e encoding on large random file
run: |
base64 /dev/urandom | head -c 100000000 | tr -dc "A-Za-z0-9 " | fold -w100 | head -n 1000000 > bigfile.txt
RUST_LOG=debug ./saffron/test-encoding.sh bigfile.txt
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion saffron/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,20 @@ path = "src/main.rs"

[dependencies]
anyhow = "1.0"
ark-ec.workspace = true
ark-ff.workspace = true
ark-poly.workspace = true
ark-serialize = { workspace = true, features = ["derive"]}
clap = { workspace = true, features = ["derive"] }
env_logger.workspace = true
log.workspace = true
mina-curves.workspace = true
o1-utils.workspace = true
poly-commitment.workspace = true
rayon.workspace = true


[dev-dependencies]
ark-std.workspace = true
proptest.workspace = true
proptest.workspace = true
once_cell.workspace = true
17 changes: 14 additions & 3 deletions saffron/src/cli.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use clap::{arg, Parser};

#[derive(Parser, Debug, Clone)]
#[derive(Parser)]
pub struct EncodeFileArgs {
#[arg(long, short = 'i', value_name = "FILE", help = "input file")]
pub input: String,
Expand All @@ -14,7 +14,7 @@ pub struct EncodeFileArgs {
pub output: String,
}

#[derive(Parser, Debug, Clone)]
#[derive(Parser)]
pub struct DecodeFileArgs {
#[arg(
long,
Expand All @@ -28,7 +28,18 @@ pub struct DecodeFileArgs {
pub output: String,
}

// CLI arguments for the `commit` subcommand: a single input-file path.
//
// NOTE(review): this is a rendered diff; the first `#[derive(...)]` line is
// the pre-change attribute and the second is its replacement — stacked as
// literal source the duplicate `Parser` derive would not compile.
#[derive(Parser, Debug, Clone)]
#[derive(Parser)]
pub struct CommitArgs {
#[arg(
long,
short = 'i',
value_name = "FILE",
help = "input file (encoded as field elements)"
)]
// Path to the input file; per the help text it holds encoded field elements.
pub input: String,
}

#[derive(Parser)]
#[command(
name = "saffron",
version = "0.1",
Expand Down
3 changes: 1 addition & 2 deletions saffron/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
pub mod serialization;

pub mod cli;
pub mod serialization;
13 changes: 11 additions & 2 deletions saffron/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,37 @@
use anyhow::Result;
use ark_poly::{EvaluationDomain, Radix2EvaluationDomain};
use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
use clap::Parser;
use log::debug;
use mina_curves::pasta::Fp;
use saffron::{cli, serialization::FieldBlob};
use std::{
fs::File,
io::{Read, Write},
};

const SRS_SIZE: usize = 1 << 16;

// Decode a previously-encoded file: deserialize the `FieldBlob`, recover the
// original bytes, and write them to the output path.
//
// NOTE(review): this is a rendered diff; both the pre-change call
// `decode(blob)` and the post-change call `decode(domain, blob)` appear
// below — only the two-argument form matches the new API.
fn decode_file(args: cli::DecodeFileArgs) -> Result<()> {
// Fixed-size radix-2 evaluation domain; SRS_SIZE is 1 << 16 (const above).
let domain = Radix2EvaluationDomain::new(SRS_SIZE).unwrap();
debug!("Decoding file using domain of size {}", domain.size());
// Read the whole serialized blob into memory before deserializing.
let mut file = File::open(args.input)?;
let mut buf = Vec::new();
file.read_to_end(&mut buf)?;
let blob: FieldBlob<Fp> = FieldBlob::<Fp>::deserialize_compressed(&buf[..])?;
// Pre-change call site (single-argument form):
let data = FieldBlob::<Fp>::decode(blob);
// Post-change call site: evaluate the stored polynomials over `domain`.
let data = FieldBlob::<Fp>::decode(domain, blob);
let mut writer = File::create(args.output)?;
writer.write_all(&data)?;
Ok(())
}

fn encode_file(args: cli::EncodeFileArgs) -> Result<()> {
let domain = Radix2EvaluationDomain::new(SRS_SIZE).unwrap();
debug!("Encoding file using domain of size {}", domain.size());
let mut file = File::open(args.input)?;
let mut buf = Vec::new();
file.read_to_end(&mut buf)?;
let blob = FieldBlob::<Fp>::encode(&buf);
let blob = FieldBlob::<Fp>::encode(domain, &buf);
let mut bytes_to_write = Vec::with_capacity(buf.len());
blob.serialize_compressed(&mut bytes_to_write)?;
let mut writer = File::create(args.output)?;
Expand All @@ -32,6 +40,7 @@ fn encode_file(args: cli::EncodeFileArgs) -> Result<()> {
}

pub fn main() -> Result<()> {
env_logger::init();
let args = cli::Commands::parse();
match args {
cli::Commands::Encode(args) => encode_file(args),
Expand Down
92 changes: 70 additions & 22 deletions saffron/src/serialization.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use ark_ff::{BigInteger, PrimeField};
use ark_ff::{BigInteger, Field, PrimeField};
use ark_poly::{univariate::DensePolynomial, EvaluationDomain, Evaluations};
use ark_serialize::{
CanonicalDeserialize, CanonicalSerialize, Compress, Read, SerializationError, Valid, Validate,
Write,
};
use log::debug;
use o1_utils::FieldHelpers;
use rayon::prelude::*;

// For injectivity, you can only use this on inputs of length at most
// 'F::MODULUS_BIT_SIZE / 8', e.g. for Vesta this is 31.
Expand All @@ -17,82 +20,121 @@ fn decode<Fp: PrimeField>(x: Fp) -> Vec<u8> {

// A FieldBlob<F> represents the encoding of a Vec<u8> as a Vec<F> where F is a prime field.
//
// NOTE(review): rendered diff — both the old declaration (`FieldBlob<F>` with
// `data: Vec<F>`) and the new one (`FieldBlob<F: Field>` with `domain_size`
// and `data: Vec<DensePolynomial<F>>`) appear below; only the second set of
// lines is the post-change definition.
#[derive(Clone, Debug, PartialEq)]
pub struct FieldBlob<F> {
pub struct FieldBlob<F: Field> {
// Number of original input bytes; `decode` truncates its output to this.
pub n_bytes: usize,
pub data: Vec<F>,
// Size of the evaluation domain each polynomial was interpolated over.
pub domain_size: usize,
// One polynomial in coefficient form per domain-sized chunk of elements.
pub data: Vec<DensePolynomial<F>>,
}

impl<F: CanonicalSerialize> CanonicalSerialize for FieldBlob<F> {
impl<F: CanonicalSerialize + Field> CanonicalSerialize for FieldBlob<F> {
// Serialize the three fields in declaration order:
// n_bytes, then domain_size, then the polynomial data.
fn serialize_with_mode<W: Write>(
&self,
mut writer: W,
mode: Compress,
) -> Result<(), SerializationError> {
self.n_bytes.serialize_with_mode(&mut writer, mode)?;
self.domain_size.serialize_with_mode(&mut writer, mode)?;
self.data.serialize_with_mode(&mut writer, mode)?;
Ok(())
}

// Total serialized size is the sum of the three fields' sizes.
// NOTE(review): rendered diff — the first expression line below is the
// pre-change body (no domain_size); the three lines after it replace it.
fn serialized_size(&self, mode: Compress) -> usize {
self.n_bytes.serialized_size(mode) + self.data.serialized_size(mode)
self.n_bytes.serialized_size(mode)
+ self.domain_size.serialized_size(mode)
+ self.data.serialized_size(mode)
}
}

impl<F: Valid> Valid for FieldBlob<F> {
impl<F: Valid + Field> Valid for FieldBlob<F> {
// Validity check: delegate to each field, in declaration order, failing
// on the first field whose check fails.
fn check(&self) -> Result<(), SerializationError> {
self.n_bytes.check()?;
self.domain_size.check()?;
self.data.check()?;
Ok(())
}
}

impl<F: CanonicalDeserialize> CanonicalDeserialize for FieldBlob<F> {
impl<F: CanonicalDeserialize + Field> CanonicalDeserialize for FieldBlob<F> {
// Deserialize fields in the same order they were serialized:
// n_bytes, domain_size, data.
fn deserialize_with_mode<R: Read>(
mut reader: R,
compress: Compress,
validate: Validate,
) -> Result<Self, SerializationError> {
let n_bytes = usize::deserialize_with_mode(&mut reader, compress, validate)?;
// NOTE(review): rendered diff — the next two lines are the pre-change
// body (no domain_size, `Vec<F>` data); the lines after them are the
// post-change replacement. As literal source both bodies stacked here
// would not compile.
let data = Vec::<F>::deserialize_with_mode(&mut reader, compress, validate)?;
Ok(Self { n_bytes, data })
let domain_size = usize::deserialize_with_mode(&mut reader, compress, validate)?;
let data =
Vec::<DensePolynomial<F>>::deserialize_with_mode(&mut reader, compress, validate)?;
Ok(Self {
n_bytes,
domain_size,
data,
})
}
}

impl<F: PrimeField> FieldBlob<F> {
// Encode a bytestring as a list of field elements.
// NOTE(review): rendered diff — the signature on the next code line is the
// pre-change one; the signature after the following comment replaces it.
pub fn encode(bytes: &[u8]) -> FieldBlob<F> {
// Encode a bytestring as a list of polynomials in coefficient form.
pub fn encode<D: EvaluationDomain<F>>(domain: D, bytes: &[u8]) -> FieldBlob<F> {
// Max bytes that fit injectively into one field element (see the
// injectivity note at the top of this file).
let n = (F::MODULUS_BIT_SIZE / 8) as usize;
// Pre-change remnant: the old code bound the chunk map directly to `data`.
let data = bytes
let domain_size = domain.size();

// Zero-pad each n-byte chunk to exactly n bytes and lift it into F.
let field_elements = bytes
.chunks(n)
.map(|chunk| {
let mut bytes = vec![0u8; n];
bytes[..chunk.len()].copy_from_slice(chunk);
encode(&bytes)
})
// Pre-change remnant collect on the next line; the `Vec<_>` collect
// after it is the post-change form.
.collect::<Vec<F>>();
.collect::<Vec<_>>();

// Interpolate each domain-sized chunk of field elements into a
// polynomial in coefficient form, in parallel via rayon.
let data: Vec<DensePolynomial<F>> = field_elements
.par_chunks(domain_size)
.map(|chunk| Evaluations::from_vec_and_domain(chunk.to_vec(), domain).interpolate())
.collect();

debug!(
"Encoded {} bytes into {} polynomials",
bytes.len(),
data.len()
);

FieldBlob {
n_bytes: bytes.len(),
domain_size,
data,
}
}

// Decode a list of field elements as a bytestring.
// NOTE(review): rendered diff — old single-argument signature next; the
// new two-argument signature follows the comment after it.
pub fn decode(blob: FieldBlob<F>) -> Vec<u8> {
// Decode a list of polynomials (in coefficient form) as a bytestring.
pub fn decode<D: EvaluationDomain<F>>(domain: D, blob: FieldBlob<F>) -> Vec<u8> {
let n = (F::MODULUS_BIT_SIZE / 8) as usize;
let m = F::size_in_bytes();
// Pre-change remnant: the old sequential decode over Vec<F>.
blob.data
.into_iter()
.flat_map(|x| decode(x).as_slice()[(m - n)..m].to_vec())
.take(blob.n_bytes)
.collect()

// Post-change path: evaluate each polynomial over the domain, then map
// each evaluation back to the trailing n bytes of its byte encoding,
// in parallel via rayon.
let bytes: Vec<u8> = blob
.data
.into_par_iter()
.flat_map(|p: DensePolynomial<F>| {
let evals = p.evaluate_over_domain(domain).evals;

// Convert evaluations to bytes
evals
.into_par_iter()
.flat_map(|x| decode(x).as_slice()[(m - n)..m].to_vec())
.collect::<Vec<_>>()
})
.collect::<Vec<_>>();

// Drop the zero padding introduced by chunking: keep only the original
// n_bytes bytes.
bytes.into_iter().take(blob.n_bytes).collect()
}
}

#[cfg(test)]
mod tests {
use super::*;
use ark_poly::Radix2EvaluationDomain;
use ark_std::UniformRand;
use mina_curves::pasta::Fp;
use once_cell::sync::Lazy;
use proptest::prelude::*;

// Check that [u8] -> Fp -> [u8] is the identity function.
Expand All @@ -117,17 +159,23 @@ mod tests {
}
}

static DOMAIN: Lazy<Radix2EvaluationDomain<Fp>> = Lazy::new(|| {
const SRS_SIZE: usize = 1 << 16;
Radix2EvaluationDomain::new(SRS_SIZE).unwrap()
});

// check that Vec<u8> -> FieldBlob<Fp> -> Vec<u8> is the identity function
proptest! {
#![proptest_config(ProptestConfig::with_cases(20))]
#[test]
fn test_round_trip_blob_encoding( xs in any::<Vec<u8>>())
{ let blob = FieldBlob::<Fp>::encode(&xs);
{ let blob = FieldBlob::<Fp>::encode(*DOMAIN, &xs);
let mut buf = Vec::new();
blob.serialize_compressed(&mut buf).unwrap();
let a = FieldBlob::<Fp>::deserialize_compressed(&buf[..]).unwrap();
// check that ark-serialize is behaving as expected
prop_assert_eq!(blob.clone(), a);
let ys = FieldBlob::<Fp>::decode(blob);
let ys = FieldBlob::<Fp>::decode(*DOMAIN, blob);
// check that we get the byte blob back again
prop_assert_eq!(xs,ys);
}
Expand Down

0 comments on commit 139508a

Please sign in to comment.