Skip to content

Commit

Permalink
fix: sha precompute selector
Browse files Browse the repository at this point in the history
  • Loading branch information
Bisht13 committed Dec 7, 2024
1 parent 6f9f5b7 commit f4d4de8
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 23 deletions.
29 changes: 20 additions & 9 deletions src/circuit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,17 @@ fn generate_circuit_inputs(params: CircuitInputParams) -> Result<CircuitInput> {
if !params.ignore_body_hash_check {
// Calculate the length needed for SHA-256 padding of the body
let body_sha_length = ((params.body.len() + 63 + 65) / 64) * 64;
println!("Body SHA length: {}", body_sha_length);
println!("Max body length: {}", params.max_body_length);
println!("Body length: {}", params.body.len());
// Pad the body to the maximum length or the calculated SHA-256 padding length
let (body_padded, body_padded_len) = sha256_pad(
params.body,
cmp::max(params.max_body_length, body_sha_length),
);

println!("Body padded length: {}", body_padded_len);

// Ensure that the error type returned by `generate_partial_sha` is sized
// by converting it into an `anyhow::Error` if it's not already.
let result = generate_partial_sha(
Expand Down Expand Up @@ -317,11 +322,15 @@ pub async fn generate_email_circuit_input(
let command = parsed_email.get_command(circuit_input_params.ignore_body_hash_check)?;

// Body is padded and cleaned, so use it for search
let search_body = padded_cleaned_body.as_ref();

// Find indices for the code and command in the body
code_idx = find_index_in_body(search_body, &code);
command_idx = find_index_in_body(search_body, &command);
if let Some((search_body, _)) = padded_cleaned_body.as_ref() {
// Find indices for the code and command in the body
code_idx = find_index_in_body(Some(search_body), &code);
command_idx = find_index_in_body(Some(search_body), &command);
} else {
// Handle the case where padded_cleaned_body is None
code_idx = 0; // or some other default value
command_idx = 0; // or some other default value
}
}

// Construct the email circuit input from the generated data
Expand All @@ -341,7 +350,7 @@ pub async fn generate_email_circuit_input(
padded_body_len: email_circuit_inputs.body_len_padded_bytes,
precomputed_sha: email_circuit_inputs.precomputed_sha,
command_idx,
padded_cleaned_body,
padded_cleaned_body: padded_cleaned_body.map(|(cleaned_body, _)| cleaned_body),
};

// Serialize the email circuit input to JSON and return
Expand Down Expand Up @@ -459,7 +468,9 @@ pub async fn generate_circuit_inputs_with_decomposed_regexes_and_external_inputs

// Add the cleaned body to the circuit inputs if soft line breaks are to be removed
if params.remove_soft_lines_breaks {
circuit_inputs["decodedEmailBodyIn"] = cleaned_body.clone().into();
if let Some((cleaned_body_vec, _)) = cleaned_body.clone() {
circuit_inputs["decodedEmailBodyIn"] = cleaned_body_vec.into();
}
}

// Process each decomposed regex and add the resulting indices to the circuit inputs
Expand All @@ -477,7 +488,7 @@ pub async fn generate_circuit_inputs_with_decomposed_regexes_and_external_inputs
} else if decomposed_regex.location == "body" && params.remove_soft_lines_breaks {
&cleaned_body
.as_ref()
.map(|v| String::from_utf8_lossy(v).into_owned())
.map(|(v, _)| String::from_utf8_lossy(v).into_owned())
.unwrap_or_else(|| String::new())
} else {
&email_circuit_inputs
Expand Down Expand Up @@ -716,7 +727,7 @@ mod tests {
decomposed_regexes,
external_inputs,
CircuitInputWithDecomposedRegexesAndExternalInputsParams {
max_body_length: 2816,
max_body_length: 3136,
max_header_length: 1024,
ignore_body_hash_check: false,
remove_soft_lines_breaks: true,
Expand Down
44 changes: 39 additions & 5 deletions src/cryptos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,27 @@ pub fn partial_sha(msg: &[u8], msg_len: usize) -> Vec<u8> {
result.to_vec()
}

/// Maps the first occurrence of `pattern` in `cleaned_body` back to offsets in `body`.
///
/// # Arguments
///
/// * `body` - The original bytes; used to validate that the mapped offsets are in range.
/// * `cleaned_body` - The bytes that are actually searched.
/// * `index_map` - For each position `i` of `cleaned_body`, `index_map[i]` is the offset of
///   that byte in `body`.
/// * `pattern` - The byte sequence to look for.
///
/// # Returns
///
/// `Some((original_start, original_end))`, where both offsets are inclusive positions in
/// `body` of the first and last byte of the match. Returns `None` when:
///
/// * `pattern` is empty,
/// * `pattern` does not occur in `cleaned_body`,
/// * the match is not covered by `index_map`, or
/// * a mapped offset does not point inside `body` (for example a placeholder entry such as
///   `usize::MAX` for bytes that have no counterpart in the original).
fn find_original_indices_for_pattern(
    body: &[u8],
    cleaned_body: &[u8],
    index_map: &[usize],
    pattern: &[u8],
) -> Option<(usize, usize)> {
    // `slice::windows` panics on a window size of zero, and an empty pattern has no last
    // byte to map, so report it as "not found" instead of crashing.
    if pattern.is_empty() {
        return None;
    }

    let cleaned_start = cleaned_body
        .windows(pattern.len())
        .position(|window| window == pattern)?;
    let cleaned_end = cleaned_start + pattern.len() - 1;

    // Checked lookups: a map shorter than the cleaned body must not cause a panic.
    let original_start = *index_map.get(cleaned_start)?;
    let original_end = *index_map.get(cleaned_end)?;

    // Reject offsets that cannot be used to slice `body` (this also filters out
    // out-of-range placeholder entries).
    (original_start <= original_end && original_end < body.len())
        .then_some((original_start, original_end))
}

/// Generates a partial SHA-256 hash of a message up to the point of a selector string, if provided.
///
/// # Arguments
Expand All @@ -476,17 +497,30 @@ pub fn generate_partial_sha(
selector_regex: Option<String>,
max_remaining_body_length: usize,
) -> PartialShaResult {
let cleaned_body = remove_quoted_printable_soft_breaks(body.clone());

let selector_index =
find_index_in_body(Some(&cleaned_body), selector_regex.as_deref().unwrap_or(""));
let (cleaned_body, index_map) = remove_quoted_printable_soft_breaks(body.clone());

let selector_bytes = selector_regex.as_deref().map(|s| s.as_bytes());
let (selector_index, _) = find_original_indices_for_pattern(
&body,
&cleaned_body,
&index_map,
selector_bytes.expect("Selector bytes not found"),
)
.ok_or_else(|| {
Box::new(std::io::Error::new(
std::io::ErrorKind::Other,
"Selector not found in the body",
))
})?;

// Calculate the cutoff index for SHA-256 block size (64 bytes)
let sha_cutoff_index = (selector_index / 64) * 64;
let precompute_text = &body[..sha_cutoff_index];
let mut body_remaining = body[sha_cutoff_index..].to_vec();

let body_remaining_length = body_length - precompute_text.len();
let body_remaining_length = body.len() - precompute_text.len();

println!("body_remaining_length: {}", body_remaining_length);

// Check if the remaining body length exceeds the maximum allowed length
if body_remaining_length > max_remaining_body_length {
Expand Down
28 changes: 19 additions & 9 deletions src/parse_email.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ impl ParsedEmail {
canonicalized_body: String::from_utf8(canonicalized_body.clone())?, // Convert bytes to string, may return an error if not valid UTF-8.
signature: signature_bytes.into_iter().collect_vec(), // Collect the signature bytes into a vector.
public_key,
cleaned_body: String::from_utf8(remove_quoted_printable_soft_breaks(
canonicalized_body,
))?, // Remove quoted-printable soft breaks from the canonicalized body.
cleaned_body: String::from_utf8(
remove_quoted_printable_soft_breaks(canonicalized_body).0,
)?, // Remove quoted-printable soft breaks from the canonicalized body.
headers,
};

Expand Down Expand Up @@ -280,22 +280,32 @@ impl ParsedEmail {
/// # Returns
///
/// A tuple `(cleaned, index_map)`; both vectors have the same length as `body`:
///
/// * `cleaned` - `body` with every quoted-printable soft line break (`=\r\n`) removed, then
///   padded at the end with zero bytes back to the original length.
/// * `index_map` - for every retained byte `cleaned[i]`, `index_map[i]` is the offset of that
///   byte in `body`. Entries that correspond to the zero padding hold `usize::MAX`, because
///   those bytes have no counterpart in `body`.
pub fn remove_quoted_printable_soft_breaks(body: Vec<u8>) -> (Vec<u8>, Vec<usize>) {
    /// Byte sequence of a quoted-printable soft line break.
    const SOFT_BREAK: &[u8] = b"=\r\n";

    let original_len = body.len();
    let mut result = Vec::with_capacity(original_len);
    let mut index_map = Vec::with_capacity(original_len);

    let mut i = 0;
    while i < original_len {
        if body[i..].starts_with(SOFT_BREAK) {
            // Drop the whole soft break; nothing is emitted for these three bytes.
            i += SOFT_BREAK.len();
        } else {
            result.push(body[i]);
            index_map.push(i);
            i += 1;
        }
    }

    // Keep the output the same length as the input: zero bytes for the content and a
    // `usize::MAX` placeholder for the positions that map to nothing in `body`.
    result.resize(original_len, 0);
    index_map.resize(original_len, usize::MAX);

    (result, index_map)
}

/// Finds the index of the first occurrence of a pattern in the given body.
Expand Down

0 comments on commit f4d4de8

Please sign in to comment.