diff --git a/src/circuit.rs b/src/circuit.rs index 4c04b27..d84e586 100644 --- a/src/circuit.rs +++ b/src/circuit.rs @@ -195,12 +195,17 @@ fn generate_circuit_inputs(params: CircuitInputParams) -> Result { if !params.ignore_body_hash_check { // Calculate the length needed for SHA-256 padding of the body let body_sha_length = ((params.body.len() + 63 + 65) / 64) * 64; + println!("Body SHA length: {}", body_sha_length); + println!("Max body length: {}", params.max_body_length); + println!("Body length: {}", params.body.len()); // Pad the body to the maximum length or the calculated SHA-256 padding length let (body_padded, body_padded_len) = sha256_pad( params.body, cmp::max(params.max_body_length, body_sha_length), ); + println!("Body padded length: {}", body_padded_len); + // Ensure that the error type returned by `generate_partial_sha` is sized // by converting it into an `anyhow::Error` if it's not already. let result = generate_partial_sha( @@ -317,11 +322,15 @@ pub async fn generate_email_circuit_input( let command = parsed_email.get_command(circuit_input_params.ignore_body_hash_check)?; // Body is padded and cleaned, so use it for search - let search_body = padded_cleaned_body.as_ref(); - - // Find indices for the code and command in the body - code_idx = find_index_in_body(search_body, &code); - command_idx = find_index_in_body(search_body, &command); + if let Some((search_body, _)) = padded_cleaned_body.as_ref() { + // Find indices for the code and command in the body + code_idx = find_index_in_body(Some(search_body), &code); + command_idx = find_index_in_body(Some(search_body), &command); + } else { + // Handle the case where padded_cleaned_body is None + code_idx = 0; // or some other default value + command_idx = 0; // or some other default value + } } // Construct the email circuit input from the generated data @@ -341,7 +350,7 @@ pub async fn generate_email_circuit_input( padded_body_len: email_circuit_inputs.body_len_padded_bytes, precomputed_sha: 
email_circuit_inputs.precomputed_sha, command_idx, - padded_cleaned_body, + padded_cleaned_body: padded_cleaned_body.map(|(cleaned_body, _)| cleaned_body), }; // Serialize the email circuit input to JSON and return @@ -459,7 +468,9 @@ pub async fn generate_circuit_inputs_with_decomposed_regexes_and_external_inputs // Add the cleaned body to the circuit inputs if soft line breaks are to be removed if params.remove_soft_lines_breaks { - circuit_inputs["decodedEmailBodyIn"] = cleaned_body.clone().into(); + if let Some((cleaned_body_vec, _)) = cleaned_body.clone() { + circuit_inputs["decodedEmailBodyIn"] = cleaned_body_vec.into(); + } } // Process each decomposed regex and add the resulting indices to the circuit inputs @@ -477,7 +488,7 @@ pub async fn generate_circuit_inputs_with_decomposed_regexes_and_external_inputs } else if decomposed_regex.location == "body" && params.remove_soft_lines_breaks { &cleaned_body .as_ref() - .map(|v| String::from_utf8_lossy(v).into_owned()) + .map(|(v, _)| String::from_utf8_lossy(v).into_owned()) .unwrap_or_else(|| String::new()) } else { &email_circuit_inputs @@ -716,7 +727,7 @@ mod tests { decomposed_regexes, external_inputs, CircuitInputWithDecomposedRegexesAndExternalInputsParams { - max_body_length: 2816, + max_body_length: 3136, max_header_length: 1024, ignore_body_hash_check: false, remove_soft_lines_breaks: true, diff --git a/src/cryptos.rs b/src/cryptos.rs index d9a3b85..4702f05 100644 --- a/src/cryptos.rs +++ b/src/cryptos.rs @@ -457,6 +457,27 @@ pub fn partial_sha(msg: &[u8], msg_len: usize) -> Vec { result.to_vec() } +/// Finds the original indices in `body` that correspond to `pattern` in the `cleaned_body`. +/// Returns `Some((original_start, original_end))` if found, or `None` if the pattern isn't present. 
/// Locates the first occurrence of `pattern` in `cleaned_body` and translates the
/// match back to byte offsets in the original `body`.
///
/// # Arguments
///
/// * `body` - The original bytes; used to validate that mapped offsets are in range.
/// * `cleaned_body` - The bytes that are actually searched.
/// * `index_map` - For each position in `cleaned_body`, the offset of that byte in `body`.
///   Entries that do not refer to a real byte of `body` (for example placeholder
///   values for padding) are treated as "no mapping".
/// * `pattern` - The byte sequence to look for.
///
/// # Returns
///
/// `Some((original_start, original_end))`, where both offsets are inclusive positions
/// in `body` of the first and last matched byte. `None` if:
///
/// * `pattern` is empty (there is no first/last byte to map),
/// * `pattern` does not occur in `cleaned_body`,
/// * `index_map` has no entry for the matched positions, or
/// * a mapped offset lies outside `body`.
fn find_original_indices_for_pattern(
    body: &[u8],
    cleaned_body: &[u8],
    index_map: &[usize],
    pattern: &[u8],
) -> Option<(usize, usize)> {
    // `slice::windows` panics on a window size of zero, and `pattern.len() - 1`
    // below would underflow, so an empty pattern is rejected up front.
    if pattern.is_empty() {
        return None;
    }

    let cleaned_start = cleaned_body
        .windows(pattern.len())
        .position(|window| window == pattern)?;
    let cleaned_end = cleaned_start + pattern.len() - 1;

    // Checked lookups: a map shorter than `cleaned_body` yields `None` instead of a panic.
    let original_start = *index_map.get(cleaned_start)?;
    let original_end = *index_map.get(cleaned_end)?;

    // Reject offsets that cannot index `body`; this also filters out placeholder
    // entries so callers never slice `body` with a bogus offset.
    if original_start < body.len() && original_end < body.len() {
        Some((original_start, original_end))
    } else {
        None
    }
}
/// Strips quoted-printable soft line breaks (`=\r\n`) from `body` and records where
/// every surviving byte came from.
///
/// # Arguments
///
/// * `body` - The bytes to clean.
///
/// # Returns
///
/// A pair `(cleaned, index_map)`, each exactly `body.len()` elements long:
///
/// * `cleaned` - the kept bytes in their original order, followed by zero bytes
///   filling the space freed by the removed soft breaks.
/// * `index_map` - `index_map[k]` is the offset in `body` of `cleaned[k]`; slots that
///   correspond to the zero padding hold `usize::MAX`.
pub fn remove_quoted_printable_soft_breaks(body: Vec<u8>) -> (Vec<u8>, Vec<usize>) {
    const SOFT_BREAK: &[u8] = b"=\r\n";

    let total = body.len();
    let mut cleaned: Vec<u8> = Vec::with_capacity(total);
    let mut origins: Vec<usize> = Vec::with_capacity(total);

    let mut cursor = 0;
    while cursor < total {
        if body[cursor..].starts_with(SOFT_BREAK) {
            // Jump over the whole `=\r\n` sequence; none of its bytes are kept.
            cursor += SOFT_BREAK.len();
        } else {
            cleaned.push(body[cursor]);
            origins.push(cursor);
            cursor += 1;
        }
    }

    // Both outputs keep the input's length: zero bytes pad the data, and
    // `usize::MAX` marks map slots that have no source byte.
    cleaned.resize(total, 0);
    origins.resize(total, usize::MAX);

    (cleaned, origins)
}