Skip to content

Commit

Permalink
Fix qual string length to match TSO trim
Browse files Browse the repository at this point in the history
  • Loading branch information
hextraza committed Jul 16, 2024
1 parent 768d64b commit 5ec06db
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 7 deletions.
2 changes: 2 additions & 0 deletions src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ fn main() {
if let Some((length, strictness)) = trim_pairs.get(i) {
aligner_config.trim_target_length = *length;
aligner_config.trim_strictness = *strictness;
println!("Manually setting trim settings for library {} | target length: {}, strictness: {}",
reference_json_path, aligner_config.trim_target_length, aligner_config.trim_strictness);
}

// Get sequences and feature names from the reference library for producing the library index
Expand Down
30 changes: 26 additions & 4 deletions src/parse/bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,12 @@ impl UMIReader {
let seq =
UMIReader::strip_nonbio_regions(&record.seq().as_bytes()[..], record.is_reverse());

let qual = String::from_utf8(record.qual().to_vec()).unwrap_or_else(|e| {
println!("QUAL parsing warning: {}", e);
String::new()
});
let qual = UMIReader::strip_nonbio_regions_qual(&qual, record.is_reverse());

let mut record_fields = Vec::new();
for field in BAM_FIELDS_TO_REPORT {
let field_value = if let Ok(Aux::String(s)) = record.aux(field.as_bytes()) {
Expand All @@ -199,10 +205,7 @@ impl UMIReader {
eprintln!("Error: {}", e);
String::new()
}),
"QUAL" => String::from_utf8(record.qual().to_vec()).unwrap_or_else(|e| {
eprintln!("Error: {}", e);
String::new()
}),
"QUAL" => qual.clone(),
"REVERSE" => record.is_reverse().to_string(),
"MATE_REVERSE" => record.is_mate_reverse().to_string(),
"PAIRED" => record.is_paired().to_string(),
Expand Down Expand Up @@ -262,6 +265,25 @@ impl UMIReader {
return DnaString::from_acgt_bytes(&seq);
}
}

// Do the same as above for the PHREDs
fn strip_nonbio_regions_qual(qual: &str, rev_comp: bool) -> String {
let trimmed_qual = if qual.len() == 124 {
if rev_comp {
qual[0..qual.len() - CLIP_LENGTH].to_string()
} else {
qual[CLIP_LENGTH..].to_string()
}
} else {
qual.to_string()
};

if rev_comp {
trimmed_qual.chars().rev().collect()
} else {
trimmed_qual
}
}
}

#[cfg(test)]
Expand Down
6 changes: 3 additions & 3 deletions src/process/bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ fn align_umi_to_libraries(
&umi_metadata
.clone()
.into_iter()
.map(|v| parse_str_as_bool(&v[3])) // set the sequence to be reverse complemented if it has been reverse complemented in the bam record
.map(|v| parse_str_as_bool(&v[2])) // set the sequence to be reverse complemented if it has been reverse complemented in the bam record
.collect::<Vec<bool>>(),
);

Expand Down Expand Up @@ -333,8 +333,8 @@ fn align_umi_to_libraries(

let mut transformed_scores = Vec::new();
for score in s.into_iter() {
let r1_key_part = reverse_comp_if_needed((&DnaString::from_dna_string(&score.1.1[15]), &parse_str_as_bool(&score.1.1[3]))).to_string();
let r2_key_part = reverse_comp_if_needed((&DnaString::from_dna_string(&score.1.2[15]), &parse_str_as_bool(&score.1.2[3]))).to_string();
let r1_key_part = reverse_comp_if_needed((&DnaString::from_dna_string(&score.1.1[15]), &parse_str_as_bool(&score.1.1[2]))).to_string();
let r2_key_part = reverse_comp_if_needed((&DnaString::from_dna_string(&score.1.2[15]), &parse_str_as_bool(&score.1.2[2]))).to_string();
let filter_result = filter_reasons.get(&(r1_key_part.clone() + &r2_key_part));

let new_score = match filter_result {
Expand Down

0 comments on commit 5ec06db

Please sign in to comment.