From 2f8bac188dbb443ac7267cbf31cc4877361d6518 Mon Sep 17 00:00:00 2001 From: hextraza Date: Tue, 6 Aug 2024 13:55:48 -0700 Subject: [PATCH] Add manual mismatch filter --- src/align.rs | 5 ++- src/filter/align.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++ src/output_header.rs | 4 ++ 3 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 src/output_header.rs diff --git a/src/align.rs b/src/align.rs index ffdb4fa..51c0a8e 100644 --- a/src/align.rs +++ b/src/align.rs @@ -45,6 +45,7 @@ pub enum FilterReason { SuccessfulMatch, StrandWasWrong, TriageEmptyEquivalenceClass, + AboveMismatchThreshold, None, } @@ -67,6 +68,7 @@ impl Display for FilterReason { FilterReason::SuccessfulMatch => write!(f, "Successful Match"), FilterReason::StrandWasWrong => write!(f, "Strandedness Filtered"), FilterReason::TriageEmptyEquivalenceClass => write!(f, "Equivalence Class Empty After Filters"), + FilterReason::AboveMismatchThreshold => write!(f, "Above Mismatch Threshold"), FilterReason::None => write!(f, "None"), } } @@ -942,7 +944,6 @@ fn pseudoalign( // Perform the alignment of the sequence to the reference debruijn graph. 
Pass the number of allowed mismatches match reference_index.map_read_with_mismatch(&sequence, config.num_mismatches) { Some((equivalence_class, score, mismatches)) => { - // Normalize score by read length let normalized_score = score as f64 / sequence.len() as f64; @@ -959,6 +960,8 @@ fn pseudoalign( config.score_threshold, config.score_percent, config.discard_multiple_matches, + config.num_mismatches, + mismatches ) } None => (None, Some((FilterReason::NoMatch, 0.0, 0))), diff --git a/src/filter/align.rs b/src/filter/align.rs index 4ea092b..ec91fd1 100644 --- a/src/filter/align.rs +++ b/src/filter/align.rs @@ -8,6 +8,8 @@ pub fn filter_alignment_by_metrics( score_threshold: usize, normalized_score_threshold: f64, discard_multiple_matches: bool, + mismatch_threshold: usize, + mismatches: usize ) -> ( Option<(Vec, f64, usize)>, Option<(FilterReason, f64, usize)>, @@ -22,6 +24,15 @@ pub fn filter_alignment_by_metrics( score, )), ) + } else if mismatches > mismatch_threshold { + ( + None, + Some(( + FilterReason::AboveMismatchThreshold, + normalized_score, + score, + )), + ) } else { (Some((equivalence_class, normalized_score, score)), None) } @@ -52,6 +63,8 @@ mod tests { score_threshold, score_percent, false, + 0, + 0 ); let expected_results = Some((vec![1, 2], 1.0, 50)); @@ -74,6 +87,8 @@ mod tests { score_threshold, score_percent, false, + 0, + 0 ); let expected_results = Some((super::FilterReason::ScoreBelowThreshold, 0.10, 10)); @@ -97,10 +112,84 @@ mod tests { score_threshold, score_percent, true, + 0, + 0 ); let expected_results = Some((super::FilterReason::DiscardedMultipleMatch, 1.0, 50)); assert_eq!(results, expected_results); } + + // Case where the mismatches are lower than the threshold -- should not filter the alignment + #[test] + fn do_not_filter_mismatches() { + let score = 50; + let normalized_score = 1.0; + let score_threshold = 20; + let score_percent = 0.5; + let equiv_class = vec![1, 2]; + + let (results, _) = super::filter_alignment_by_metrics( 
+ equiv_class, + score, + normalized_score, + score_threshold, + score_percent, + false, + 1, + 0 + ); + let expected_results = Some((vec![1, 2], 1.0, 50)); + + assert_eq!(results, expected_results); + } + + // Case where the mismatches are equal to the threshold -- should not filter the alignment + #[test] + fn do_not_filter_mismatches_equal() { + let score = 50; + let normalized_score = 1.0; + let score_threshold = 20; + let score_percent = 0.5; + let equiv_class = vec![1, 2]; + + let (results, _) = super::filter_alignment_by_metrics( + equiv_class, + score, + normalized_score, + score_threshold, + score_percent, + false, + 1, + 1 + ); + let expected_results = Some((vec![1, 2], 1.0, 50)); + + assert_eq!(results, expected_results); + } + + // Case where the mismatches are above the threshold -- should filter the alignment + #[test] + fn filter_mismatches() { + let score = 50; + let normalized_score = 1.0; + let score_threshold = 20; + let score_percent = 0.5; + let equiv_class = vec![1, 2]; + + let (_, results) = super::filter_alignment_by_metrics( + equiv_class, + score, + normalized_score, + score_threshold, + score_percent, + false, + 1, + 2 + ); + let expected_results = Some((super::FilterReason::AboveMismatchThreshold, 1.0, 50)); + + assert_eq!(results, expected_results); + } } diff --git a/src/output_header.rs b/src/output_header.rs new file mode 100644 index 0000000..3e2002c --- /dev/null +++ b/src/output_header.rs @@ -0,0 +1,4 @@ +pub struct OutputHeader { + tool_version: String, + +} \ No newline at end of file