From bfea937233400bb9dc1ea2230226204655d5b926 Mon Sep 17 00:00:00 2001 From: hextraza Date: Tue, 19 Jul 2022 15:51:02 -0700 Subject: [PATCH] Fix strand filter parsing --- src/align.rs | 32 +++++++++++++++----------------- src/bin/cli.yml | 7 +++++++ src/bin/main.rs | 13 +++++++++++-- src/reference_library.rs | 7 ++++--- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/align.rs b/src/align.rs index 5f04b55..8512016 100644 --- a/src/align.rs +++ b/src/align.rs @@ -49,7 +49,15 @@ pub struct AlignFilterConfig { pub intersect_level: IntersectLevel, pub require_valid_pair: bool, pub discard_multi_hits: usize, - pub max_hits_to_report: usize + pub max_hits_to_report: usize, + pub strand_filter: StrandFilter +} + +pub enum StrandFilter { + Unstranded, + FivePrime, + ThreePrime, + None } #[derive(Default)] @@ -85,14 +93,6 @@ pub enum AlignmentDirection { I } -// TODO input data is fiveprime or threeprime, not library data, so this should just be a toggle for the filtering and the type of filtering is a console param -pub enum LibraryType { - Unstranded, - FivePrime, - ThreePrime, - None -} - impl AlignmentDirection { fn get_alignment_dir(forward_pair_state: PairState, reverse_pair_state: PairState) -> AlignmentDirection { match (forward_pair_state, reverse_pair_state) { @@ -116,8 +116,7 @@ impl AlignmentDirection { let (f_pair_state, f_equiv_class) = forward_hits; if let Some((r_pair_state, r_equiv_class)) = reverse_hits { - // TODO Read library type - if AlignmentDirection::filter_read(AlignmentDirection::get_alignment_dir(f_pair_state, r_pair_state), LibraryType::Unstranded) { + if AlignmentDirection::filter_read(AlignmentDirection::get_alignment_dir(f_pair_state, r_pair_state), &config.strand_filter) { return } @@ -137,12 +136,12 @@ impl AlignmentDirection { } - fn filter_read(dir: AlignmentDirection, lib_type: LibraryType) -> bool { + fn filter_read(dir: AlignmentDirection, lib_type: &StrandFilter) -> bool { match lib_type { - LibraryType::Unstranded => AlignmentDirection::filter_unstranded(dir), - LibraryType::FivePrime => AlignmentDirection::filter_fiveprime(dir), - LibraryType::ThreePrime => AlignmentDirection::filter_threeprime(dir), - LibraryType::None => false + StrandFilter::Unstranded => AlignmentDirection::filter_unstranded(dir), + StrandFilter::FivePrime => AlignmentDirection::filter_fiveprime(dir), + StrandFilter::ThreePrime => AlignmentDirection::filter_threeprime(dir), + StrandFilter::None => false } } @@ -261,7 +260,6 @@ pub fn score<'a>( None => (None, None), }; - // TODO reverse comp the .bam if the relevant flag is true let (forward_score, forward_matched_sequences, forward_align_debug_info) = generate_score( sequences, reverse_sequences, diff --git a/src/bin/cli.yml b/src/bin/cli.yml index c3124e1..c4a303f 100644 --- a/src/bin/cli.yml +++ b/src/bin/cli.yml @@ -35,3 +35,10 @@ args: help: Path to a gzipped file that will log reference name and sequence for every alignment (note -- can be very large) takes_value: true default_value: "" + - strand_filter: + short: f + long: strand_filter + value_name: STRAND_FILTER + help: How to filter paired-read data based on strandedness. Possible values are "unstranded" (default), "fiveprime", "threeprime", and "none" + takes_value: true + default_value: "unstranded" diff --git a/src/bin/main.rs b/src/bin/main.rs index 23a2f7d..b030618 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -1,12 +1,12 @@ extern crate nimble; +use nimble::align::StrandFilter; use nimble::process::{bam, fastq}; use nimble::reference_library; use nimble::utils; use clap::{load_yaml, App}; use std::collections::HashMap; -use std::fs::OpenOptions; use std::path::Path; fn main() { @@ -35,11 +35,20 @@ fn main() { Some(alignment_file) }; + let strand_filter = matches.value_of("strand_filter").unwrap_or("unstranded"); + let strand_filter = match strand_filter { + "unstranded" => StrandFilter::Unstranded, + "fiveprime" => StrandFilter::FivePrime, + "threeprime" => StrandFilter::ThreePrime, + "none" => StrandFilter::None, + _ => panic!("Could not parse strand_filter option.") + }; + println!("Loading and preprocessing reference data"); // Read library alignment config info, reference library metadata, and sequences from library json let (align_config, reference_metadata) = - reference_library::get_reference_library(Path::new(reference_json_path)); + reference_library::get_reference_library(Path::new(reference_json_path), strand_filter); // Generate error-checked vectors of seqs and names for the debrujin index let (reference_seqs, reference_seqs_rev, reference_names) = diff --git a/src/reference_library.rs b/src/reference_library.rs index ad2c6ac..7b0c1a1 100644 --- a/src/reference_library.rs +++ b/src/reference_library.rs @@ -1,4 +1,4 @@ -use crate::align; +use crate::align::{self, StrandFilter}; use serde_json::Value; use std::fs::read_to_string; use std::path::Path; @@ -15,7 +15,7 @@ pub struct ReferenceMetadata { } // Parses a .json that contains a reference library. Returns a tuple of the library's config information and the library data -pub fn get_reference_library(path: &Path) -> (align::AlignFilterConfig, ReferenceMetadata) { +pub fn get_reference_library(path: &Path, strand_filter: StrandFilter) -> (align::AlignFilterConfig, ReferenceMetadata) { // Parse raw JSON to serde_json value let raw_json_string = read_to_string(path).expect("Error -- could not read reference library"); @@ -105,7 +105,8 @@ pub fn get_reference_library(path: &Path) -> (align::AlignFilterConfig, Referenc require_valid_pair, discard_multi_hits, intersect_level, - max_hits_to_report + max_hits_to_report, + strand_filter }; let reference_metadata = ReferenceMetadata {