Skip to content

Commit

Permalink
Sanity check RNA sequence strings on library generation
Browse files Browse the repository at this point in the history
  • Loading branch information
hextraza committed Nov 5, 2024
1 parent fdeab1c commit f0f11fb
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 3 deletions.
48 changes: 45 additions & 3 deletions src/reference_library.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,15 @@ pub fn get_reference_library(path: &Path, strand_filter: LibraryChemistry) -> (a
let mut row = Vec::new();
let mut revcomp_row = Vec::new();

for col in &columns {
row.push(col[row_idx].clone());
revcomp_row.push(col[row_idx].clone());
for (col_idx, col) in columns.iter().enumerate() {
let mut value = col[row_idx].clone();

if col_idx == sequence_idx {
value = value.replace('U', "T").replace('u', "t");
}

row.push(value.clone());
revcomp_row.push(value.clone());
}

revcomp_row[sequence_name_idx] = revcomp_row[sequence_name_idx].clone() + SPECIAL_REVCOMP_FEATURE_NAME_SEPARATOR + "rev";
Expand Down Expand Up @@ -436,4 +442,40 @@ mod tests {

sanity_check_align_config(&align_config); // Should pass without panic
}

/// Loads the RNA fixture library and checks that every entry of the sequence
/// column (index 3) comes back with `U`/`u` rewritten to `T`/`t`.
///
/// Rows 1 and 3 are presumably the reverse-complement companions of rows 0
/// and 2 that library generation appends — confirm against
/// `get_reference_library`.
#[test]
fn test_get_reference_library_rna_to_dna_conversion() {
    let library_path = Path::new("tests/test-sequences/libraries/reference-library-rna.json");
    let (_align_config, reference_metadata) =
        get_reference_library(library_path, LibraryChemistry::None);

    // Expected contents of the sequence column, in row order.
    let expected_sequences = ["ATGCTT", "AAGCAT", "tTgcAT", "ATgcAa"];

    for (row, expected) in expected_sequences.iter().enumerate() {
        assert_eq!(reference_metadata.columns[3][row], expected.to_string());
    }
}

/// Loads the mixed-case RNA fixture library and checks that the sequence
/// column (index 3) holds the expected DNA strings with the original letter
/// case preserved.
#[test]
fn test_get_reference_library_mixed_case_rna_to_dna_conversion() {
    let (_align_config, reference_metadata) = get_reference_library(
        Path::new("tests/test-sequences/libraries/reference-library-mixed-case-rna.json"),
        LibraryChemistry::None,
    );

    // Column 3 is the "sequence" column in the fixture's header order.
    let sequences = &reference_metadata.columns[3];

    assert_eq!(sequences[0], "atGcTt".to_string());
    assert_eq!(sequences[1], "aAgCat".to_string());
    assert_eq!(sequences[2], "TtgCAt".to_string());
    assert_eq!(sequences[3], "aTGcaA".to_string());
}

/// Loads a fixture library whose sequences contain no `U`/`u` bases and
/// checks the four expected entries of the sequence column (index 3).
#[test]
fn test_get_reference_library_sequence_without_rna_bases() {
    let fixture = Path::new("tests/test-sequences/libraries/reference-library-no-rna-bases.json");
    let chemistry = LibraryChemistry::None;
    let (_align_config, reference_metadata) = get_reference_library(fixture, chemistry);

    // (row index, expected sequence) pairs, checked in row order.
    let expectations = [
        (0, "ATGCGT"),
        (1, "ACGCAT"),
        (2, "CGTACG"),
        (3, "CGTACG"),
    ];

    for (row, expected) in expectations {
        assert_eq!(reference_metadata.columns[3][row], expected.to_string());
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[
{
"score_percent": 0.85,
"score_filter": 200,
"score_threshold": 300,
"num_mismatches": 2,
"discard_multiple_matches": true,
"require_valid_pair": false,
"discard_multi_hits": 1,
"intersect_level": 1,
"max_hits_to_report": 10,
"group_on": "feature_id",
"trim_target_length": 40,
"trim_strictness": 0.9
},
{
"headers": ["id", "feature_id", "sequence_name", "sequence"],
"columns": [
["1", "2"],
["fid1", "fid2"],
["seq_name1", "seq_name2"],
["auGcTt", "UugCAu"]
]
}
]
25 changes: 25 additions & 0 deletions tests/test-sequences/libraries/reference-library-no-rna-bases.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[
{
"score_percent": 0.85,
"score_filter": 200,
"score_threshold": 300,
"num_mismatches": 2,
"discard_multiple_matches": true,
"require_valid_pair": false,
"discard_multi_hits": 1,
"intersect_level": 1,
"max_hits_to_report": 10,
"group_on": "feature_id",
"trim_target_length": 40,
"trim_strictness": 0.9
},
{
"headers": ["id", "feature_id", "sequence_name", "sequence"],
"columns": [
["1", "2"],
["fid1", "fid2"],
["seq_name1", "seq_name2"],
["ATGCGT", "CGTACG"]
]
}
]
25 changes: 25 additions & 0 deletions tests/test-sequences/libraries/reference-library-rna.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[
{
"score_percent": 0.85,
"score_filter": 200,
"score_threshold": 300,
"num_mismatches": 2,
"discard_multiple_matches": true,
"require_valid_pair": false,
"discard_multi_hits": 1,
"intersect_level": 1,
"max_hits_to_report": 10,
"group_on": "feature_id",
"trim_target_length": 40,
"trim_strictness": 0.9
},
{
"headers": ["id", "feature_id", "sequence_name", "sequence"],
"columns": [
["1", "2"],
["fid1", "fid2"],
["seq_name1", "seq_name2"],
["AUGCUU", "uUgcAU"]
]
}
]

0 comments on commit f0f11fb

Please sign in to comment.