Skip to content

Commit

Permalink
feat: added per-read anchor requirement to junction extract
Browse files Browse the repository at this point in the history
 - closes griffithlab#186, reads now only 'support' a junction if they have at least
   a given minimum anchor length, supplied with the '-A' flag (default 0)
  • Loading branch information
TimD1 committed Jul 2, 2024
1 parent c9e18a5 commit 9040ecb
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 9 deletions.
10 changes: 8 additions & 2 deletions src/cis-splice-effects/cis_splice_effects_identifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ void CisSpliceEffectsIdentifier::usage(ostream& out) {
out << "\t\t" << "-t STR\tTag used in bam to label strand. [XS]" << endl;
out << "\t\t" << "-a INT\tMinimum anchor length. Junctions which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides are reported. [8]" << endl;
out << "\t\t" << "-A INT\tMinimum read anchor length. Reads which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides 'support' a junction. [0]" << endl;
out << "\t\t" << "-m INT\tMinimum intron length. [70]" << endl;
out << "\t\t" << "-M INT\tMaximum intron length. [500000]" << endl;
out << "\t\t" << "-f INT\tOnly use alignments where all flag bits set here are set. [0]" << endl;
Expand Down Expand Up @@ -116,7 +118,7 @@ void CisSpliceEffectsIdentifier::parse_options(int argc, char* argv[]) {
optind = 1; //Reset before parsing again.
stringstream help_ss;
char c;
while((c = getopt(argc, argv, "o:w:v:j:e:Ei:ISht:s:a:m:M:f:F:q:b:C")) != -1) {
while((c = getopt(argc, argv, "o:w:v:j:e:Ei:ISht:s:a:A:m:M:f:F:q:b:C")) != -1) {
switch(c) {
case 'o':
output_file_ = string(optarg);
Expand Down Expand Up @@ -167,6 +169,9 @@ void CisSpliceEffectsIdentifier::parse_options(int argc, char* argv[]) {
case 'a':
min_anchor_length_ = atoi(optarg);
break;
case 'A':
min_read_anchor_length_ = atoi(optarg);
break;
case 'm':
min_intron_length_ = atoi(optarg);
break;
Expand Down Expand Up @@ -298,7 +303,8 @@ void CisSpliceEffectsIdentifier::identify() {
ref_to_pass = "NA";
}
JunctionsExtractor je1(bam_, variant_region, strandness_,
strand_tag_, min_anchor_length_, min_intron_length_, max_intron_length_,
strand_tag_, min_anchor_length_, min_read_anchor_length_,
min_intron_length_, max_intron_length_,
filter_flags_, require_flags_, min_map_qual_, ref_to_pass);
je1.identify_junctions_from_BAM();
vector<Junction> junctions = je1.get_all_junctions();
Expand Down
6 changes: 4 additions & 2 deletions src/cis-splice-effects/cis_splice_effects_identifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ class CisSpliceEffectsIdentifier {
//tag used in BAM to denote strand, default "XS"
string strand_tag_;
//Minimum anchor length for junctions
//Junctions need at least this many bp overlap
// on both ends.
//Junctions need at least this many bp overlap on both ends.
uint32_t min_anchor_length_;
//Reads need at least this many bp overlap on both ends to support junction.
uint32_t min_read_anchor_length_;
//Minimum length of an intron, i.e min junction width
uint32_t min_intron_length_;
//Maximum length of an intron, i.e max junction width
Expand Down Expand Up @@ -118,6 +119,7 @@ class CisSpliceEffectsIdentifier {
strandness_(-1),
strand_tag_("XS"),
min_anchor_length_(8),
min_read_anchor_length_(0),
min_intron_length_(70),
max_intron_length_(500000),
filter_flags_(0),
Expand Down
17 changes: 15 additions & 2 deletions src/junctions/junctions_extractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,17 @@ int JunctionsExtractor::parse_options(int argc, char *argv[]) {
optind = 1; //Reset before parsing again.
int c;
stringstream help_ss;
while((c = getopt(argc, argv, "ha:m:M:f:F:q:o:r:t:s:b:")) != -1) {
while((c = getopt(argc, argv, "ha:A:m:M:f:F:q:o:r:t:s:b:")) != -1) {
switch(c) {
case 'h':
usage(help_ss);
throw common::cmdline_help_exception(help_ss.str());
case 'a':
min_anchor_length_ = atoi(optarg);
break;
case 'A':
min_read_anchor_length_ = atoi(optarg);
break;
case 'm':
min_intron_length_ = atoi(optarg);
break;
Expand Down Expand Up @@ -119,6 +122,7 @@ int JunctionsExtractor::parse_options(int argc, char *argv[]) {
}

cerr << "Minimum junction anchor length: " << min_anchor_length_ << endl;
cerr << "Minimum read anchor length: " << min_read_anchor_length_ << endl;
cerr << "Minimum intron length: " << min_intron_length_ << endl;
cerr << "Maximum intron length: " << max_intron_length_ << endl;
cerr << "Require alignment flags: " << require_flags_ << endl;
Expand All @@ -140,6 +144,8 @@ int JunctionsExtractor::usage(ostream& out) {
out << "Options:" << endl;
out << "\t\t" << "-a INT\tMinimum anchor length. Junctions which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides are reported. [8]" << endl;
out << "\t\t" << "-A INT\tMinimum read anchor length. Reads which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides 'support' a junction. [0]" << endl;
out << "\t\t" << "-m INT\tMinimum intron length. [70]" << endl;
out << "\t\t" << "-M INT\tMaximum intron length. [500000]" << endl;
out << "\t\t" << "-f INT\tOnly use alignments where all flag bits set here are set. [0]" << endl;
Expand Down Expand Up @@ -171,12 +177,19 @@ string JunctionsExtractor::get_new_junction_name() {
return name_ss.str();
}

//Do some basic qc on the junction
//Update if junction passes QC based on current read alignment
bool JunctionsExtractor::junction_qc(Junction &j1) {
// don't add support for junction if intron is wrong size
if(j1.end - j1.start < min_intron_length_ ||
j1.end - j1.start > max_intron_length_) {
return false;
}

// don't add support for junction if read isn't sufficiently anchored
if(j1.start - j1.thick_start < min_read_anchor_length_) return false;
if(j1.thick_end - j1.end < min_read_anchor_length_) return false;

// add support, update if this junction is sufficiently anchored
if(j1.start - j1.thick_start >= min_anchor_length_)
j1.has_left_min_anchor = true;
if(j1.thick_end - j1.end >= min_anchor_length_)
Expand Down
10 changes: 7 additions & 3 deletions src/junctions/junctions_extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ class JunctionsExtractor {
//Reference FASTA file
string ref_;
//Minimum anchor length for junctions
//Junctions need atleast this many bp overlap
// on both ends.
//Junctions need at least this many bp overlap on both ends.
uint32_t min_anchor_length_;
//Reads need at least this many bp overlap to support a junction
uint32_t min_read_anchor_length_;
//Minimum length of an intron, i.e min junction width
uint32_t min_intron_length_;
//Maximum length of an intron, i.e max junction width
Expand Down Expand Up @@ -190,6 +191,7 @@ class JunctionsExtractor {
//Default constructor
JunctionsExtractor() {
min_anchor_length_ = 8;
min_read_anchor_length_ = 0;
min_intron_length_ = 70;
max_intron_length_ = 500000;
filter_flags_ = 0;
Expand All @@ -211,6 +213,7 @@ class JunctionsExtractor {
int strandness1,
string strand_tag1,
uint32_t min_anchor_length1,
uint32_t min_read_anchor_length1,
uint32_t min_intron_length1,
uint32_t max_intron_length1,
uint16_t filter_flags,
Expand All @@ -222,7 +225,8 @@ class JunctionsExtractor {
strandness_(strandness1),
strand_tag_(strand_tag1),
min_anchor_length_(min_anchor_length1),
min_intron_length_(min_anchor_length1),
min_read_anchor_length_(min_read_anchor_length1),
min_intron_length_(min_intron_length1),
max_intron_length_(max_intron_length1),
filter_flags_(filter_flags),
require_flags_(require_flags),
Expand Down

0 comments on commit 9040ecb

Please sign in to comment.