Skip to content

Commit

Permalink
Break out mpmap band padding algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Jul 19, 2024
1 parent d3f9dec commit 25ad71b
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 38 deletions.
43 changes: 43 additions & 0 deletions src/algorithms/pad_band.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* \file pad_band.cpp
*
* Defines implementation for band padding functions for banded global alignment.
*/

#include "pad_band.hpp"

namespace vg {
namespace algorithms {

std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_random_walk(double band_padding_multiplier, size_t band_padding_memo_size) {

// We're goign to capture this vector by value into the closure
std::vector<size_t> band_padding_memo;

// Fill it in to initialize
band_padding_memo.resize(band_padding_memo_size);
for (size_t i = 0; i < band_padding_memo.size(); i++) {
band_padding_memo[i] = size_t(band_padding_multiplier * sqrt(i)) + 1;
}

function<size_t(const Alignment&, const HandleGraph&)> choose_band_padding = [band_padding_multiplier, band_padding_memo](const Alignment& seq, const HandleGraph& graph) {
size_t read_length = seq.sequence().size();
return read_length < band_padding_memo.size() ? band_padding_memo.at(read_length)
: size_t(band_padding_multiplier * sqrt(read_length)) + 1;
};

// And return the closure which now owns the memo table.
return choose_band_padding;
}

std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_constant(size_t band_padding) {
// don't dynamically choose band padding, shim constant value into a function type
function<size_t(const Alignment&,const HandleGraph&)> constant_padding = [band_padding](const Alignment& seq, const HandleGraph& graph) {
return band_padding;
};

return constant_padding;
}

}
}
28 changes: 28 additions & 0 deletions src/algorithms/pad_band.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef VG_ALGORITHMS_PAD_BAND_HPP_INCLUDED
#define VG_ALGORITHMS_PAD_BAND_HPP_INCLUDED

/**
* \file pad_band.hpp
*
* Defines algorithm for computing band padding for banded alignment.
*/

#include "../handle.hpp"
#include <vg/vg.pb.h>

namespace vg {
namespace algorithms {

using namespace std;

/// Get a band padding function that uses the expected distance of a random
/// walk, memoized out to the given length.
std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_random_walk(double band_padding_multiplier = 1.0, size_t band_padding_memo_size = 2000);

/// Get a band padding function that uses a constant value.
std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_constant(size_t band_padding);

}
}

#endif
9 changes: 3 additions & 6 deletions src/multipath_alignment_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "algorithms/extract_connecting_graph.hpp"
#include "algorithms/extract_extending_graph.hpp"
#include "algorithms/pad_band.hpp"

//#define debug_multipath_alignment
//#define debug_decompose_algorithm
Expand Down Expand Up @@ -4230,10 +4231,6 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap
SnarlDistanceIndex* dist_index, const function<pair<id_t, bool>(id_t)>* project,
bool allow_negative_scores, unordered_map<handle_t, bool>* left_align_strand) {

// don't dynamically choose band padding, shim constant value into a function type
function<size_t(const Alignment&,const HandleGraph&)> constant_padding = [&](const Alignment& seq, const HandleGraph& graph) {
return band_padding;
};
align(alignment,
align_graph,
aligner,
Expand All @@ -4244,7 +4241,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap
pessimistic_tail_gap_multiplier,
simplify_topologies,
unmergeable_len,
constant_padding,
algorithms::pad_band_constant(band_padding),
multipath_aln_out,
cutting_snarls,
dist_index,
Expand Down Expand Up @@ -5183,7 +5180,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap
void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGraph& align_graph, const GSSWAligner* aligner,
bool score_anchors_as_matches, size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap,
double pessimistic_tail_gap_multiplier, bool simplify_topologies, size_t unmergeable_len,
function<size_t(const Alignment&,const HandleGraph&)> band_padding_function,
const function<size_t(const Alignment&,const HandleGraph&)>& band_padding_function,
multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls,
SnarlDistanceIndex* dist_index, const function<pair<id_t, bool>(id_t)>* project,
bool allow_negative_scores, unordered_map<handle_t, bool>* left_align_strand) {
Expand Down
2 changes: 1 addition & 1 deletion src/multipath_alignment_graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ namespace vg {
/// with topologically_order_subpaths() before trying to run DP on it.
void align(const Alignment& alignment, const HandleGraph& align_graph, const GSSWAligner* aligner, bool score_anchors_as_matches,
size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, bool simplify_topologies,
size_t unmergeable_len, function<size_t(const Alignment&,const HandleGraph&)> band_padding_function,
size_t unmergeable_len, const function<size_t(const Alignment&,const HandleGraph&)>& band_padding_function,
multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls = nullptr, SnarlDistanceIndex* dist_index = nullptr,
const function<pair<id_t, bool>(id_t)>* project = nullptr, bool allow_negative_scores = false,
unordered_map<handle_t, bool>* left_align_strand = nullptr);
Expand Down
25 changes: 3 additions & 22 deletions src/multipath_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "algorithms/jump_along_path.hpp"
#include "algorithms/ref_path_distance.hpp"
#include "algorithms/component.hpp"
#include "algorithms/pad_band.hpp"

namespace vg {

Expand All @@ -63,7 +64,8 @@ namespace vg {
snarl_manager(snarl_manager),
distance_index(distance_index),
path_component_index(distance_index ? nullptr : new PathComponentIndex(graph)),
splice_stats(*get_regular_aligner())
splice_stats(*get_regular_aligner()),
choose_band_padding(algorithms::pad_band_random_walk(1.0, 0))
{
set_max_merge_supression_length();
}
Expand Down Expand Up @@ -895,15 +897,6 @@ namespace vg {
}
}
}

void MultipathMapper::init_band_padding_memo() {
band_padding_memo.clear();
band_padding_memo.resize(band_padding_memo_size);

for (size_t i = 0; i < band_padding_memo.size(); i++) {
band_padding_memo[i] = size_t(band_padding_multiplier * sqrt(i)) + 1;
}
}

void MultipathMapper::set_alignment_scores(const int8_t* score_matrix, int8_t gap_open, int8_t gap_extend,
int8_t full_length_bonus) {
Expand Down Expand Up @@ -6248,12 +6241,6 @@ namespace vg {
}
#endif

function<size_t(const Alignment&, const HandleGraph&)> choose_band_padding = [&](const Alignment& seq, const HandleGraph& graph) {
size_t read_length = seq.sequence().size();
return read_length < band_padding_memo.size() ? band_padding_memo.at(read_length)
: size_t(band_padding_multiplier * sqrt(read_length)) + 1;
};

// do the connecting alignments and fill out the multipath_alignment_t object
multi_aln_graph.align(alignment, *align_dag, aligner, true, num_alt_alns, dynamic_max_alt_alns, max_alignment_gap,
use_pessimistic_tail_alignment ? pessimistic_gap_multiplier : 0.0, simplify_topologies,
Expand Down Expand Up @@ -6305,12 +6292,6 @@ namespace vg {
multi_aln_graph.topological_sort(topological_order);
multi_aln_graph.remove_transitive_edges(topological_order);

function<size_t(const Alignment&, const HandleGraph&)> choose_band_padding = [&](const Alignment& seq, const HandleGraph& graph) {
size_t read_length = seq.sequence().end() - seq.sequence().begin();
return read_length < band_padding_memo.size() ? band_padding_memo.at(read_length)
: size_t(band_padding_multiplier * sqrt(read_length)) + 1;
};

// do the connecting alignments and fill out the multipath_alignment_t object
multi_aln_graph.align(alignment, subgraph, aligner, false, num_alt_alns, dynamic_max_alt_alns, max_alignment_gap,
use_pessimistic_tail_alignment ? pessimistic_gap_multiplier : 0.0, simplify_topologies,
Expand Down
9 changes: 2 additions & 7 deletions src/multipath_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,6 @@ namespace vg {
/// Experimental: skeleton code for predicting path distance from minimum distance
void determine_distance_correlation();

/// Should be called once after construction, or any time the band padding multiplier is changed
void init_band_padding_memo();

using AlignerClient::set_alignment_scores;

/// Set the algner scoring parameters and create the stored aligner instances. The
Expand Down Expand Up @@ -115,7 +112,6 @@ namespace vg {
size_t max_tail_merge_supress_length = 4;
bool suppress_tail_anchors = false;
size_t min_tail_anchor_length = 3;
double band_padding_multiplier = 1.0;
bool use_pessimistic_tail_alignment = false;
double pessimistic_gap_multiplier = 0.0;
bool restrained_graph_extraction = false;
Expand All @@ -136,7 +132,6 @@ namespace vg {
int max_fanout_base_quality = 20;
int max_fans_out = 5;
size_t max_p_value_memo_size = 500;
size_t band_padding_memo_size = 2000;
double max_exponential_rate_intercept = 0.612045;
double max_exponential_rate_slope = 0.000555181;
double max_exponential_shape_intercept = 12.136;
Expand Down Expand Up @@ -688,8 +683,8 @@ namespace vg {
static thread_local unordered_map<double, vector<int64_t>> pessimistic_gap_memo;
static const size_t gap_memo_max_size;

// a memo for transcendental band padidng function (gets initialized at construction)
vector<size_t> band_padding_memo;
// A function for computing band padding
std::function<size_t(const Alignment&, const HandleGraph&)> choose_band_padding;

#ifdef mpmap_instrument_mem_statistics
public:
Expand Down
4 changes: 2 additions & 2 deletions src/subcommand/mpmap_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <vg/io/vpkg.hpp>
#include "../algorithms/component.hpp"
#include "../algorithms/pad_band.hpp"
#include "../multipath_mapper.hpp"
#include "../mem_accelerator.hpp"
#include "../surjector.hpp"
Expand Down Expand Up @@ -1900,8 +1901,7 @@ int main_mpmap(int argc, char** argv) {
}
multipath_mapper.adjust_alignments_for_base_quality = qual_adjusted;
multipath_mapper.strip_bonuses = strip_full_length_bonus;
multipath_mapper.band_padding_multiplier = band_padding_multiplier;
multipath_mapper.init_band_padding_memo();
multipath_mapper.choose_band_padding = algorithms::pad_band_random_walk(band_padding_multiplier);

// set mem finding parameters
multipath_mapper.hit_max = hit_max;
Expand Down

0 comments on commit 25ad71b

Please sign in to comment.