diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index ae2991c0238..4cc4224368e 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3105,6 +3105,7 @@ void MinimizerMapper::attempt_rescue(const Alignment& aligned_read, Alignment& r get_regular_aligner()->align_xdrop(rescued_alignment, cached_graph, topological_order, dozeu_seed, false, gap_limit); this->fix_dozeu_score(rescued_alignment, cached_graph, topological_order); + this->fix_dozeu_end_deletions(rescued_alignment); } else { get_regular_aligner()->align(rescued_alignment, cached_graph, topological_order); } @@ -3143,6 +3144,7 @@ void MinimizerMapper::attempt_rescue(const Alignment& aligned_read, Alignment& r size_t gap_limit = this->get_regular_aligner()->longest_detectable_gap(rescued_alignment); get_regular_aligner()->align_xdrop(rescued_alignment, dagified, std::vector(), false, gap_limit); this->fix_dozeu_score(rescued_alignment, dagified, std::vector()); + this->fix_dozeu_end_deletions(rescued_alignment); } else if (this->rescue_algorithm == rescue_gssw) { get_regular_aligner()->align(rescued_alignment, dagified, true); } @@ -3199,6 +3201,55 @@ void MinimizerMapper::fix_dozeu_score(Alignment& rescued_alignment, const Handle } } +void MinimizerMapper::fix_dozeu_end_deletions(Alignment& alignment) const { + + // figure out where the first insert/aligned base occurs on the left side + size_t i = 0, j = 0; + for (; i < alignment.path().mapping_size(); ++i) { + const auto& mapping = alignment.path().mapping(i); + for (j = 0; j < mapping.edit_size(); ++j) { + if (mapping.edit(j).to_length() != 0) { + break; + } + } + if (j != mapping.edit_size()) { + break; + } + } + if (i == alignment.path().mapping_size()) { + // the entire alignment is a deletion, clear it + alignment.clear_path(); + } + else if (i != 0 || j != 0) { + // we found a deletion on the left side, remove it + auto mappings = alignment.mutable_path()->mutable_mapping(); + auto edits = (*mappings)[j].mutable_edit(); + size_t removed = 0; + for (size_t k = 0; k < j; ++k) { + removed += (*edits)[k].from_length(); + } + edits->erase(edits->begin(), edits->begin() + j); + mappings->erase(mappings->begin(), mappings->begin() + i); + auto position = (*mappings)[0].mutable_position(); + position->set_offset(position->offset() + removed); + } + + // remove deletions on the right side + for (int64_t i = alignment.path().mapping_size() - 1; i >= 0; --i) { + auto edits = alignment.mutable_path()->mutable_mapping(i)->mutable_edit(); + while (!edits->empty() && (*edits)[edits->size() - 1].to_length() == 0) { + edits->RemoveLast(); + } + if (edits->empty()) { + alignment.mutable_path()->mutable_mapping()->RemoveLast(); + } + else { + break; + } + } +} + + //----------------------------------------------------------------------------- diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index ab653ea80a6..f71a13f107b 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -645,6 +645,14 @@ class MinimizerMapper : public AlignerClient { */ void fix_dozeu_score(Alignment& rescued_alignment, const HandleGraph& rescue_graph, const std::vector& topological_order) const; + + /** + * When dozeu doesn't have any seeds, it's scan heuristic can lead to + * inaccurate anchoring with the end result that one end of the alignment + * has a deletion that doesn't connect to an aligned base. This function + * removes those deletions + */ + void fix_dozeu_end_deletions(Alignment& rescued_alignment) const; //----------------------------------------------------------------------------- diff --git a/src/unittest/minimizer_mapper.cpp b/src/unittest/minimizer_mapper.cpp index bbcfdbda43f..8b60d21479e 100644 --- a/src/unittest/minimizer_mapper.cpp +++ b/src/unittest/minimizer_mapper.cpp @@ -30,6 +30,7 @@ class TestMinimizerMapper : public MinimizerMapper { using MinimizerMapper::faster_cap; using MinimizerMapper::with_dagified_local_graph; using MinimizerMapper::align_sequence_between; + using MinimizerMapper::fix_dozeu_end_deletions; }; TEST_CASE("Fragment length distribution gets reasonable value", "[giraffe][mapping]") { @@ -386,6 +387,59 @@ TEST_CASE("MinimizerMapper can extract a strand-split dagified local graph witho } +TEST_CASE("MinimizerMapper can fix up alignments with deletions on the ends", "[giraffe][mapping]") { + + gbwtgraph::GBWTGraph gbwt_graph; + gbwt::GBWT gbwt; + gbwt_graph.set_gbwt(gbwt); + gbwtgraph::DefaultMinimizerIndex minimizer_index; + SnarlDistanceIndex distance_index; + PathPositionHandleGraph* handle_graph; + TestMinimizerMapper test_mapper (gbwt_graph, minimizer_index, &distance_index, handle_graph); + + Alignment aln; + auto m1 = aln.mutable_path()->add_mapping(); + auto p1 = m1->mutable_position(); + p1->set_node_id(1); + p1->set_offset(3); + auto e1 = m1->add_edit(); + e1->set_from_length(2); + e1->set_to_length(0); + + auto m2 = aln.mutable_path()->add_mapping(); + auto p2 = m2->mutable_position(); + p2->set_node_id(2); + p2->set_offset(0); + auto e2 = m2->add_edit(); + e2->set_from_length(2); + e2->set_to_length(0); + auto e3 = m2->add_edit(); + e3->set_from_length(1); + e3->set_to_length(1); + auto e4 = m2->add_edit(); + e4->set_from_length(1); + e4->set_to_length(0); + + auto m3 = aln.mutable_path()->add_mapping(); + auto p3 = m3->mutable_position(); + p3->set_node_id(3); + p3->set_offset(0); + auto e5 = m3->add_edit(); + e5->set_from_length(1); + e5->set_to_length(0); + + test_mapper.fix_dozeu_end_deletions(aln); + + REQUIRE(aln.path().mapping_size() == 1); + REQUIRE(aln.path().mapping(0).position().node_id() == 2); + REQUIRE(aln.path().mapping(0).position().offset() == 2); + REQUIRE(aln.path().mapping(0).position().is_reverse() == false); + REQUIRE(aln.path().mapping(0).edit_size() == 1); + REQUIRE(aln.path().mapping(0).edit(0).from_length() == 1); + REQUIRE(aln.path().mapping(0).edit(0).to_length() == 1); + REQUIRE(aln.path().mapping(0).edit(0).sequence() == ""); +} + }