diff --git a/src/dozeu_interface.cpp b/src/dozeu_interface.cpp index 77eda8caa6..8ae3e41ffb 100644 --- a/src/dozeu_interface.cpp +++ b/src/dozeu_interface.cpp @@ -418,7 +418,9 @@ void DozeuInterface::calculate_and_save_alignment(Alignment &alignment, const Or // aln->query_length can be shorter than alignment.sequence().size() if we // didn't traceback from the very last base of the query, or if we didn't // pack the whole query because of an offset. - + + // Add a mapping to node at index _id. + // Also consults (and clears) ref_offset #define _push_mapping(_id) ({ \ handle_t n = graph.order[(_id)]; \ Mapping *mapping = path->add_mapping(); \ diff --git a/src/dozeu_interface.hpp b/src/dozeu_interface.hpp index def39d19fb..11203ed802 100644 --- a/src/dozeu_interface.hpp +++ b/src/dozeu_interface.hpp @@ -226,6 +226,9 @@ class DozeuInterface { * can emit it as is. If it is false, the nodes were filled right to * left, and the internal traceback comes out in right to left order, * so we need to flip it. + * + * All Mappings in the Alignment other than the first will have a zero + * offset in their Positions. */ void calculate_and_save_alignment(Alignment& alignment, const OrderedGraph& graph, const vector& head_positions, diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp index 24932b3652..bb36b885db 100644 --- a/src/minimizer_mapper_from_chains.cpp +++ b/src/minimizer_mapper_from_chains.cpp @@ -3777,13 +3777,21 @@ std::pair MinimizerMapper::align_sequence_between(const pos_t& l #endif // Then trim off the tips that are either in the wrong orientation relative - // to whether we want them to be a source or a sink, or extraneous + // to whether we want them to be a source or a sink, or extraneous. + // Also find the unique tip handles for the potentially cut-down anchor nodes. + // TODO: We re-find them on every trim. std::vector tip_handles = handlegraph::algorithms::find_tips(&dagified_graph); bool trimmed; size_t trim_count = 0; + bool found_left_anchor_tip; + handle_t left_anchor_tip; + bool found_right_anchor_tip; + handle_t right_anchor_tip; do { trimmed = false; + found_left_anchor_tip = false; + found_right_anchor_tip = false; // We need to make sure to remove only one orientation of each handle // we remove. std::unordered_set to_remove_ids; @@ -3797,6 +3805,18 @@ std::pair MinimizerMapper::align_sequence_between(const pos_t& l #ifdef debug std::cerr << "Dagified graph node " << dagified_graph.get_id(h) << " " << dagified_graph.get_is_reverse(h) << " is an acceptable source (" << base_coords.first << " " << base_coords.second << ")" << std::endl; #endif + if (!is_empty(left_anchor)) { + // There should only be one source tip for the left anchor node: the one we actually grabbed the graph from. + // Other nodes for this node shouldn't be this kind of tip. + // Catch any weird bugs in the subgraph extraction + if (found_left_anchor_tip) { + std::stringstream ss; + ss << "Duplicate left anchor tip when extracting " << left_anchor << " to " << right_anchor; + throw std::runtime_error(ss.str()); + } + found_left_anchor_tip = true; + left_anchor_tip = h; + } } else if (dagified_graph.get_is_reverse(h) && (is_empty(right_anchor) || base_coords.first == id(right_anchor))) { // Tip is inward reverse, so it's a sink. // This is a tail in the subgraph, and either matches a right @@ -3804,6 +3824,18 @@ std::pair MinimizerMapper::align_sequence_between(const pos_t& l #ifdef debug std::cerr << "Dagified graph node " << dagified_graph.get_id(h) << " " << dagified_graph.get_is_reverse(h) << " is an acceptable sink (" << base_coords.first << " " << base_coords.second << ")" << std::endl; #endif + if (!is_empty(right_anchor)) { + // There should only be one sink tip for the right anchor node: the one we actually grabbed the graph from. + // Other nodes for this node shouldn't be this kind of tip. + // Catch any weird bugs in the subgraph extraction + if (found_right_anchor_tip) { + std::stringstream ss; + ss << "Duplicate right anchor tip when extracting " << left_anchor << " to " << right_anchor; + throw std::runtime_error(ss.str()); + } + found_right_anchor_tip = true; + right_anchor_tip = h; + } } else { // This is a wrong orientation of an anchoring node, or some other tip. // We don't want to keep this handle @@ -3910,36 +3942,59 @@ std::pair MinimizerMapper::align_sequence_between(const pos_t& l } } - // And translate back into original graph space + // And translate back into original graph ID and orientation space. for (size_t i = 0; i < alignment.path().mapping_size(); i++) { // Translate each mapping's ID and orientation down to the base graph Mapping* m = alignment.mutable_path()->mutable_mapping(i); handle_t dagified_handle = dagified_graph.get_handle(m->position().node_id(), m->position().is_reverse()); auto base_coords = dagified_handle_to_base(dagified_handle); + + if (i == 0) { + // Make sure that, if the leftmost mapping is on an anchor + // node, it's on a copy with the correct length. + // + // Otherwise the changes we make to the offset of the leftmost + // mapping when it's on an anchor node won't make sense, because we + // won't know we were really on a cut copy. + // TODO: This should always be true. + if (!is_empty(left_anchor) && base_coords.first == id(left_anchor)) { + crash_unless(dagified_graph.get_length(dagified_handle) == dagified_graph.get_length(left_anchor_tip)); + } + if (!is_empty(right_anchor) && base_coords.first == id(right_anchor)) { + crash_unless(dagified_graph.get_length(dagified_handle) == dagified_graph.get_length(right_anchor_tip)); + } + } m->mutable_position()->set_node_id(base_coords.first); m->mutable_position()->set_is_reverse(base_coords.second); } - if (!is_empty(left_anchor) && alignment.path().mapping_size() > 0) { - // Get the positions of the leftmost mapping - Position* left_pos = alignment.mutable_path()->mutable_mapping(0)->mutable_position(); - - if (offset(left_anchor) != 0 && offset(left_anchor) < graph->get_length(graph->get_handle(id(left_anchor)))) { - // There is some of the left anchor's node actually in the - // extracted graph. The left anchor isn't past the end of its node. - // The alignment must actually start on the anchor node. - assert(left_pos->node_id() == id(left_anchor)); - } + if (alignment.path().mapping_size() > 0) { + // If we anchored on the left, the leftmost mapping will be on a + // cut anchor node and its offset won't reflect the offset on the + // backing graph node. + // + // If we anchored on the right only, it's *still possible* for the + // alignment to turn around so the leftmost mapping is on the same + // cut anchor node we anchored the right end on, facing the other + // direction and offsetting from the cut point. + // + // Adjust the offset on the leftmost mapping so it counts from the + // edge of the entire node and not the cut-down part used for + // anchoring, if it's on one of the cut-down nodes. + Position* left_pos = alignment.mutable_path()->mutable_mapping(0)->mutable_position(); - if (left_pos->node_id() == id(left_anchor)) { + if (!is_empty(left_anchor) && left_pos->node_id() == id(left_anchor)) { // If the alignment does start on the anchor node (even at 0 or at the past-end position) - // Add on the offset for the cut-off piece of the left anchor node left_pos->set_offset(left_pos->offset() + offset(left_anchor)); + } else if (!is_empty(right_anchor) && left_pos->node_id() == id(right_anchor)) { + // If the alignment starts on the right anchor, make sure to add the part that was cut off to the offset. + left_pos->set_offset(left_pos->offset() + graph->get_length(graph->get_handle(id(right_anchor))) - offset(right_anchor)); } } + if (alignment.path().mapping_size() > 0) { // Make sure we don't have an empty mapping on the end auto* last_mapping = alignment.mutable_path()->mutable_mapping(alignment.path().mapping_size() - 1); @@ -3992,7 +4047,6 @@ std::pair MinimizerMapper::align_sequence_between_consistently(c // Now we know a swap is required. - // The anchors face left to right so we need to flip their orientations in addition to swapping them. // align_sequence_between uses between-base positions for anchoring pos_t flipped_left_anchor = is_empty(right_anchor) ? empty_pos_t() : reverse(right_anchor, get_node_length(id(right_anchor))); @@ -4004,6 +4058,14 @@ std::pair MinimizerMapper::align_sequence_between_consistently(c // Flip and send the answer reverse_complement_alignment_in_place(&flipped_query, get_node_length); alignment = std::move(flipped_query); + + // We shouldn't use any nonzero offsets after the first node. We're not + // meant to be making a split alignment. + for (size_t i = 0; i < alignment.path().mapping_size(); i++) { + if (i > 0) { + crash_unless(alignment.path().mapping(i).position().offset() == 0); + } + } // Return the metadata we track return result; diff --git a/src/unittest/minimizer_mapper.cpp b/src/unittest/minimizer_mapper.cpp index d6309380f4..e517653b1f 100644 --- a/src/unittest/minimizer_mapper.cpp +++ b/src/unittest/minimizer_mapper.cpp @@ -347,6 +347,87 @@ TEST_CASE("MinimizerMapper can map against subgraphs between abutting points", " } } + +TEST_CASE("MinimizerMapper can map against subgraphs within a node", "[giraffe][mapping]") { + + Aligner aligner; + HashGraph graph; + + // We have a big node + auto h1 = graph.create_handle("AAAAGATTG"); + + Alignment aln; + aln.set_sequence("AGAT"); + + // Left anchor should be on start + pos_t left_anchor {graph.get_id(h1), false, 3}; + // Right anchor should be past end + pos_t right_anchor {graph.get_id(h1), false, 7}; + + TestMinimizerMapper::align_sequence_between(left_anchor, right_anchor, 100, 20, &graph, &aligner, aln); + + // Make sure we get the right alignment + REQUIRE(aln.path().mapping_size() == 1); + REQUIRE(aln.path().mapping(0).position().node_id() == graph.get_id(h1)); + REQUIRE(aln.path().mapping(0).position().is_reverse() == graph.get_is_reverse(h1)); + REQUIRE(aln.path().mapping(0).position().offset() == offset(left_anchor)); + REQUIRE(aln.path().mapping(0).edit_size() == 1); + REQUIRE(aln.path().mapping(0).edit(0).from_length() == 4); + REQUIRE(aln.path().mapping(0).edit(0).to_length() == 4); + REQUIRE(aln.path().mapping(0).edit(0).sequence() == ""); + +} + +TEST_CASE("MinimizerMapper can find mappings where the alignment turns around from the right", "[giraffe][mapping]") { + + Aligner aligner; + HashGraph graph; + + auto h1 = graph.create_handle("AAAAGAT"); + auto h_in = graph.create_handle("G"); + graph.create_edge(h_in, h1); + graph.create_edge(graph.flip(h_in), h1); + + Alignment aln; + aln.set_sequence("TTTTGAAAAGA"); + + // Left anchor is empty + pos_t left_anchor; + // Right anchor is at past-end + pos_t right_anchor {graph.get_id(h1), false, 6}; + + TestMinimizerMapper::align_sequence_between(left_anchor, right_anchor, 100, 20, &graph, &aligner, aln); + + // Make sure we get the right alignment + REQUIRE(aln.path().mapping_size() == 3); + REQUIRE(aln.path().mapping(0).position().node_id() == graph.get_id(h1)); + REQUIRE(aln.path().mapping(0).position().is_reverse() == !graph.get_is_reverse(h1)); + REQUIRE(aln.path().mapping(0).position().offset() == 3); + REQUIRE(aln.path().mapping(0).edit_size() == 1); + REQUIRE(aln.path().mapping(0).edit(0).from_length() == 4); + REQUIRE(aln.path().mapping(0).edit(0).to_length() == 4); + REQUIRE(aln.path().mapping(0).edit(0).sequence() == ""); + REQUIRE(aln.path().mapping(1).position().node_id() == graph.get_id(h_in)); + REQUIRE(aln.path().mapping(1).position().is_reverse() == graph.get_is_reverse(h_in)); + REQUIRE(aln.path().mapping(1).position().offset() == 0); + REQUIRE(aln.path().mapping(1).edit_size() == 1); + REQUIRE(aln.path().mapping(1).edit(0).from_length() == 1); + REQUIRE(aln.path().mapping(1).edit(0).to_length() == 1); + REQUIRE(aln.path().mapping(1).edit(0).sequence() == ""); + REQUIRE(aln.path().mapping(2).position().node_id() == graph.get_id(h1)); + REQUIRE(aln.path().mapping(2).position().is_reverse() == graph.get_is_reverse(h1)); + REQUIRE(aln.path().mapping(2).position().offset() == 0); + REQUIRE(aln.path().mapping(2).edit_size() == 1); + REQUIRE(aln.path().mapping(2).edit(0).from_length() == 6); + REQUIRE(aln.path().mapping(2).edit(0).to_length() == 6); + REQUIRE(aln.path().mapping(2).edit(0).sequence() == ""); + + // TODO: We can't find the same alignment from the left because we'd need + // to see more of the anchor node than we anchored on. + + // TODO: We can't handle a hairpin directly inside the anchor node. +} + TEST_CASE("MinimizerMapper can map an empty string between odd points", "[giraffe][mapping]") { Aligner aligner;