Skip to content

Commit

Permalink
Merge pull request #4534 from vgteam/fix-dozeu-split-alignment
Browse files Browse the repository at this point in the history
Fix incorrect alignments in Giraffe when a tail doubles back onto the anchor node
  • Loading branch information
adamnovak authored Mar 4, 2025
2 parents fd05220 + 6198e16 commit e897604
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 16 deletions.
4 changes: 3 additions & 1 deletion src/dozeu_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ void DozeuInterface::calculate_and_save_alignment(Alignment &alignment, const Or
// aln->query_length can be shorter than alignment.sequence().size() if we
// didn't traceback from the very last base of the query, or if we didn't
// pack the whole query because of an offset.


// Add a mapping to node at index _id.
// Also consults (and clears) ref_offset
#define _push_mapping(_id) ({ \
handle_t n = graph.order[(_id)]; \
Mapping *mapping = path->add_mapping(); \
Expand Down
3 changes: 3 additions & 0 deletions src/dozeu_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,9 @@ class DozeuInterface {
* can emit it as is. If it is false, the nodes were filled right to
* left, and the internal traceback comes out in right to left order,
* so we need to flip it.
*
* All Mappings in the Alignment other than the first will have a zero
* offset in their Positions.
*/
void calculate_and_save_alignment(Alignment& alignment, const OrderedGraph& graph,
const vector<graph_pos_s>& head_positions,
Expand Down
92 changes: 77 additions & 15 deletions src/minimizer_mapper_from_chains.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3777,13 +3777,21 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& l
#endif

// Then trim off the tips that are either in the wrong orientation relative
// to whether we want them to be a source or a sink, or extraneous
// to whether we want them to be a source or a sink, or extraneous.
// Also find the unique tip handles for the potentially cut-down anchor nodes.
// TODO: We re-find them on every trim.

std::vector<handle_t> tip_handles = handlegraph::algorithms::find_tips(&dagified_graph);
bool trimmed;
size_t trim_count = 0;
bool found_left_anchor_tip;
handle_t left_anchor_tip;
bool found_right_anchor_tip;
handle_t right_anchor_tip;
do {
trimmed = false;
found_left_anchor_tip = false;
found_right_anchor_tip = false;
// We need to make sure to remove only one orientation of each handle
// we remove.
std::unordered_set<nid_t> to_remove_ids;
Expand All @@ -3797,13 +3805,37 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& l
#ifdef debug
std::cerr << "Dagified graph node " << dagified_graph.get_id(h) << " " << dagified_graph.get_is_reverse(h) << " is an acceptable source (" << base_coords.first << " " << base_coords.second << ")" << std::endl;
#endif
if (!is_empty(left_anchor)) {
// There should only be one source tip for the left anchor node: the one we actually grabbed the graph from.
// Other nodes for this node shouldn't be this kind of tip.
// Catch any weird bugs in the subgraph extraction
if (found_left_anchor_tip) {
std::stringstream ss;
ss << "Duplicate left anchor tip when extracting " << left_anchor << " to " << right_anchor;
throw std::runtime_error(ss.str());
}
found_left_anchor_tip = true;
left_anchor_tip = h;
}
} else if (dagified_graph.get_is_reverse(h) && (is_empty(right_anchor) || base_coords.first == id(right_anchor))) {
// Tip is inward reverse, so it's a sink.
// This is a tail in the subgraph, and either matches a right
// anchoring node or we don't have any, so keep it.
#ifdef debug
std::cerr << "Dagified graph node " << dagified_graph.get_id(h) << " " << dagified_graph.get_is_reverse(h) << " is an acceptable sink (" << base_coords.first << " " << base_coords.second << ")" << std::endl;
#endif
if (!is_empty(right_anchor)) {
// There should only be one sink tip for the right anchor node: the one we actually grabbed the graph from.
// Other nodes for this node shouldn't be this kind of tip.
// Catch any weird bugs in the subgraph extraction
if (found_right_anchor_tip) {
std::stringstream ss;
ss << "Duplicate right anchor tip when extracting " << left_anchor << " to " << right_anchor;
throw std::runtime_error(ss.str());
}
found_right_anchor_tip = true;
right_anchor_tip = h;
}
} else {
// This is a wrong orientation of an anchoring node, or some other tip.
// We don't want to keep this handle
Expand Down Expand Up @@ -3910,36 +3942,59 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& l
}
}

// And translate back into original graph space
// And translate back into original graph ID and orientation space.
for (size_t i = 0; i < alignment.path().mapping_size(); i++) {
// Translate each mapping's ID and orientation down to the base graph
Mapping* m = alignment.mutable_path()->mutable_mapping(i);

handle_t dagified_handle = dagified_graph.get_handle(m->position().node_id(), m->position().is_reverse());
auto base_coords = dagified_handle_to_base(dagified_handle);

if (i == 0) {
// Make sure that, if the leftmost mapping is on an anchor
// node, it's on a copy with the correct length.
//
// Otherwise the changes we make to the offset of the leftmost
// mapping when it's on an anchor node won't make sense, because we
// won't know we were really on a cut copy.
// TODO: This should always be true.
if (!is_empty(left_anchor) && base_coords.first == id(left_anchor)) {
crash_unless(dagified_graph.get_length(dagified_handle) == dagified_graph.get_length(left_anchor_tip));
}
if (!is_empty(right_anchor) && base_coords.first == id(right_anchor)) {
crash_unless(dagified_graph.get_length(dagified_handle) == dagified_graph.get_length(right_anchor_tip));
}
}

m->mutable_position()->set_node_id(base_coords.first);
m->mutable_position()->set_is_reverse(base_coords.second);
}
if (!is_empty(left_anchor) && alignment.path().mapping_size() > 0) {
// Get the positions of the leftmost mapping
Position* left_pos = alignment.mutable_path()->mutable_mapping(0)->mutable_position();

if (offset(left_anchor) != 0 && offset(left_anchor) < graph->get_length(graph->get_handle(id(left_anchor)))) {
// There is some of the left anchor's node actually in the
// extracted graph. The left anchor isn't past the end of its node.

// The alignment must actually start on the anchor node.
assert(left_pos->node_id() == id(left_anchor));
}
if (alignment.path().mapping_size() > 0) {
// If we anchored on the left, the leftmost mapping will be on a
// cut anchor node and its offset won't reflect the offset on the
// backing graph node.
//
// If we anchored on the right only, it's *still possible* for the
// alignment to turn around so the leftmost mapping is on the same
// cut anchor node we anchored the right end on, facing the other
// direction and offsetting from the cut point.
//
// Adjust the offset on the leftmost mapping so it counts from the
// edge of the entire node and not the cut-down part used for
// anchoring, if it's on one of the cut-down nodes.
Position* left_pos = alignment.mutable_path()->mutable_mapping(0)->mutable_position();

if (left_pos->node_id() == id(left_anchor)) {
if (!is_empty(left_anchor) && left_pos->node_id() == id(left_anchor)) {
// If the alignment does start on the anchor node (even at 0 or at the past-end position)

// Add on the offset for the cut-off piece of the left anchor node
left_pos->set_offset(left_pos->offset() + offset(left_anchor));
} else if (!is_empty(right_anchor) && left_pos->node_id() == id(right_anchor)) {
// If the alignment starts on the right anchor, make sure to add the part that was cut off to the offset.
left_pos->set_offset(left_pos->offset() + graph->get_length(graph->get_handle(id(right_anchor))) - offset(right_anchor));
}
}

if (alignment.path().mapping_size() > 0) {
// Make sure we don't have an empty mapping on the end
auto* last_mapping = alignment.mutable_path()->mutable_mapping(alignment.path().mapping_size() - 1);
Expand Down Expand Up @@ -3992,7 +4047,6 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between_consistently(c

// Now we know a swap is required.


// The anchors face left to right so we need to flip their orientations in addition to swapping them.
// align_sequence_between uses between-base positions for anchoring
pos_t flipped_left_anchor = is_empty(right_anchor) ? empty_pos_t() : reverse(right_anchor, get_node_length(id(right_anchor)));
Expand All @@ -4004,6 +4058,14 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between_consistently(c
// Flip and send the answer
reverse_complement_alignment_in_place(&flipped_query, get_node_length);
alignment = std::move(flipped_query);

// We shouldn't use any nonzero offsets after the first node. We're not
// meant to be making a split alignment.
for (size_t i = 0; i < alignment.path().mapping_size(); i++) {
if (i > 0) {
crash_unless(alignment.path().mapping(i).position().offset() == 0);
}
}

// Return the metadata we track
return result;
Expand Down
81 changes: 81 additions & 0 deletions src/unittest/minimizer_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,87 @@ TEST_CASE("MinimizerMapper can map against subgraphs between abutting points", "
}
}


TEST_CASE("MinimizerMapper can map against subgraphs within a node", "[giraffe][mapping]") {

Aligner aligner;
HashGraph graph;

// We have a big node
auto h1 = graph.create_handle("AAAAGATTG");

Alignment aln;
aln.set_sequence("AGAT");

// Left anchor should be on start
pos_t left_anchor {graph.get_id(h1), false, 3};
// Right anchor should be past end
pos_t right_anchor {graph.get_id(h1), false, 7};

TestMinimizerMapper::align_sequence_between(left_anchor, right_anchor, 100, 20, &graph, &aligner, aln);

// Make sure we get the right alignment
REQUIRE(aln.path().mapping_size() == 1);
REQUIRE(aln.path().mapping(0).position().node_id() == graph.get_id(h1));
REQUIRE(aln.path().mapping(0).position().is_reverse() == graph.get_is_reverse(h1));
REQUIRE(aln.path().mapping(0).position().offset() == offset(left_anchor));
REQUIRE(aln.path().mapping(0).edit_size() == 1);
REQUIRE(aln.path().mapping(0).edit(0).from_length() == 4);
REQUIRE(aln.path().mapping(0).edit(0).to_length() == 4);
REQUIRE(aln.path().mapping(0).edit(0).sequence() == "");

}

TEST_CASE("MinimizerMapper can find mappings where the alignment turns around from the right", "[giraffe][mapping]") {

Aligner aligner;
HashGraph graph;

auto h1 = graph.create_handle("AAAAGAT");
auto h_in = graph.create_handle("G");
graph.create_edge(h_in, h1);
graph.create_edge(graph.flip(h_in), h1);

Alignment aln;
aln.set_sequence("TTTTGAAAAGA");

// Left anchor is empty
pos_t left_anchor;
// Right anchor is at past-end
pos_t right_anchor {graph.get_id(h1), false, 6};

TestMinimizerMapper::align_sequence_between(left_anchor, right_anchor, 100, 20, &graph, &aligner, aln);

// Make sure we get the right alignment
REQUIRE(aln.path().mapping_size() == 3);
REQUIRE(aln.path().mapping(0).position().node_id() == graph.get_id(h1));
REQUIRE(aln.path().mapping(0).position().is_reverse() == !graph.get_is_reverse(h1));
REQUIRE(aln.path().mapping(0).position().offset() == 3);
REQUIRE(aln.path().mapping(0).edit_size() == 1);
REQUIRE(aln.path().mapping(0).edit(0).from_length() == 4);
REQUIRE(aln.path().mapping(0).edit(0).to_length() == 4);
REQUIRE(aln.path().mapping(0).edit(0).sequence() == "");
REQUIRE(aln.path().mapping(1).position().node_id() == graph.get_id(h_in));
REQUIRE(aln.path().mapping(1).position().is_reverse() == graph.get_is_reverse(h_in));
REQUIRE(aln.path().mapping(1).position().offset() == 0);
REQUIRE(aln.path().mapping(1).edit_size() == 1);
REQUIRE(aln.path().mapping(1).edit(0).from_length() == 1);
REQUIRE(aln.path().mapping(1).edit(0).to_length() == 1);
REQUIRE(aln.path().mapping(1).edit(0).sequence() == "");
REQUIRE(aln.path().mapping(2).position().node_id() == graph.get_id(h1));
REQUIRE(aln.path().mapping(2).position().is_reverse() == graph.get_is_reverse(h1));
REQUIRE(aln.path().mapping(2).position().offset() == 0);
REQUIRE(aln.path().mapping(2).edit_size() == 1);
REQUIRE(aln.path().mapping(2).edit(0).from_length() == 6);
REQUIRE(aln.path().mapping(2).edit(0).to_length() == 6);
REQUIRE(aln.path().mapping(2).edit(0).sequence() == "");

// TODO: We can't find the same alignment from the left because we'd need
// to see more of the anchor node than we anchored on.

// TODO: We can't handle a hairpin directly inside the anchor node.
}

TEST_CASE("MinimizerMapper can map an empty string between odd points", "[giraffe][mapping]") {

Aligner aligner;
Expand Down

1 comment on commit e897604

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17792 seconds

Please sign in to comment.