Skip to content

Commit

Permalink
fix more cycle-related assertion fails
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Jun 4, 2024
1 parent 2f80767 commit 0349d28
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 11 deletions.
20 changes: 15 additions & 5 deletions src/deconstructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,8 +618,7 @@ unordered_map<string, vector<int>> Deconstructor::add_star_traversals(vector<Tra
vector<string>& names,
vector<vector<int>>& trav_clusters,
vector<pair<double, int64_t>>& trav_cluster_info,
const unordered_map<string, vector<int>>& parent_haplotypes) const {

const unordered_map<string, vector<int>>& parent_haplotypes) const {
// todo: refactor this into general genotyping code
unordered_map<string, vector<int>> sample_to_haps;

Expand Down Expand Up @@ -692,6 +691,8 @@ bool Deconstructor::deconstruct_site(const handle_t& snarl_start, const handle_t

// compute all the traversals from embedded paths and gbwt
this->get_traversals(snarl_start, snarl_end, travs, trav_path_names, trav_steps);
int64_t trav_count = travs.size();
int64_t trav_step_count = trav_steps.size();

if (travs.empty()) {
return false;
Expand Down Expand Up @@ -745,7 +746,6 @@ bool Deconstructor::deconstruct_site(const handle_t& snarl_start, const handle_t

// in case of cycles, we need our allele traversals to be associated to the correct reference position
// this is done with the path jaccard metric over all overlapping reference paths the given path_jaccard_window size

vector<int> ref_travs;
// hacky subpath support -- gets added to variant on output
vector<int64_t> ref_offsets;
Expand Down Expand Up @@ -831,6 +831,11 @@ bool Deconstructor::deconstruct_site(const handle_t& snarl_start, const handle_t
// we write a variant for every reference traversal
// (optionally) selecting the subset of path traversals that are 1:1
for (size_t i = 0; i < ref_travs.size(); ++i) {
// we zap these to their original size, as the nesting logic can add
// dummy traversals and these are reference-specific (and so need to be cleaned each iteration here)
travs.resize(trav_count);
trav_path_names.resize(trav_count);
trav_steps.resize(trav_step_count);
auto& ref_trav_idx = ref_travs[i];
auto& ref_trav_offset = ref_offsets[i];

Expand Down Expand Up @@ -907,8 +912,13 @@ bool Deconstructor::deconstruct_site(const handle_t& snarl_start, const handle_t
}
}

if (std::none_of(use_trav.begin(), use_trav.end(), [](bool b) {return b;})) {
// no alts were jaccard-assigned to this reference, so abort before an assertion gets tripped
continue;
}

// Sort the traversals for clustering
vector<int> sorted_travs = get_traversal_order(graph, travs, trav_path_names, ref_travs, use_trav);
vector<int> sorted_travs = get_traversal_order(graph, travs, trav_path_names, ref_travs, ref_trav_idx, use_trav);

// jaccard clustering (using handles for now) on traversals
vector<pair<double, int64_t>> trav_cluster_info;
Expand Down Expand Up @@ -936,7 +946,7 @@ bool Deconstructor::deconstruct_site(const handle_t& snarl_start, const handle_t
#endif

unordered_map<string, vector<int>> sample_to_haps;
if (i == 0 && in_nesting_info != nullptr) {
if (in_nesting_info != nullptr) {
// if the reference traversal is also an alt traversal, we pop out an extra copy
// todo: this is a hack add add off-reference support while keeping the current
// logic where the reference traversal is always distinct from the alts. this step
Expand Down
14 changes: 8 additions & 6 deletions src/traversal_clusters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ vector<int> get_traversal_order(const PathHandleGraph* graph,
const vector<Traversal>& traversals,
const vector<string>& trav_path_names,
const vector<int>& ref_travs,
int64_t ref_trav_idx,
const vector<bool>& use_traversal) {
set<int> ref_set(ref_travs.begin(), ref_travs.end());

Expand All @@ -54,7 +55,7 @@ vector<int> get_traversal_order(const PathHandleGraph* graph,
vector<int> sorted_travs;
sorted_travs.reserve(traversals.size());
for (int64_t i = 0; i < traversals.size(); ++i) {
if (use_traversal[i]) {
if (use_traversal[i] && (i == ref_trav_idx || !ref_set.count(i))) {
sorted_travs.push_back(i);
}
}
Expand Down Expand Up @@ -107,9 +108,10 @@ vector<vector<int>> cluster_traversals(const PathHandleGraph* graph,
// need the clusters as sorted lists. we'll forget the endpoints while we're at
// it since they're always shared. note we work with multisets since we want to
// count differences between, say, cycle copy numbers.
vector<multiset<handle_t>> sorted_traversals;
for (const Traversal& trav : traversals) {
assert(trav.size() >=2 );
vector<multiset<handle_t>> sorted_traversals(traversals.size());
for (const int& i : traversal_order) {
const auto& trav = traversals[i];
assert(trav.size() >=2);
// prune snarl nodes as they're always the same
// todo: does jaccard properly handle empty sets?
int64_t first = trav.size() == 2 ? 0 : 1;
Expand All @@ -118,7 +120,7 @@ vector<vector<int>> cluster_traversals(const PathHandleGraph* graph,
for (int64_t i = first; i < last; ++i) {
sorted_trav.insert(trav[i]);
}
sorted_traversals.push_back(sorted_trav);
sorted_traversals[i] = sorted_trav;
}

for (const int& i : traversal_order) {
Expand Down Expand Up @@ -177,7 +179,7 @@ vector<vector<int>> cluster_traversals(const PathHandleGraph* graph,
}
}
}
assert(uncovered_child_snarls.empty());
assert(uncovered_child_snarls.empty() || traversal_order.size() < traversals.size());

// fill in the size deltas
for (vector<int>& cluster : clusters) {
Expand Down
1 change: 1 addition & 0 deletions src/traversal_clusters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ vector<int> get_traversal_order(const PathHandleGraph* graph,
const vector<Traversal>& traversals,
const vector<string>& trav_path_names,
const vector<int>& ref_travs,
int64_t ref_trav_idx,
const vector<bool>& use_traversal);


Expand Down

1 comment on commit 0349d28

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch deconstruct. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17827 seconds

Please sign in to comment.