diff --git a/src/subcommand/gbwt_main.cpp b/src/subcommand/gbwt_main.cpp index 4eca4e47a8..e7d2609187 100644 --- a/src/subcommand/gbwt_main.cpp +++ b/src/subcommand/gbwt_main.cpp @@ -158,7 +158,7 @@ void validate_gbwt_config(GBWTConfig& config); void step_1_build_gbwts(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config); void step_2_merge_gbwts(GBWTHandler& gbwts, GBWTConfig& config); -void step_3_alter_gbwt(GBWTHandler& gbwts, GBWTConfig& config); +void step_3_alter_gbwt(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config); void step_4_path_cover(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config); void step_5_gbwtgraph(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config); void step_6_r_index(GBWTHandler& gbwts, GBWTConfig& config); @@ -195,7 +195,7 @@ int main_gbwt(int argc, char** argv) { // Edit the GBWT (remove samples, apply tags). if (!config.to_remove.empty() || !config.tags_to_set.empty()) { - step_3_alter_gbwt(gbwts, config); + step_3_alter_gbwt(gbwts, graphs, config); } // Path cover construction. @@ -1501,28 +1501,38 @@ void remove_samples(GBWTHandler& gbwts, GBWTConfig& config) { report_time_memory("Samples removed", start, config); } -void set_tags(GBWTHandler& gbwts, GBWTConfig& config) { +void set_tags(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config) { double start = gbwt::readTimer(); if (config.show_progress) { std::cerr << "Setting " << config.tags_to_set.size() << " tags on the GBWT" << std::endl; } gbwts.use_compressed(); + bool set_reference_samples = false; for (auto& kv : config.tags_to_set) { - gbwts.compressed.tags.set(kv.first, kv.second); + gbwts.compressed.tags.set(kv.first, kv.second); + if (kv.first == gbwtgraph::REFERENCE_SAMPLE_LIST_GBWT_TAG) { + set_reference_samples = true; + } } // We modified the GBWT (we assume some tags got set) gbwts.unbacked(); - + + // If we updated reference samples and have an existing GBWTGraph, we need to recache the named paths. + // This is because we may want to include the reference paths in a path cover GBWT. + if (set_reference_samples && (graphs.in_use == GraphHandler::graph_gbwtgraph || graphs.in_use == GraphHandler::graph_gbz)) { + graphs.gbwt_graph->set_gbwt(gbwts.compressed); + } + report_time_memory("Tags set", start, config); } -void step_3_alter_gbwt(GBWTHandler& gbwts, GBWTConfig& config) { +void step_3_alter_gbwt(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config) { if (!config.to_remove.empty()) { remove_samples(gbwts, config); } if (!config.tags_to_set.empty()) { - set_tags(gbwts, config); + set_tags(gbwts, graphs, config); } } diff --git a/test/t/37_vg_gbwt.t b/test/t/37_vg_gbwt.t index a5849bc47a..4260f5f596 100644 --- a/test/t/37_vg_gbwt.t +++ b/test/t/37_vg_gbwt.t @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap PATH=../bin:$PATH # for vg -plan tests 155 +plan tests 161 # Build vg graphs for two chromosomes @@ -330,7 +330,16 @@ is $(vg gbwt -H large.local.gbwt) 18 "local haplotypes w/ reference paths: 18 ha is $(vg gbwt -S large.local.gbwt) 18 "local haplotypes w/ reference paths: 18 samples" is $(vg gbwt --tags large.local.gbwt | grep -c reference_samples) 1 "local haplotypes w/ reference paths: reference_samples set" -rm -f large.gbz large.local.gbwt +# Now the same but with a specified reference sample +vg gbwt -Z large.gbz -l -n 16 --pass-paths --set-reference GRCh38 -o large.local.GCRh38.gbwt +is $? 0 "Local haplotypes with a specified reference sample" +is $(vg gbwt -c large.local.GCRh38.gbwt) 34 "local haplotypes w/ reference paths: 34 paths" +is $(vg gbwt -C large.local.GCRh38.gbwt) 2 "local haplotypes w/ reference paths: 2 contigs" +is $(vg gbwt -H large.local.GCRh38.gbwt) 17 "local haplotypes w/ reference paths: 17 haplotypes" +is $(vg gbwt -S large.local.GCRh38.gbwt) 17 "local haplotypes w/ reference paths: 17 samples" +is $(vg gbwt --tags large.local.GCRh38.gbwt | grep -c reference_samples) 1 "local haplotypes w/ reference paths: reference_samples set" + +rm -f large.gbz large.local.gbwt large.local.GCRh38.gbwt # Build GBWTGraph from an augmented GBWT