Skip to content

Commit

Permalink
Update the in-memory graph when using vg gbwt --set-reference
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Feb 26, 2025
1 parent aa22551 commit 8145be9
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
24 changes: 17 additions & 7 deletions src/subcommand/gbwt_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ void validate_gbwt_config(GBWTConfig& config);

void step_1_build_gbwts(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config);
void step_2_merge_gbwts(GBWTHandler& gbwts, GBWTConfig& config);
void step_3_alter_gbwt(GBWTHandler& gbwts, GBWTConfig& config);
void step_3_alter_gbwt(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config);
void step_4_path_cover(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config);
void step_5_gbwtgraph(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config);
void step_6_r_index(GBWTHandler& gbwts, GBWTConfig& config);
Expand Down Expand Up @@ -195,7 +195,7 @@ int main_gbwt(int argc, char** argv) {

// Edit the GBWT (remove samples, apply tags).
if (!config.to_remove.empty() || !config.tags_to_set.empty()) {
step_3_alter_gbwt(gbwts, config);
step_3_alter_gbwt(gbwts, graphs, config);
}

// Path cover construction.
Expand Down Expand Up @@ -1501,28 +1501,38 @@ void remove_samples(GBWTHandler& gbwts, GBWTConfig& config) {
report_time_memory("Samples removed", start, config);
}

void set_tags(GBWTHandler& gbwts, GBWTConfig& config) {
void set_tags(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config) {
double start = gbwt::readTimer();
if (config.show_progress) {
std::cerr << "Setting " << config.tags_to_set.size() << " tags on the GBWT" << std::endl;
}

gbwts.use_compressed();
bool set_reference_samples = false;
for (auto& kv : config.tags_to_set) {
gbwts.compressed.tags.set(kv.first, kv.second);
gbwts.compressed.tags.set(kv.first, kv.second);
if (kv.first == gbwtgraph::REFERENCE_SAMPLE_LIST_GBWT_TAG) {
set_reference_samples = true;
}
}
// We modified the GBWT (we assume some tags got set)
gbwts.unbacked();


// If we updated reference samples and have an existing GBWTGraph, we need to recache the named paths.
// This is because we may want to include the reference paths in a path cover GBWT.
if (set_reference_samples && (graphs.in_use == GraphHandler::graph_gbwtgraph || graphs.in_use == GraphHandler::graph_gbz)) {
graphs.gbwt_graph->set_gbwt(gbwts.compressed);
}

report_time_memory("Tags set", start, config);
}

void step_3_alter_gbwt(GBWTHandler& gbwts, GBWTConfig& config) {
void step_3_alter_gbwt(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& config) {
if (!config.to_remove.empty()) {
remove_samples(gbwts, config);
}
if (!config.tags_to_set.empty()) {
set_tags(gbwts, config);
set_tags(gbwts, graphs, config);
}
}

Expand Down
13 changes: 11 additions & 2 deletions test/t/37_vg_gbwt.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 155
plan tests 161


# Build vg graphs for two chromosomes
Expand Down Expand Up @@ -330,7 +330,16 @@ is $(vg gbwt -H large.local.gbwt) 18 "local haplotypes w/ reference paths: 18 ha
is $(vg gbwt -S large.local.gbwt) 18 "local haplotypes w/ reference paths: 18 samples"
is $(vg gbwt --tags large.local.gbwt | grep -c reference_samples) 1 "local haplotypes w/ reference paths: reference_samples set"

rm -f large.gbz large.local.gbwt
# Now the same but with a specified reference sample
vg gbwt -Z large.gbz -l -n 16 --pass-paths --set-reference GRCh38 -o large.local.GCRh38.gbwt
is $? 0 "Local haplotypes with a specified reference sample"
is $(vg gbwt -c large.local.GCRh38.gbwt) 34 "local haplotypes w/ reference paths: 34 paths"
is $(vg gbwt -C large.local.GCRh38.gbwt) 2 "local haplotypes w/ reference paths: 2 contigs"
is $(vg gbwt -H large.local.GCRh38.gbwt) 17 "local haplotypes w/ reference paths: 17 haplotypes"
is $(vg gbwt -S large.local.GCRh38.gbwt) 17 "local haplotypes w/ reference paths: 17 samples"
is $(vg gbwt --tags large.local.GCRh38.gbwt | grep -c reference_samples) 1 "local haplotypes w/ reference paths: reference_samples set"

rm -f large.gbz large.local.gbwt large.local.GCRh38.gbwt


# Build GBWTGraph from an augmented GBWT
Expand Down

1 comment on commit 8145be9

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch sample-fragmented-haplotypes. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17436 seconds

Please sign in to comment.