From a91e8f53ed404290dbdd06ac7ef4996b8aa90de8 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 4 Oct 2023 08:00:36 -0700 Subject: [PATCH] Add incorrect eligible read marking --- scripts/plot-pr.R | 7 ++++++- scripts/plot-qq.R | 7 ++++++- scripts/plot-roc-log.R | 9 +++++++-- scripts/plot-roc.R | 9 +++++++-- src/subcommand/gamcompare_main.cpp | 15 +++++++++++---- 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/scripts/plot-pr.R b/scripts/plot-pr.R index 0fbda529781..dfa565e2d2e 100755 --- a/scripts/plot-pr.R +++ b/scripts/plot-pr.R @@ -10,9 +10,14 @@ require("tidyverse") require("ggrepel") # Read in the combined toil-vg stats.tsv, listing: -# correct, mapq, aligner (really graph name), read name, count +# correct, mapq, aligner (really graph name), read name, count, eligible dat <- read.table(commandArgs(TRUE)[1], header=T) +if (("eligible" %in% names(dat))) { + # If the eligible column is present, remove ineligible reads + dat <- dat[dat$eligible == 1, ] +} + if (! ("count" %in% names(dat))) { # If the count column is not present, add i dat$count <- rep(1, nrow(dat)) diff --git a/scripts/plot-qq.R b/scripts/plot-qq.R index 0f5a8b7074a..250fa15bcee 100755 --- a/scripts/plot-qq.R +++ b/scripts/plot-qq.R @@ -9,9 +9,14 @@ require("tidyverse") require("ggrepel") # Read in the combined toil-vg stats.tsv, listing: -# correct, mapq, aligner (really graph name), read name, count +# correct, mapq, aligner (really graph name), read name, count, eligible dat <- read.table(commandArgs(TRUE)[1], header=T) +if (("eligible" %in% names(dat))) { + # If the eligible column is present, remove ineligible reads + dat <- dat[dat$eligible == 1, ] +} + if (! ("count" %in% names(dat))) { # If the count column is not present, add i dat$count <- rep(1, nrow(dat)) diff --git a/scripts/plot-roc-log.R b/scripts/plot-roc-log.R index 54c3a653436..87321054a9f 100755 --- a/scripts/plot-roc-log.R +++ b/scripts/plot-roc-log.R @@ -20,8 +20,13 @@ require("tidyverse") require("ggrepel") # Read in the combined toil-vg stats.tsv, listing: -# correct, mapq, aligner (really graph name), read name, count -dat <- read.table(commandArgs(TRUE)[1], header=T) +# correct, mapq, aligner (really graph name), read name, count, eligible +dat <- read.table(commandArgs(TRUE)[1], header=T, colClasses=c("aligner"="factor")) + +if (("eligible" %in% names(dat))) { + # If the eligible column is present, remove ineligible reads + dat <- dat[dat$eligible == 1, ] +} if (! ("count" %in% names(dat))) { # If the count column is not present, add i diff --git a/scripts/plot-roc.R b/scripts/plot-roc.R index 3353f0c4d6b..657b4a9782e 100755 --- a/scripts/plot-roc.R +++ b/scripts/plot-roc.R @@ -21,8 +21,13 @@ require("ggrepel") require("scales") # For squish # Read in the combined toil-vg stats.tsv, listing: -# correct, mapq, aligner (really graph name), read name, count -dat <- read.table(commandArgs(TRUE)[1], header=T) +# correct, mapq, aligner (really graph name), read name, count, eligible +dat <- read.table(commandArgs(TRUE)[1], header=T, colClasses=c("aligner"="factor")) + +if (("eligible" %in% names(dat))) { + # If the eligible column is present, remove ineligible reads + dat <- dat[dat$eligible == 1, ] +} if (! ("count" %in% names(dat))) { # If the count column is not present, add i diff --git a/src/subcommand/gamcompare_main.cpp b/src/subcommand/gamcompare_main.cpp index 7cd27270513..638f5f57b85 100644 --- a/src/subcommand/gamcompare_main.cpp +++ b/src/subcommand/gamcompare_main.cpp @@ -257,7 +257,7 @@ int main_gamcompare(int argc, char** argv) { // Output TSV to standard out in the format plot-qq.R needs. if (!header_printed) { // It needs a header - cout << "correct\tmq\taligner\tread" << endl; + cout << "correct\tmq\taligner\tread\teligible" << endl; header_printed = true; } @@ -266,7 +266,8 @@ int main_gamcompare(int argc, char** argv) { cout << (aln.correctly_mapped() ? "1" : "0") << "\t"; cout << aln.mapping_quality() << "\t"; cout << aligner_name << "\t"; - cout << aln.name() << endl; + cout << aln.name() << "\t"; + cout << (aln.to_correct().name().empty() ? "0" : "1") << endl; } text_buffer.clear(); }; @@ -362,11 +363,17 @@ int main_gamcompare(int argc, char** argv) { #pragma omp critical { if (output_tsv) { - text_buffer.emplace_back(std::move(aln)); + if (emitter) { + // Copy the alignment since we need it twice + text_buffer.emplace_back(aln); + } else { + text_buffer.emplace_back(std::move(aln)); + } if (text_buffer.size() > 1000) { flush_text_buffer(); } - } else { + } + if (emitter) { emitter->write(std::move(aln)); } }