From c91e4e0c55aad63999cfdb27e45d08ce800e48e5 Mon Sep 17 00:00:00 2001 From: "Zachary S.L. Foster" Date: Mon, 11 Sep 2023 13:51:22 -0700 Subject: [PATCH] made profiles add execution reports to output --- assets/main_report/01-identification.Rmd | 2 +- assets/main_report/_main.Rmd | 198 ----------------------- conf/test.config | 20 +++ conf/test_full.config | 22 ++- conf/test_medium.config | 21 ++- workflows/pathogensurveillance.nf | 2 + 6 files changed, 64 insertions(+), 201 deletions(-) delete mode 100644 assets/main_report/_main.Rmd diff --git a/assets/main_report/01-identification.Rmd b/assets/main_report/01-identification.Rmd index 00e2cfa9..068162b6 100644 --- a/assets/main_report/01-identification.Rmd +++ b/assets/main_report/01-identification.Rmd @@ -50,7 +50,7 @@ sample_ids <- core_tree$tip.label[core_tree$tip.label %in% samp_meta$modified_id colnames(ani_matrix) <- gsub(colnames(ani_matrix), pattern = "[.-]", replacement = "_") rownames(ani_matrix) <- colnames(ani_matrix) group_ani <- ani_matrix[rownames(ani_matrix) %in% core_tree$tip.label, colnames(ani_matrix) %in% core_tree$tip.label] -core_tree <- root(core_tree, names(which.min(colMeans(group_ani[sample_ids, ])))) +#core_tree <- root(core_tree, names(which.min(colMeans(group_ani[sample_ids, ])))) # Set tip labels to taxon names for reference sequences # TODO: need a more reliable way to get IDs diff --git a/assets/main_report/_main.Rmd b/assets/main_report/_main.Rmd deleted file mode 100644 index 9d4453cd..00000000 --- a/assets/main_report/_main.Rmd +++ /dev/null @@ -1,198 +0,0 @@ ---- -title: "Pathogensurveillance Report" -date: "`r Sys.Date()`" -site: bookdown::bookdown_site -documentclass: book -bibliography: [book.bib, packages.bib] -# url: your book url like https://bookdown.org/yihui/bookdown -# cover-image: path to the social sharing image like images/cover.jpg -description: | - A report generated by the Pathogensurveillance genome analysis pipeline. -link-citations: yes -github-repo: rstudio/bookdown-demo -params: - samp_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/test/data/metadata_medium.csv" - ref_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/merged_assembly_stats.tsv" - snp_phylos: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test_22_331_assembly.treefile" - ani_matrix: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/comp.csv" - core_phylo: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test.treefile" ---- - -# Run info - -This is the first page a user sees. -What should go here? - -## Input settings - -## Software used - - -```{r include=FALSE} -# automatically create a bib database for R packages -knitr::write_bib(c( - .packages(), 'bookdown', 'knitr', 'rmarkdown' -), 'packages.bib') -``` - - - -# Identification - -```{r include=FALSE, eval=FALSE} -work_dir = "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581" -params = list( - samp_data = "metadata_medium.csv", - ref_data = "merged_assembly_stats.tsv", - snp_phylos = list("xan_test_22_331_assembly.treefile"), - ani_matrix = "comp.csv", - core_phylo = "xan_test.treefile" -) -params = lapply(params, function(x) file.path(work_dir, x)) -``` - - -```{r id_setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, fig.width = 10, warning = FALSE) -``` - -```{r id_libraries} -library(phylocanvas) -library(ape) -library(magrittr) -``` - -```{r id_parse_inputs} -ref_meta <- read.csv(params$ref_data, sep = '\t') -ref_meta$modified_id <- gsub(ref_meta$LastMajorReleaseAccession, pattern = ".", replacement = "_", fixed = TRUE) -samp_meta <- read.csv(params$samp_data, sep = ',') -samp_meta$modified_id <- paste0(gsub(samp_meta$sample, pattern = "-", replacement = "_", fixed = TRUE), "_T1") -ani_matrix <- read.csv(params$ani_matrix, sep = ',', check.names = FALSE) -core_tree <- ape::read.tree(params$core_phylo) -snp_trees <- ape::read.tree(params$snp_phylos) -``` - - -## Initial ANI tree - - -## Core genome phylogeny - - -```{r id_core_phylo, fig.height = 7, eval = ! is.null(core_tree)} -# Identify which tips are samples and references -sample_ids <- core_tree$tip.label[core_tree$tip.label %in% samp_meta$modified_id] - -# Root tree -colnames(ani_matrix) <- gsub(colnames(ani_matrix), pattern = "[.-]", replacement = "_") -rownames(ani_matrix) <- colnames(ani_matrix) -group_ani <- ani_matrix[rownames(ani_matrix) %in% core_tree$tip.label, colnames(ani_matrix) %in% core_tree$tip.label] -core_tree <- root(core_tree, names(which.min(colMeans(group_ani[sample_ids, ])))) - -# Set tip labels to taxon names for reference sequences -# TODO: need a more reliable way to get IDs -name_key <- c( - ref_meta$Organism, - samp_meta$sample -) -names(name_key) <- c( - ref_meta$modified_id, - samp_meta$modified_id -) -core_tree$tip.label <- name_key[core_tree$tip.label] - -# Plot tree -phycanv <- phylocanvas(core_tree, treetype = "rectangular", alignlabels = T, showscalebar = T, width = "100%") -for (x in name_key[sample_ids]) { - phycanv <- style_node(phycanv, x, labelcolor = "green", labeltextsize = 30) -} - -phycanv -``` - -```{asis id_no_core_phylo, echo = is.null(core_tree)} -There is no tree to draw, probably because there were too few samples. -More info will be added later. -``` - - - - - - - - -# Diversity - -```{r include=FALSE, eval=FALSE} -work_dir = "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581" -params = list( - samp_data = "metadata_medium.csv", - ref_data = "merged_assembly_stats.tsv", - snp_phylos = list("xan_test_22_331_assembly.treefile"), - ani_matrix = "comp.csv", - core_phylo = "xan_test.treefile" -) -params = lapply(params, function(x) file.path(work_dir, x)) -``` - - -```{r div_setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, fig.width = 10, warning = FALSE) -``` - -```{r div_libraries} -library(phylocanvas) -library(ape) -``` - -```{r div_parse_inputs} -ref_meta <- read.csv(params$ref_data, sep = '\t') -ref_meta$modified_id <- gsub(ref_meta$LastMajorReleaseAccession, pattern = ".", replacement = "_", fixed = TRUE) -samp_meta <- read.csv(params$samp_data, sep = ',') -samp_meta$modified_id <- paste0(gsub(samp_meta$sample, pattern = "-", replacement = "_", fixed = TRUE), "_T1") -ani_matrix <- read.csv(params$ani_matrix, sep = ',', check.names = FALSE) -core_tree <- ape::read.tree(params$core_phylo) -snp_trees <- ape::read.tree(params$snp_phylos) -``` - -## SNP phylogeny - -```{r div_snp_phylo, fig.height = 7, eval = ! is.null(snp_trees)} -# Root tree -snp_trees <- root(snp_trees, "REF") - -# Plot tree -phycanv <- phylocanvas(snp_trees, treetype = "rectangular", alignlabels = T, showscalebar = T, width = "100%") -phycanv -``` - -```{asis div_no_snp_phylo, echo = is.null(snp_trees)} -There is no tree to draw, probably because there were too few samples. -More info will be added later. -``` - - -## Minimum spanning network - - - - -# Gene content - -## Antibotic resistance genes - -## Effectors - -## Plasmids - -## User-defined genes - - - -`r if (knitr::is_html_output()) ' -# References {-} -'` - - - diff --git a/conf/test.config b/conf/test.config index 0d6e80aa..d5b3c3ec 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,3 +22,23 @@ params { input = 'test/data/metadata_small.csv' outdir = 'test/output_small' } + +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report.html" + overwrite = true +} + +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/timeline_report.html" + overwrite = true +} + +trace { + enabled = true + file = "${params.outdir}/pipeline_info/trace_report.tsv" + overwrite = true + fields = "task_id,hash,native_id,process,tag,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt,workdir,scratch,error_action" +} + diff --git a/conf/test_full.config b/conf/test_full.config index 79fc79a5..6f0a60b5 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/plantpathsurveil -profile test_full, --outdir + nextflow run nf-core/pathogensurveillance -profile test_full, --outdir ---------------------------------------------------------------------------------------- */ @@ -23,3 +23,23 @@ params { input = 'test/data/metadata_full.csv' outdir = 'test/output_full' } + +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report.html" + overwrite = true +} + +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/timeline_report.html" + overwrite = true +} + +trace { + enabled = true + file = "${params.outdir}/pipeline_info/trace_report.tsv" + overwrite = true + fields = "task_id,hash,native_id,process,tag,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt,workdir,scratch,error_action" +} + diff --git a/conf/test_medium.config b/conf/test_medium.config index 187f4652..d8f0b3d9 100644 --- a/conf/test_medium.config +++ b/conf/test_medium.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/plantpathsurveil -profile test_medium, --outdir + nextflow run nf-core/pathogensurveillance -profile test_medium, --outdir ---------------------------------------------------------------------------------------- */ @@ -24,3 +24,22 @@ params { outdir = 'test/output_medium' } +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report.html" + overwrite = true +} + +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/timeline_report.html" + overwrite = true +} + +trace { + enabled = true + file = "${params.outdir}/pipeline_info/trace_report.tsv" + overwrite = true + fields = "task_id,hash,native_id,process,tag,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt,workdir,scratch,error_action" +} + diff --git a/workflows/pathogensurveillance.nf b/workflows/pathogensurveillance.nf index ebb3121b..d5dfb3ca 100644 --- a/workflows/pathogensurveillance.nf +++ b/workflows/pathogensurveillance.nf @@ -168,6 +168,8 @@ workflow PATHOGENSURVEILLANCE { CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collect(sort:true) ) + + println "$workflow.manifest" // MultiQC //workflow_summary = WorkflowPathogensurveillance.paramsSummaryMultiqc(workflow, summary_params)