From c91e4e0c55aad63999cfdb27e45d08ce800e48e5 Mon Sep 17 00:00:00 2001
From: "Zachary S.L. Foster" <zacharyfoster1989@gmail.com>
Date: Mon, 11 Sep 2023 13:51:22 -0700
Subject: [PATCH] Make test profiles write execution reports to the output directory

---
 assets/main_report/01-identification.Rmd |   2 +-
 assets/main_report/_main.Rmd             | 198 -----------------------
 conf/test.config                         |  20 +++
 conf/test_full.config                    |  22 ++-
 conf/test_medium.config                  |  21 ++-
 workflows/pathogensurveillance.nf        |   2 +
 6 files changed, 64 insertions(+), 201 deletions(-)
 delete mode 100644 assets/main_report/_main.Rmd

diff --git a/assets/main_report/01-identification.Rmd b/assets/main_report/01-identification.Rmd
index 00e2cfa9..068162b6 100644
--- a/assets/main_report/01-identification.Rmd
+++ b/assets/main_report/01-identification.Rmd
@@ -50,7 +50,7 @@ sample_ids <- core_tree$tip.label[core_tree$tip.label %in% samp_meta$modified_id
 colnames(ani_matrix) <- gsub(colnames(ani_matrix), pattern = "[.-]", replacement = "_")
 rownames(ani_matrix) <- colnames(ani_matrix)
 group_ani <- ani_matrix[rownames(ani_matrix) %in% core_tree$tip.label, colnames(ani_matrix) %in% core_tree$tip.label]
-core_tree <- root(core_tree, names(which.min(colMeans(group_ani[sample_ids, ]))))
+#core_tree <- root(core_tree, names(which.min(colMeans(group_ani[sample_ids, ])))) # NOTE(review): rooting disabled by this commit, reason not recorded; confirm before re-enabling
 
 # Set tip labels to taxon names for reference sequences
 # TODO: need a more reliable way to get IDs
diff --git a/assets/main_report/_main.Rmd b/assets/main_report/_main.Rmd
deleted file mode 100644
index 9d4453cd..00000000
--- a/assets/main_report/_main.Rmd
+++ /dev/null
@@ -1,198 +0,0 @@
---- 
-title: "Pathogensurveillance Report"
-date: "`r Sys.Date()`"
-site: bookdown::bookdown_site
-documentclass: book
-bibliography: [book.bib, packages.bib]
-# url: your book url like https://bookdown.org/yihui/bookdown
-# cover-image: path to the social sharing image like images/cover.jpg
-description: |
-  A report generated by the Pathogensurveillance genome analysis pipeline.
-link-citations: yes
-github-repo: rstudio/bookdown-demo
-params:
-    samp_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/test/data/metadata_medium.csv"
-    ref_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/merged_assembly_stats.tsv"
-    snp_phylos: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test_22_331_assembly.treefile"
-    ani_matrix: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/comp.csv"
-    core_phylo: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test.treefile"
----
-
-# Run info
-
-This is the first page a user sees.
-What should go here?
-
-## Input settings
-
-## Software used
-
-
-```{r include=FALSE}
-# automatically create a bib database for R packages
-knitr::write_bib(c(
-  .packages(), 'bookdown', 'knitr', 'rmarkdown'
-), 'packages.bib')
-```
-
-<!--chapter:end:index.Rmd-->
-
-# Identification
-
-```{r include=FALSE, eval=FALSE}
-work_dir = "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581"
-params = list(
-  samp_data = "metadata_medium.csv",
-  ref_data = "merged_assembly_stats.tsv",
-  snp_phylos = list("xan_test_22_331_assembly.treefile"),
-  ani_matrix = "comp.csv",
-  core_phylo = "xan_test.treefile"
-)
-params = lapply(params, function(x) file.path(work_dir, x))
-```
-
-
-```{r id_setup, include=FALSE}
-knitr::opts_chunk$set(echo = FALSE, fig.width = 10, warning = FALSE)
-```
-
-```{r id_libraries}
-library(phylocanvas)
-library(ape)
-library(magrittr)
-```
-
-```{r id_parse_inputs}
-ref_meta <- read.csv(params$ref_data, sep = '\t')
-ref_meta$modified_id <- gsub(ref_meta$LastMajorReleaseAccession, pattern = ".", replacement = "_", fixed = TRUE)
-samp_meta <- read.csv(params$samp_data, sep = ',')
-samp_meta$modified_id <- paste0(gsub(samp_meta$sample, pattern = "-", replacement = "_", fixed = TRUE), "_T1")
-ani_matrix <-  read.csv(params$ani_matrix, sep = ',', check.names = FALSE)
-core_tree <- ape::read.tree(params$core_phylo)
-snp_trees <- ape::read.tree(params$snp_phylos)
-```
-
-
-## Initial ANI tree
-
-
-## Core genome phylogeny
-
-
-```{r id_core_phylo, fig.height = 7, eval = ! is.null(core_tree)}
-# Identify which tips are samples and references
-sample_ids <- core_tree$tip.label[core_tree$tip.label %in% samp_meta$modified_id]
-
-# Root tree 
-colnames(ani_matrix) <- gsub(colnames(ani_matrix), pattern = "[.-]", replacement = "_")
-rownames(ani_matrix) <- colnames(ani_matrix)
-group_ani <- ani_matrix[rownames(ani_matrix) %in% core_tree$tip.label, colnames(ani_matrix) %in% core_tree$tip.label]
-core_tree <- root(core_tree, names(which.min(colMeans(group_ani[sample_ids, ]))))
-
-# Set tip labels to taxon names for reference sequences
-# TODO: need a more reliable way to get IDs
-name_key <- c(
-  ref_meta$Organism, 
-  samp_meta$sample
-)
-names(name_key) <- c(
-  ref_meta$modified_id,
-  samp_meta$modified_id
-)
-core_tree$tip.label <- name_key[core_tree$tip.label]
-
-# Plot tree
-phycanv <- phylocanvas(core_tree, treetype = "rectangular", alignlabels = T, showscalebar = T, width = "100%")
-for (x in name_key[sample_ids]) {
-  phycanv <- style_node(phycanv, x, labelcolor = "green", labeltextsize = 30)
-}
-    
-phycanv
-```
-
-```{asis id_no_core_phylo, echo = is.null(core_tree)}
-There is no tree to draw, probably because there were too few samples.
-More info will be added later.
-```
-
-
-
-
-
-
-<!--chapter:end:01-identification.Rmd-->
-
-# Diversity
-
-```{r include=FALSE, eval=FALSE}
-work_dir = "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581"
-params = list(
-  samp_data = "metadata_medium.csv",
-  ref_data = "merged_assembly_stats.tsv",
-  snp_phylos = list("xan_test_22_331_assembly.treefile"),
-  ani_matrix = "comp.csv",
-  core_phylo = "xan_test.treefile"
-)
-params = lapply(params, function(x) file.path(work_dir, x))
-```
-
-
-```{r div_setup, include=FALSE}
-knitr::opts_chunk$set(echo = FALSE, fig.width = 10, warning = FALSE)
-```
-
-```{r div_libraries}
-library(phylocanvas)
-library(ape)
-```
-
-```{r div_parse_inputs}
-ref_meta <- read.csv(params$ref_data, sep = '\t')
-ref_meta$modified_id <- gsub(ref_meta$LastMajorReleaseAccession, pattern = ".", replacement = "_", fixed = TRUE)
-samp_meta <- read.csv(params$samp_data, sep = ',')
-samp_meta$modified_id <- paste0(gsub(samp_meta$sample, pattern = "-", replacement = "_", fixed = TRUE), "_T1")
-ani_matrix <-  read.csv(params$ani_matrix, sep = ',', check.names = FALSE)
-core_tree <- ape::read.tree(params$core_phylo)
-snp_trees <- ape::read.tree(params$snp_phylos)
-```
-
-## SNP phylogeny
-
-```{r div_snp_phylo, fig.height = 7, eval = ! is.null(snp_trees)}
-# Root tree 
-snp_trees <- root(snp_trees, "REF")
-
-# Plot tree
-phycanv <- phylocanvas(snp_trees, treetype = "rectangular", alignlabels = T, showscalebar = T, width = "100%")
-phycanv
-```
-
-```{asis div_no_snp_phylo, echo = is.null(snp_trees)}
-There is no tree to draw, probably because there were too few samples.
-More info will be added later.
-```
-
-
-## Minimum spanning network
-
-
-<!--chapter:end:02-diversity.Rmd-->
-
-# Gene content
-
-## Antibotic resistance genes
-
-## Effectors
-
-## Plasmids
-
-## User-defined genes
-
-<!--chapter:end:03-gene_content.Rmd-->
-
-`r if (knitr::is_html_output()) '
-# References {-}
-'`
-
-<!--chapter:end:07-references.Rmd-->
-
diff --git a/conf/test.config b/conf/test.config
index 0d6e80aa..d5b3c3ec 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -22,3 +22,23 @@ params {
     input  = 'test/data/metadata_small.csv'
     outdir = 'test/output_small'
 }
+
+report {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/execution_report.html"
+    overwrite = true
+}
+
+timeline {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/timeline_report.html"
+    overwrite = true
+}
+
+trace {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/trace_report.tsv"
+    overwrite = true
+    fields    = "task_id,hash,native_id,process,tag,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt,workdir,scratch,error_action"
+}
+
diff --git a/conf/test_full.config b/conf/test_full.config
index 79fc79a5..6f0a60b5 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -5,7 +5,7 @@
     Defines input files and everything required to run a full size pipeline test.
 
     Use as follows:
-        nextflow run nf-core/plantpathsurveil -profile test_full,<docker/singularity> --outdir <OUTDIR>
+        nextflow run nf-core/pathogensurveillance -profile test_full,<docker/singularity> --outdir <OUTDIR>
 
 ----------------------------------------------------------------------------------------
 */
@@ -23,3 +23,23 @@ params {
     input  = 'test/data/metadata_full.csv'
     outdir = 'test/output_full'
 }
+
+report {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/execution_report.html"
+    overwrite = true
+}
+
+timeline {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/timeline_report.html"
+    overwrite = true
+}
+
+trace {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/trace_report.tsv"
+    overwrite = true
+    fields    = "task_id,hash,native_id,process,tag,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt,workdir,scratch,error_action"
+}
+
diff --git a/conf/test_medium.config b/conf/test_medium.config
index 187f4652..d8f0b3d9 100644
--- a/conf/test_medium.config
+++ b/conf/test_medium.config
@@ -5,7 +5,7 @@
     Defines input files and everything required to run a full size pipeline test.
 
     Use as follows:
-        nextflow run nf-core/plantpathsurveil -profile test_medium,<docker/singularity> --outdir <OUTDIR>
+        nextflow run nf-core/pathogensurveillance -profile test_medium,<docker/singularity> --outdir <OUTDIR>
 
 ----------------------------------------------------------------------------------------
 */
@@ -24,3 +24,22 @@ params {
     outdir = 'test/output_medium'
 }
 
+report {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/execution_report.html"
+    overwrite = true
+}
+
+timeline {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/timeline_report.html"
+    overwrite = true
+}
+
+trace {
+    enabled   = true
+    file      = "${params.outdir}/pipeline_info/trace_report.tsv"
+    overwrite = true
+    fields    = "task_id,hash,native_id,process,tag,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt,workdir,scratch,error_action"
+}
+
diff --git a/workflows/pathogensurveillance.nf b/workflows/pathogensurveillance.nf
index ebb3121b..d5dfb3ca 100644
--- a/workflows/pathogensurveillance.nf
+++ b/workflows/pathogensurveillance.nf
@@ -168,6 +168,8 @@ workflow PATHOGENSURVEILLANCE {
     CUSTOM_DUMPSOFTWAREVERSIONS (                                               
         ch_versions.unique().collect(sort:true)
     )
+
+    println "$workflow.manifest"
                                                                           
     // MultiQC
     //workflow_summary    = WorkflowPathogensurveillance.paramsSummaryMultiqc(workflow, summary_params)