From 98895ba4dbc8eabf5aa87b3cefaab245611e264c Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 16 Dec 2024 16:55:56 +0100 Subject: [PATCH 01/21] Add py script that generates master html --- bin/generate_master_html.py | 96 +++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100755 bin/generate_master_html.py diff --git a/bin/generate_master_html.py b/bin/generate_master_html.py new file mode 100755 index 0000000..f71f416 --- /dev/null +++ b/bin/generate_master_html.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +"""Generate a master html template.""" + +import argparse +import pandas as pd +from jinja2 import Template + +description = ''' +------------------------ +Title: generate_master_html.py +Date: 2024-12-16 +Author(s): Ryan Kennedy +------------------------ +Description: + This script creates master html file that points to all html files that were outputted from EMU. + +List of functions: + get_sample_ids, generate_master_html. + +List of standard modules: + csv, os, argparse. + +List of "non standard" modules: + pandas, jinja2. + +Procedure: + 1. Get sample IDs by parsing samplesheet csv. + 2. Render html using template. + 3. Write out master.html file. + +----------------------------------------------------------------------------------------------------------- +''' + +usage = ''' +----------------------------------------------------------------------------------------------------------- +Generates master html file that points to all html files. 
+Executed using: python3 ./generate_master_html.py -i -o +----------------------------------------------------------------------------------------------------------- +''' + +parser = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=usage + ) +parser.add_argument( + '-v', '--version', + action='version', + version='%(prog)s 0.0.1' + ) +parser.add_argument( + '-c', '--csv', + help='input samplesheet csv filepath', + metavar='SAMPLESHEET_CSV_FILEPATH', + dest='csv', + required=True + ) +parser.add_argument( + '-m', '--html', + help='input master html template filepath', + metavar='MASTER_HTML_TEMPLATE_FILEPATH', + dest='html', + required=True + ) +parser.add_argument( + '-i', + help='input directory', + metavar='INPUT_DIRECTORY', + dest='input', + required=True + ) + +args = parser.parse_args() + +def get_sample_ids(samplesheet_csv): + df = pd.read_csv(samplesheet_csv) + sample_ids = df['sample'].tolist() + return sample_ids + +def generate_master_html(template_html_fpath, sample_ids): + # Read the template from an HTML file + with open(template_html_fpath, "r") as file: + master_template = file.read() + template = Template(master_template) + rendered_html = template.render(sample_ids=sample_ids) + return rendered_html + +def main(): + sample_ids = get_sample_ids(args.csv) + rendered_html = generate_master_html(args.html, sample_ids) + with open("master.html", "w") as fout: + fout.write(rendered_html) + +if __name__ == "__main__": + main() From 05cc1acfa740e42260d9446dcd4d5d853fd1f667 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 17 Dec 2024 18:24:04 +0100 Subject: [PATCH 02/21] Add basic master html template --- assets/master_template.html | 65 +++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 assets/master_template.html diff --git a/assets/master_template.html b/assets/master_template.html new file mode 100644 index 0000000..191438d --- /dev/null 
+++ b/assets/master_template.html @@ -0,0 +1,65 @@ + + + + Sample Report + + + +

Sample Report

+ + + + + + + + + + + + + + + + + + + + + + {% for sample_id in sample_ids %} + + + + + + + + + + + + + + + + + + {% endfor %} + +
Sample IDFastQCKronaNanoPlot Length vs Quality Scatter (Dot)NanoPlot Length vs Quality Scatter (KDE)NanoPlot ReportNanoPlot Non-weighted Histogram (Read Length)NanoPlot Non-weighted Log-transformed HistogramNanoPlot Weighted HistogramNanoPlot Weighted Log-transformed HistogramNanoPlot Yield by LengthMultiQC ReportPipeline Execution ReportPipeline Execution TimelinePipeline DAG
{{ sample_id }}FastQCKronaDot Scatter PlotKDE Scatter PlotNanoPlot ReportNon-weighted HistogramNon-weighted Log-transformed HistogramWeighted HistogramWeighted Log-transformed HistogramYield by LengthMultiQCExecution ReportExecution TimelinePipeline DAG
+ + From e1b907993cf44b385b3b052c65205819b029d969 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 17 Dec 2024 19:02:27 +0100 Subject: [PATCH 03/21] Add basic master html template --- assets/master_template.html | 65 +++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 assets/master_template.html diff --git a/assets/master_template.html b/assets/master_template.html new file mode 100644 index 0000000..191438d --- /dev/null +++ b/assets/master_template.html @@ -0,0 +1,65 @@ + + + + Sample Report + + + +

Sample Report

+ + + + + + + + + + + + + + + + + + + + + + {% for sample_id in sample_ids %} + + + + + + + + + + + + + + + + + + {% endfor %} + +
Sample IDFastQCKronaNanoPlot Length vs Quality Scatter (Dot)NanoPlot Length vs Quality Scatter (KDE)NanoPlot ReportNanoPlot Non-weighted Histogram (Read Length)NanoPlot Non-weighted Log-transformed HistogramNanoPlot Weighted HistogramNanoPlot Weighted Log-transformed HistogramNanoPlot Yield by LengthMultiQC ReportPipeline Execution ReportPipeline Execution TimelinePipeline DAG
{{ sample_id }}FastQCKronaDot Scatter PlotKDE Scatter PlotNanoPlot ReportNon-weighted HistogramNon-weighted Log-transformed HistogramWeighted HistogramWeighted Log-transformed HistogramYield by LengthMultiQCExecution ReportExecution TimelinePipeline DAG
+ + From 4f18949e7c50afb84b73d755619b3e0a05dfe91c Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 17 Dec 2024 19:13:12 +0100 Subject: [PATCH 04/21] Add bootstrap to master html template --- assets/master_template.html | 108 +++++++++++++++++------------------- 1 file changed, 51 insertions(+), 57 deletions(-) diff --git a/assets/master_template.html b/assets/master_template.html index 191438d..d1b7bb6 100644 --- a/assets/master_template.html +++ b/assets/master_template.html @@ -1,65 +1,59 @@ + + Sample Report - + -

Sample Report

- - - - - - - - - - - - - - - - - - - - - - {% for sample_id in sample_ids %} - - - - - - - - - - - - - - - - - - {% endfor %} - -
Sample IDFastQCKronaNanoPlot Length vs Quality Scatter (Dot)NanoPlot Length vs Quality Scatter (KDE)NanoPlot ReportNanoPlot Non-weighted Histogram (Read Length)NanoPlot Non-weighted Log-transformed HistogramNanoPlot Weighted HistogramNanoPlot Weighted Log-transformed HistogramNanoPlot Yield by LengthMultiQC ReportPipeline Execution ReportPipeline Execution TimelinePipeline DAG
{{ sample_id }}FastQCKronaDot Scatter PlotKDE Scatter PlotNanoPlot ReportNon-weighted HistogramNon-weighted Log-transformed HistogramWeighted HistogramWeighted Log-transformed HistogramYield by LengthMultiQCExecution ReportExecution TimelinePipeline DAG
+
+

Sample Report

+
+ + + + + + + + + + + + + + + + + + + + + + {% for sample_id in sample_ids %} + + + + + + + + + + + + + + + + + + {% endfor %} + +
Sample IDFastQCKronaNanoPlot Length vs Quality Scatter (Dot)NanoPlot Length vs Quality Scatter (KDE)NanoPlot ReportNanoPlot Non-weighted Histogram (Read Length)NanoPlot Non-weighted Log-transformed HistogramNanoPlot Weighted HistogramNanoPlot Weighted Log-transformed HistogramNanoPlot Yield by LengthMultiQC ReportPipeline Execution ReportPipeline Execution TimelinePipeline DAG
{{ sample_id }}FastQCKronaDot Scatter PlotKDE Scatter PlotNanoPlot ReportNon-weighted HistogramNon-weighted Log-transformed HistogramWeighted HistogramWeighted Log-transformed HistogramYield by LengthMultiQCExecution ReportExecution TimelinePipeline DAG
+
+
+ From a47a66470290b0e429d20afa93101e54680aa0dc Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 19 Dec 2024 11:34:50 +0100 Subject: [PATCH 05/21] Add cards and seqrun date --- assets/master_template.html | 99 ++++++++++++++++++++----------------- bin/generate_master_html.py | 26 ++++++---- 2 files changed, 70 insertions(+), 55 deletions(-) diff --git a/assets/master_template.html b/assets/master_template.html index d1b7bb6..51f5e2d 100644 --- a/assets/master_template.html +++ b/assets/master_template.html @@ -3,55 +3,64 @@ - Sample Report + 16S Samples Report
-

Sample Report

-
- - - - - - - - - - - - - - - - - - - - - - {% for sample_id in sample_ids %} - - - - - - - - - - - - - - - - - - {% endfor %} - -
Sample IDFastQCKronaNanoPlot Length vs Quality Scatter (Dot)NanoPlot Length vs Quality Scatter (KDE)NanoPlot ReportNanoPlot Non-weighted Histogram (Read Length)NanoPlot Non-weighted Log-transformed HistogramNanoPlot Weighted HistogramNanoPlot Weighted Log-transformed HistogramNanoPlot Yield by LengthMultiQC ReportPipeline Execution ReportPipeline Execution TimelinePipeline DAG
{{ sample_id }}FastQCKronaDot Scatter PlotKDE Scatter PlotNanoPlot ReportNon-weighted HistogramNon-weighted Log-transformed HistogramWeighted HistogramWeighted Log-transformed HistogramYield by LengthMultiQCExecution ReportExecution TimelinePipeline DAG
+
+
+

Sample Report

+
+
+
+ + + + + + + + + + + + + + + + + + + + + + {% for sample_id in sample_ids %} + + + + + + + + + + + + + + + + + + {% endfor %} + +
Sample IDFastQCKronaNanoPlot Length vs Quality Scatter (Dot)NanoPlot Length vs Quality Scatter (KDE)NanoPlot ReportNanoPlot Non-weighted Histogram (Read Length)NanoPlot Non-weighted Log-transformed HistogramNanoPlot Weighted HistogramNanoPlot Weighted Log-transformed HistogramNanoPlot Yield by LengthMultiQC ReportPipeline Execution ReportPipeline Execution TimelinePipeline DAG
{{ sample_id }}FastQCKronaDot Scatter PlotKDE Scatter PlotNanoPlot ReportNon-weighted HistogramNon-weighted Log-transformed HistogramWeighted HistogramWeighted Log-transformed HistogramYield by LengthMultiQCExecution ReportExecution TimelinePipeline DAG
+
+
+
diff --git a/bin/generate_master_html.py b/bin/generate_master_html.py index f71f416..abaa0af 100755 --- a/bin/generate_master_html.py +++ b/bin/generate_master_html.py @@ -2,9 +2,11 @@ """Generate a master html template.""" +import re import argparse import pandas as pd from jinja2 import Template +from datetime import datetime description = ''' ------------------------ @@ -63,13 +65,6 @@ dest='html', required=True ) -parser.add_argument( - '-i', - help='input directory', - metavar='INPUT_DIRECTORY', - dest='input', - required=True - ) args = parser.parse_args() @@ -78,17 +73,28 @@ def get_sample_ids(samplesheet_csv): sample_ids = df['sample'].tolist() return sample_ids -def generate_master_html(template_html_fpath, sample_ids): +def get_seqrun_date(samplesheet_csv): + date = "" + match = re.search(r'/(\d{8})_', samplesheet_csv) + if match: + date_regex = match.group(1) + date = datetime.strptime(date_regex, "%Y%m%d").strftime("%d-%m-%Y") + else: + date = "(No date found)" + return date + +def generate_master_html(template_html_fpath, sample_ids, seqrun_date): # Read the template from an HTML file with open(template_html_fpath, "r") as file: master_template = file.read() template = Template(master_template) - rendered_html = template.render(sample_ids=sample_ids) + rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date) return rendered_html def main(): sample_ids = get_sample_ids(args.csv) - rendered_html = generate_master_html(args.html, sample_ids) + seqrun_date = get_seqrun_date(args.csv) + rendered_html = generate_master_html(args.html, sample_ids, seqrun_date) with open("master.html", "w") as fout: fout.write(rendered_html) From d6b6f4053a8a34bd29038b9804be2d4c3bad68b8 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 19 Dec 2024 11:36:17 +0100 Subject: [PATCH 06/21] Add generate_master_html module --- modules/local/generate_master_html/main.nf | 19 ++++++++ modules/local/generate_master_html/meta.yml | 54 
+++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 modules/local/generate_master_html/main.nf create mode 100644 modules/local/generate_master_html/meta.yml diff --git a/modules/local/generate_master_html/main.nf b/modules/local/generate_master_html/main.nf new file mode 100644 index 0000000..304cb57 --- /dev/null +++ b/modules/local/generate_master_html/main.nf @@ -0,0 +1,19 @@ +process GENERATE_MASTER_HTML { + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "pyhdfd78af_1") must be EXCLUDED to support installation on different operating systems. + conda "conda-forge::nf-core=3.0.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/nf-core:3.0.2--pyhdfd78af_1': + 'quay.io/biocontainers/nf-core:3.0.2' }" + + input: + path csv + + output: + path 'master.html', emit: master_html + + script: + """ + generate_master_html.py --csv $csv --html $params.master_template + """ +} diff --git a/modules/local/generate_master_html/meta.yml b/modules/local/generate_master_html/meta.yml new file mode 100644 index 0000000..1ea8829 --- /dev/null +++ b/modules/local/generate_master_html/meta.yml @@ -0,0 +1,54 @@ +name: "emu_abundance" +## TODO nf-core: Add a description of the module and list keywords +description: A taxonomic profiler for metagenomic 16S data optimized for error prone long reads. +keywords: + - Metagenomics + - 16S + - Nanopore + +tools: + - "emu": + ## TODO nf-core: Add a description and other details for the software below + description: "Emu is a relative abundance estimator for 16s genomic data." 
+ homepage: "https://gitlab.com/treangenlab/emu" + documentation: "https://gitlab.com/treangenlab/emu" + tool_dev_url: "None" + doi: "https://doi.org/10.1038/s41592-022-01520-4" + licence: "['MIT']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - reads: + type: file + description: fastq.gz file containing metagenomic 16S data + pattern: "*.{fastq.gz}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - report: + type: file + description: Report (tsv file) over detected species and estimated number of reads and relative abundance + pattern: "*{.tsv}" + +authors: + - "@ryanjameskennedy" From 25caede7cff8c09c62b2d9b065e7c0dfa6a2b45b Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 19 Dec 2024 11:36:47 +0100 Subject: [PATCH 07/21] Add generate_master_html module to gmsemu.nf workflow --- workflows/gmsemu.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/workflows/gmsemu.nf b/workflows/gmsemu.nf index 022e2e5..613ef65 100644 --- a/workflows/gmsemu.nf +++ b/workflows/gmsemu.nf @@ -66,6 +66,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf' include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf' include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf' +include { GENERATE_MASTER_HTML } from 
'../modules/local/generate_master_html/main.nf' //include { FALCO } from '../modules/nf-core/falco/main.nf' include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf' include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf' @@ -93,19 +94,19 @@ workflow GMSEMU { if ( params.merge_fastq_pass && !params.barcodes_samplesheet) { - MERGE_BARCODES (params.merge_fastq_pass) + MERGE_BARCODES(params.merge_fastq_pass) //GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged")) GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged) // ch_input = file(params.outdir + 'samplesheet_merged.csv') ch_input = GENERATE_INPUT.out.sample_sheet_merged } else if ( params.merge_fastq_pass && params.barcodes_samplesheet) { - MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass) + MERGE_BARCODES_SAMPLESHEET(params.barcodes_samplesheet, params.merge_fastq_pass) // merged_files = (params.outdir + '/fastq_pass_merged') - GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) + GENERATE_INPUT(MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged) ch_input = GENERATE_INPUT.out.sample_sheet_merged } - + GENERATE_MASTER_HTML(GENERATE_INPUT.out.sample_sheet_merged) // From 11df34f67db7a6e8ddcb5004f19f267471747b0d Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 19 Dec 2024 11:37:01 +0100 Subject: [PATCH 08/21] Add generate_master_html to configs --- conf/modules.config | 8 ++++++++ nextflow.config | 14 +++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ccd622a..9cb295b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -44,6 +44,14 @@ process { ] } + withName: GENERATE_MASTER_HTML { + publishDir = [ + path: { "${params.outdir}/" }, + mode: params.publish_dir_mode, + pattern: 'master.html' + ] + } + withName: NANOPLOT1 { publishDir = [ path: { "${params.outdir}/nanoplot" }, diff --git a/nextflow.config b/nextflow.config index 
cc1bdf4..6c69dd8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,7 +13,7 @@ params { input = null db = null -// reads = null + // reads = null seqtype = "map-ont" min_abundance = 0.0001 minimap_max_alignments = 50 @@ -22,20 +22,20 @@ params { keep_files = false output_unclassified = true + // master html + master_template = "$projectDir/assets/master_template.html" - // - // porechop_abi - adapter_trimming = false + // porechop_abi + adapter_trimming = false - // - // filtlong filtering + // filtlong filtering quality_filtering = true longread_qc_qualityfilter_minlength = 1200 longread_qc_qualityfilter_maxlength = 1800 longread_qc_qualityfilter_min_mean_q = 94 //Save the trimmed reads - save_preprocessed_reads = false + save_preprocessed_reads = false // krona run_krona = true From b8829d86ea76cb6cd8b5964044225dcf3c2ff01a Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 20 Dec 2024 16:49:43 +0100 Subject: [PATCH 09/21] Add cmd.config --- conf/cmd.config | 27 +++++++++++++++++++++++++++ nextflow.config | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 conf/cmd.config diff --git a/conf/cmd.config b/conf/cmd.config new file mode 100644 index 0000000..05234e6 --- /dev/null +++ b/conf/cmd.config @@ -0,0 +1,27 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/gmsemu -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + process.executor = 'slurm' + process.queue = 'low' + config_profile_name = 'cmd profile' + config_profile_description = 'CMD High performance profile' + + // Databases + db = '/fs1/pipelines/gms_16S-dev/assets/databases/emu_database' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 60 + max_memory = '300.GB' + max_time = '48.h' + +} diff --git a/nextflow.config b/nextflow.config index 6c69dd8..c992d0d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -173,7 +173,7 @@ profiles { test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } full { includeConfig 'conf/full.config' } - + cmd { includeConfig 'conf/cmd.config' } } From 917da0e5bab6d13d4c401e1d583f257e904fda90 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 2 Jan 2025 13:17:57 +0100 Subject: [PATCH 10/21] Add search for date_id --- assets/master_template.html | 6 ++--- bin/generate_master_html.py | 51 ++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/assets/master_template.html b/assets/master_template.html index 51f5e2d..fb77e33 100644 --- a/assets/master_template.html +++ b/assets/master_template.html @@ -49,9 +49,9 @@

Sample Report

Weighted Log-transformed Histogram Yield by Length MultiQC - Execution Report - Execution Timeline - Pipeline DAG + Execution Report + Execution Timeline + Pipeline DAG {% endfor %} diff --git a/bin/generate_master_html.py b/bin/generate_master_html.py index abaa0af..346be3f 100755 --- a/bin/generate_master_html.py +++ b/bin/generate_master_html.py @@ -2,6 +2,7 @@ """Generate a master html template.""" +import os import re import argparse import pandas as pd @@ -68,33 +69,53 @@ args = parser.parse_args() -def get_sample_ids(samplesheet_csv): - df = pd.read_csv(samplesheet_csv) - sample_ids = df['sample'].tolist() - return sample_ids - -def get_seqrun_date(samplesheet_csv): +def get_date_id(samplesheet_csv_fpath): + date_ids = [] + parent_dir = os.path.dirname(samplesheet_csv_fpath) + pipeline_info_dir = os.path.join(parent_dir, 'pipeline_info') + for filename in os.listdir(pipeline_info_dir): + if filename.startswith("execution_report"): + execution_report_fpath = os.path.join(pipeline_info_dir, filename) + date_id = find_date_in_string(execution_report_fpath, r'(\d{4}-\d{2}-\d{2}[^.]+)') + date_ids.append(date_id) + date_list = map(find_date_in_string, date_ids, [r'\b(\d{4}-\d{2}-\d{2})']*len(date_ids)) + date_id_zipped = list(zip(date_ids, date_list)) + sorted_date_ids = [date_id[0] for date_id in sorted(date_id_zipped, key=lambda date: datetime.strptime(date[1], "%Y-%m-%d"), reverse=True)] + return sorted_date_ids[0] + +def find_date_in_string(input_string, date_pattern): + """Searches for a date within a given string.""" date = "" - match = re.search(r'/(\d{8})_', samplesheet_csv) + match = re.search(date_pattern, input_string) if match: date_regex = match.group(1) - date = datetime.strptime(date_regex, "%Y%m%d").strftime("%d-%m-%Y") - else: - date = "(No date found)" + if len(date_regex) == 8: + date = datetime.strptime(date_regex, "%Y%m%d").strftime("%d-%m-%Y") + elif len(date_regex) > 8: + date = date_regex + else: + date = "(No date found)" return date -def 
generate_master_html(template_html_fpath, sample_ids, seqrun_date): - # Read the template from an HTML file +def get_sample_ids(samplesheet_csv): + """Get sample id from csv.""" + df = pd.read_csv(samplesheet_csv) + sample_ids = df['sample'].tolist() + return sample_ids + +def generate_master_html(template_html_fpath, sample_ids, seqrun_date, date_id): + """Read the template from an HTML file.""" with open(template_html_fpath, "r") as file: master_template = file.read() template = Template(master_template) - rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date) + rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date, date_id=date_id) return rendered_html def main(): sample_ids = get_sample_ids(args.csv) - seqrun_date = get_seqrun_date(args.csv) - rendered_html = generate_master_html(args.html, sample_ids, seqrun_date) + seqrun_date = find_date_in_string(args.csv, r'/(\d{8})_') + date_id = get_date_id(args.csv) + rendered_html = generate_master_html(args.html, sample_ids, seqrun_date, date_id) with open("master.html", "w") as fout: fout.write(rendered_html) From a4bfa1ca874561751b8240748cfc71cc75b74118 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 22 Jan 2025 16:44:41 +0100 Subject: [PATCH 11/21] Add nested header to master.html and remove fastqc --- assets/master_template.html | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/assets/master_template.html b/assets/master_template.html index fb77e33..2ce5a05 100644 --- a/assets/master_template.html +++ b/assets/master_template.html @@ -17,29 +17,34 @@

Sample Report

- - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + {% for sample_id in sample_ids %} - + @@ -48,7 +53,6 @@

Sample Report

- From 1bca118e983c6cf9a82ef3341123df1d64752cf9 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 3 Feb 2025 11:59:52 +0100 Subject: [PATCH 12/21] Change pipeline execution output filenames --- assets/master_template.html | 6 +++--- bin/generate_master_html.py | 21 +++------------------ nextflow.config | 13 ++++++++----- workflows/gmsemu.nf | 3 +-- 4 files changed, 15 insertions(+), 28 deletions(-) diff --git a/assets/master_template.html b/assets/master_template.html index 2ce5a05..5adfb04 100644 --- a/assets/master_template.html +++ b/assets/master_template.html @@ -53,9 +53,9 @@

Sample Report

- - - + + + {% endfor %} diff --git a/bin/generate_master_html.py b/bin/generate_master_html.py index 346be3f..7714864 100755 --- a/bin/generate_master_html.py +++ b/bin/generate_master_html.py @@ -69,20 +69,6 @@ args = parser.parse_args() -def get_date_id(samplesheet_csv_fpath): - date_ids = [] - parent_dir = os.path.dirname(samplesheet_csv_fpath) - pipeline_info_dir = os.path.join(parent_dir, 'pipeline_info') - for filename in os.listdir(pipeline_info_dir): - if filename.startswith("execution_report"): - execution_report_fpath = os.path.join(pipeline_info_dir, filename) - date_id = find_date_in_string(execution_report_fpath, r'(\d{4}-\d{2}-\d{2}[^.]+)') - date_ids.append(date_id) - date_list = map(find_date_in_string, date_ids, [r'\b(\d{4}-\d{2}-\d{2})']*len(date_ids)) - date_id_zipped = list(zip(date_ids, date_list)) - sorted_date_ids = [date_id[0] for date_id in sorted(date_id_zipped, key=lambda date: datetime.strptime(date[1], "%Y-%m-%d"), reverse=True)] - return sorted_date_ids[0] - def find_date_in_string(input_string, date_pattern): """Searches for a date within a given string.""" date = "" @@ -103,19 +89,18 @@ def get_sample_ids(samplesheet_csv): sample_ids = df['sample'].tolist() return sample_ids -def generate_master_html(template_html_fpath, sample_ids, seqrun_date, date_id): +def generate_master_html(template_html_fpath, sample_ids, seqrun_date): """Read the template from an HTML file.""" with open(template_html_fpath, "r") as file: master_template = file.read() template = Template(master_template) - rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date, date_id=date_id) + rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date) return rendered_html def main(): sample_ids = get_sample_ids(args.csv) seqrun_date = find_date_in_string(args.csv, r'/(\d{8})_') - date_id = get_date_id(args.csv) - rendered_html = generate_master_html(args.html, sample_ids, seqrun_date, date_id) + rendered_html = 
generate_master_html(args.html, sample_ids, seqrun_date) with open("master.html", "w") as fout: fout.write(rendered_html) diff --git a/nextflow.config b/nextflow.config index f079444..e481c1f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -208,22 +208,25 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + overwrite = true + file = "${params.tracedir}/execution_timeline.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + overwrite = true + file = "${params.tracedir}/execution_report.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + overwrite = true + file = "${params.tracedir}/execution_trace.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + overwrite = true + file = "${params.tracedir}/pipeline_dag.html" } manifest { diff --git a/workflows/gmsemu.nf b/workflows/gmsemu.nf index d5791e9..c599768 100644 --- a/workflows/gmsemu.nf +++ b/workflows/gmsemu.nf @@ -88,8 +88,6 @@ workflow GMSEMU { ch_input = GENERATE_INPUT.out.sample_sheet_merged } - GENERATE_MASTER_HTML(GENERATE_INPUT.out.sample_sheet_merged) - // Validate and stage input files INPUT_CHECK(ch_input) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) @@ -197,6 +195,7 @@ workflow GMSEMU { ) multiqc_report = MULTIQC.out.report.toList() + GENERATE_MASTER_HTML(GENERATE_INPUT.out.sample_sheet_merged) } /* From 112465a071af1e08d186c171d0965bd1a7c2672f Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 4 Feb 2025 09:52:06 +0100 Subject: [PATCH 13/21] Update generate_master_html to include timestap as input variable --- assets/master_template.html | 6 +++--- bin/generate_master_html.py | 13 ++++++++++--- 
modules/local/generate_master_html/main.nf | 2 +- nextflow.config | 14 +++++--------- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/assets/master_template.html b/assets/master_template.html index 5adfb04..284f2c7 100644 --- a/assets/master_template.html +++ b/assets/master_template.html @@ -53,9 +53,9 @@

Sample Report

- - - + + + {% endfor %} diff --git a/bin/generate_master_html.py b/bin/generate_master_html.py index 7714864..1e51635 100755 --- a/bin/generate_master_html.py +++ b/bin/generate_master_html.py @@ -66,6 +66,13 @@ dest='html', required=True ) +parser.add_argument( + '-t', '--timestamp', + help='pipeline execution timestamp', + metavar='PIPELINE_EXECUTION_TIMESTAMP', + dest='timestamp', + required=True + ) args = parser.parse_args() @@ -89,18 +96,18 @@ def get_sample_ids(samplesheet_csv): sample_ids = df['sample'].tolist() return sample_ids -def generate_master_html(template_html_fpath, sample_ids, seqrun_date): +def generate_master_html(template_html_fpath, sample_ids, seqrun_date, timestamp): """Read the template from an HTML file.""" with open(template_html_fpath, "r") as file: master_template = file.read() template = Template(master_template) - rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date) + rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date, timestamp=timestamp) return rendered_html def main(): sample_ids = get_sample_ids(args.csv) seqrun_date = find_date_in_string(args.csv, r'/(\d{8})_') - rendered_html = generate_master_html(args.html, sample_ids, seqrun_date) + rendered_html = generate_master_html(args.html, sample_ids, seqrun_date, args.timestamp) with open("master.html", "w") as fout: fout.write(rendered_html) diff --git a/modules/local/generate_master_html/main.nf b/modules/local/generate_master_html/main.nf index 304cb57..d5a3382 100644 --- a/modules/local/generate_master_html/main.nf +++ b/modules/local/generate_master_html/main.nf @@ -14,6 +14,6 @@ process GENERATE_MASTER_HTML { script: """ - generate_master_html.py --csv $csv --html $params.master_template + generate_master_html.py --csv $csv --html $params.master_template --timestamp $trace_timestamp """ } diff --git a/nextflow.config b/nextflow.config index e481c1f..030ac01 100644 --- a/nextflow.config +++ b/nextflow.config @@ -207,26 
+207,22 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] - +def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - overwrite = true - file = "${params.tracedir}/execution_timeline.html" + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { enabled = true - overwrite = true - file = "${params.tracedir}/execution_report.html" + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { enabled = true - overwrite = true - file = "${params.tracedir}/execution_trace.txt" + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - overwrite = true - file = "${params.tracedir}/pipeline_dag.html" + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { From 0dcbaccacd39a82d8088ccdabe706f86d9665021 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 4 Feb 2025 13:45:25 +0100 Subject: [PATCH 14/21] Rm MERGE_BARCODES publishDir for unnecessary publishing of reads to save space --- conf/modules.config | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2f61358..5af39d5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,24 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - withName: MERGE_BARCODES { - publishDir = [ - path: { "${params.outdir}/fastq_pass_merged" }, - mode: params.publish_dir_mode, - pattern: 'fastq_pass_merged' - ] - } - - - withName: MERGE_BARCODES_SAMPLESHEET { - publishDir = [ - path: { "${params.outdir}/fastq_pass_merged" }, - mode: params.publish_dir_mode, - pattern: 'fastq_pass_merged' -// pattern: '*fastq.gz' - ] - } - withName: GENERATE_INPUT { publishDir = [ path: { "${params.outdir}/" }, From 9722dc0b8e80ac9d2e37ada12be4c47e218c156f Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 4 Feb 2025 13:51:23 +0100 Subject: [PATCH 15/21] Add params.trace_timestamp --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 030ac01..a66ff55 100644 --- a/nextflow.config +++ b/nextflow.config @@ -208,6 +208,7 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') +params.trace_timestamp = trace_timestamp timeline { enabled = true file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" From 6050b9b3ed58671bab4e59e7734f798e1d4e8069 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 4 Feb 2025 13:52:17 +0100 Subject: [PATCH 16/21] Update CHANGELOG re generate_master_html --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fa97c8..64bd3db 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ ## [Unreleased] +### Added + +- Added a `generate_master_html` python script that creates `master.html` file containing a table of samples with corresponding pointers to each html output file +- Added respective `GENERATE_MASTER_HTML` process +- Added `cmd.config` +- Added `params.trace_timestamp` to `nextflow.config` + +### Fixed + +### Changed + ## [v0.1.0] ### Added From 5836b23d3af741fb786a89cdecd9bb0377688680 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 4 Feb 2025 13:54:37 +0100 Subject: [PATCH 17/21] Add changelog_update_reminder GA workflow --- .github/workflows/changelog_update_reminder.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/changelog_update_reminder.yml diff --git a/.github/workflows/changelog_update_reminder.yml b/.github/workflows/changelog_update_reminder.yml new file mode 100644 index 0000000..accee86 --- /dev/null +++ b/.github/workflows/changelog_update_reminder.yml @@ -0,0 +1,14 @@ +name: "Changelog update reminder" +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +jobs: + changelog: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dangoslen/changelog-enforcer@v3 + with: + changeLogPath: 'CHANGELOG.md' + skipLabel: 'Skip-Changelog' From af8db01c05df40853355f01285f1216e2aa3ce85 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 4 Feb 2025 14:02:39 +0100 Subject: [PATCH 18/21] Fix params.trace_timestamp in GENERATE_MASTER_HTML process --- modules/local/generate_master_html/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/generate_master_html/main.nf b/modules/local/generate_master_html/main.nf index d5a3382..40933bd 100644 --- a/modules/local/generate_master_html/main.nf +++ b/modules/local/generate_master_html/main.nf @@ -14,6 +14,6 @@ process GENERATE_MASTER_HTML { script: """ - generate_master_html.py --csv $csv --html $params.master_template --timestamp 
$trace_timestamp + generate_master_html.py --csv ${csv} --html ${params.master_template} --timestamp ${params.trace_timestamp} """ } From a542cb1692ce75eb6807c1ab42aebcfb9eb00673 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 4 Feb 2025 14:11:29 +0100 Subject: [PATCH 19/21] Update CHANGELOG re changelog_update_reminder GA workflow --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64bd3db..99b5eb6 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added respective `GENERATE_MASTER_HTML` process - Added `cmd.config` - Added `params.trace_timestamp` to `nextflow.config` +- Added `changelog_update_reminder` GA workflow ### Fixed From 57145f17c245b818dc7a9a38a2479f3ba02fd3d7 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 12 Feb 2025 10:49:43 +0100 Subject: [PATCH 20/21] Provide option to save_merged_reads --- CHANGELOG.md | 2 ++ conf/cmd.config | 3 +++ conf/modules.config | 18 ++++++++++++++++++ nextflow.config | 3 +++ 4 files changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99b5eb6..42b1178 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Provided option to `save_merged_reads` + ## [v0.1.0] ### Added diff --git a/conf/cmd.config b/conf/cmd.config index 05234e6..c0a2953 100644 --- a/conf/cmd.config +++ b/conf/cmd.config @@ -24,4 +24,7 @@ params { max_memory = '300.GB' max_time = '48.h' + // Reads + save_merged_reads = false + } diff --git a/conf/modules.config b/conf/modules.config index 5af39d5..00ed49a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,6 +18,24 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] + withName: MERGE_BARCODES { + publishDir = [ + path: { "${params.outdir}/fastq_pass_merged" }, + mode: params.publish_dir_mode, + pattern: 'fastq_pass_merged', + enable: params.save_merged_reads + ] + } + + withName: MERGE_BARCODES_SAMPLESHEET { + publishDir = [ + path: { "${params.outdir}/fastq_pass_merged" }, + mode: params.publish_dir_mode, + pattern: 'fastq_pass_merged', + enable: params.save_merged_reads + ] + } + withName: GENERATE_INPUT { publishDir = [ path: { "${params.outdir}/" }, diff --git a/nextflow.config b/nextflow.config index a66ff55..f9a8730 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,6 +46,9 @@ params { //Save the trimmed reads save_preprocessed_reads = false + //Save the merged reads + save_merged_reads = true + // krona run_krona = true krona_taxonomy_tab = "$projectDir/assets/databases/krona/taxonomy/taxonomy.tab" From b8d1ae162c19813422d47b2f315eed5f793f8aec Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 13 Feb 2025 08:50:59 +0100 Subject: [PATCH 21/21] Add toggling of publishDir for merged reads --- CHANGELOG.md | 1 + conf/modules.config | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42b1178..25cc51a 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `cmd.config` - Added `params.trace_timestamp` to `nextflow.config` - Added `changelog_update_reminder` GA workflow +- Added optional ability to save merged reads ### Fixed diff --git a/conf/modules.config b/conf/modules.config index 00ed49a..3b94c70 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -23,7 +23,7 @@ process { path: { "${params.outdir}/fastq_pass_merged" }, mode: params.publish_dir_mode, pattern: 'fastq_pass_merged', - enable: params.save_merged_reads + enabled: params.save_merged_reads ] } @@ -32,7 +32,7 @@ process { path: { "${params.outdir}/fastq_pass_merged" }, 
mode: params.publish_dir_mode, pattern: 'fastq_pass_merged', - enable: params.save_merged_reads + enabled: params.save_merged_reads ] }
Sample IDFastQCKronaNanoPlot Length vs Quality Scatter (Dot)NanoPlot Length vs Quality Scatter (KDE)NanoPlot ReportNanoPlot Non-weighted Histogram (Read Length)NanoPlot Non-weighted Log-transformed HistogramNanoPlot Weighted HistogramNanoPlot Weighted Log-transformed HistogramNanoPlot Yield by LengthMultiQC ReportPipeline Execution ReportPipeline Execution TimelinePipeline DAGSample IDResultsQCNanoPlotPipeline
KronaMultiQC ReportReportLength vs Quality Scatter (Dot)Length vs Quality Scatter (KDE)Non-weighted HistogramNon-weighted Log-transformed HistogramWeighted HistogramWeighted Log-transformed HistogramYield by LengthExecution ReportExecution TimelineDAG
{{ sample_id }}FastQC KronaMultiQC Dot Scatter Plot KDE Scatter Plot NanoPlot Report Weighted Histogram Weighted Log-transformed Histogram Yield by LengthMultiQC Execution Report Execution Timeline Pipeline DAGWeighted Histogram Weighted Log-transformed Histogram Yield by LengthExecution ReportExecution TimelinePipeline DAGExecution ReportExecution TimelinePipeline DAG
Weighted Histogram Weighted Log-transformed Histogram Yield by LengthExecution ReportExecution TimelinePipeline DAGExecution ReportExecution TimelinePipeline DAG