genomic-medicine-sweden · ryanjameskennedy · Dec 16, 2024 · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024
@@ -0,0 +1,14 @@
+# Workflow that reminds contributors to update the changelog on pull requests.
+name: "Changelog update reminder"
+on:
+  pull_request:
+    # Label events are listed so the job re-runs when a label is added or removed.
+    types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]
+
+jobs:
+  changelog:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dangoslen/changelog-enforcer@v3
+        with:
+          # Changelog file handed to the enforcer action.
+          changeLogPath: 'CHANGELOG.md'
+          # NOTE(review): presumably PRs carrying this label bypass the check — confirm against the action's docs.
+          skipLabel: 'Skip-Changelog'
@@ -4,8 +4,24 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
 ## [Unreleased]
 
+### Added
+
+- Added a `generate_master_html` python script that creates a `master.html` file containing a table of samples with corresponding pointers to each html output file
+- Added respective `GENERATE_MASTER_HTML` process
+- Added `cmd.config`
+- Added `params.trace_timestamp` to `nextflow.config`
+- Added `changelog_update_reminder` GA workflow
+- Added optional ability to save merged reads
+
+### Fixed
+
+### Changed
+
+- Provided option to `save_merged_reads`
+
 ## [v0.1.0]
 
 ### Added

@@ -0,0 +1,72 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>16S Samples Report</title>
+    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
+</head>
+<body>
+    <div class="container my-5">
+        <div class="card">
+            <div class="card-header text-white bg-primary">
+                <h2 class="card-title mb-0">Sample Report</h2>
+            </div>
+            <div class="card-body">
+                <div class="table-responsive">
+                    <table class="table table-bordered table-striped table-hover">
+                        <thead class="table-success">
+                            <tr>
+                                <th rowspan="2">Sample ID</th>
+                                <th colspan="1" class="text-center">Results</th>
+                                <th colspan="1" class="text-center">QC</th>
+                                <th colspan="8" class="text-center">NanoPlot</th>
+                                <th colspan="3" class="text-center">Pipeline</th>
+                            </tr>
+                            <tr>
+                                <th class="text-center">Krona</th>
+                                <th class="text-center">MultiQC Report</th>
+                                <th class="text-center">Report</th>
+                                <th class="text-center">Length vs Quality Scatter (Dot)</th>
+                                <th class="text-center">Length vs Quality Scatter (KDE)</th>
+                                <th class="text-center">Non-weighted Histogram</th>
+                                <th class="text-center">Non-weighted Log-transformed Histogram</th>
+                                <th class="text-center">Weighted Histogram</th>
+                                <th class="text-center">Weighted Log-transformed Histogram</th>
+                                <th class="text-center">Yield by Length</th>
+                                <th class="text-center">Execution Report</th>
+                                <th class="text-center">Execution Timeline</th>
+                                <th class="text-center">DAG</th>
+                            </tr>
+                        </thead>
+                        <tbody>
+                            {# One row per sample. Cell order must mirror the second header row: within the NanoPlot group the "Report" cell comes first, followed by the dot and KDE scatter plots. #}
+                            {% for sample_id in sample_ids %}
+                            <tr>
+                                <td>{{ sample_id }}</td>
+                                <td><a href="./krona/{{ sample_id }}_T1_krona.html">Krona</a></td>
+                                <td><a href="./multiqc/multiqc_report.html">MultiQC</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNanoPlot-report.html">NanoPlot Report</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_dot.html">Dot Scatter Plot</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_kde.html">KDE Scatter Plot</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedHistogramReadlength.html">Non-weighted Histogram</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedLogTransformed_HistogramReadlength.html">Non-weighted Log-transformed Histogram</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedHistogramReadlength.html">Weighted Histogram</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedLogTransformed_HistogramReadlength.html">Weighted Log-transformed Histogram</a></td>
+                                <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedYield_By_Length.html">Yield by Length</a></td>
+                                <td><a href="./pipeline_info/execution_report_{{ timestamp }}.html">Execution Report</a></td>
+                                <td><a href="./pipeline_info/execution_timeline_{{ timestamp }}.html">Execution Timeline</a></td>
+                                <td><a href="./pipeline_info/pipeline_dag_{{ timestamp }}.html">Pipeline DAG</a></td>
+                            </tr>
+                            {% endfor %}
+                        </tbody>
+                    </table>
+                </div>
+            </div>
+            <div class="card-footer text-muted">
+                Sequenced on {{ seqrun_date }}
+            </div>
+        </div>
+    </div>
+    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
+</body>
+</html>
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+
+"""Generate a master html template."""
+
+import os
+import re
+import argparse
+import pandas as pd
+from jinja2 import Template
+from datetime import datetime
+
+description = '''
+------------------------
+Title: generate_master_html.py
+Date: 2024-12-16
+Author(s): Ryan Kennedy
+------------------------
+Description:
+    This script creates master html file that points to all html files that were outputted from EMU.
+
+List of functions:
+    get_sample_ids, generate_master_html.
+
+List of standard modules:
+    csv, os, argparse.
+
+List of "non standard" modules:
+    pandas, jinja2.
+
+Procedure:
+    1. Get sample IDs by parsing samplesheet csv.
+    2. Render html using template.
+    3. Write out master.html file.
+
+-----------------------------------------------------------------------------------------------------------
+'''
+
+usage = '''
+-----------------------------------------------------------------------------------------------------------
+Generates master html file that points to all html files.
+Executed using: python3 ./generate_master_html.py -i <Input_Directory> -o <Output_Filepath>
+-----------------------------------------------------------------------------------------------------------
+'''
+
+# Command-line interface. Besides --version, the three options below are all mandatory.
+parser = argparse.ArgumentParser(
+                description=description,
+                formatter_class=argparse.RawDescriptionHelpFormatter,
+                epilog=usage
+                )
+parser.add_argument(
+    '-v', '--version',
+    action='version',
+    version='%(prog)s 0.0.1'
+    )
+# Samplesheet: read for the sample IDs and its path is also searched for the run date (see main()).
+parser.add_argument(
+    '-c', '--csv',
+    help='input samplesheet csv filepath',
+    metavar='SAMPLESHEET_CSV_FILEPATH',
+    dest='csv',
+    required=True
+    )
+# Jinja2 template that is rendered into master.html.
+parser.add_argument(
+    '-m', '--html',
+    help='input master html template filepath',
+    metavar='MASTER_HTML_TEMPLATE_FILEPATH',
+    dest='html',
+    required=True
+    )
+# Passed through to the template unchanged as `timestamp`.
+parser.add_argument(
+    '-t', '--timestamp',
+    help='pipeline execution timestamp',
+    metavar='PIPELINE_EXECUTION_TIMESTAMP',
+    dest='timestamp',
+    required=True
+    )
+
+# Parsed at module level; main() reads the resulting global `args`.
+args = parser.parse_args()
+
+def find_date_in_string(input_string, date_pattern):
+    """Searches for a date within a given string."""
+    date = ""
+    match = re.search(date_pattern, input_string)
+    if match:
+        date_regex = match.group(1)
+        if len(date_regex) == 8:
+            date = datetime.strptime(date_regex, "%Y%m%d").strftime("%d-%m-%Y")
+        elif len(date_regex) > 8:
+            date = date_regex
+        else:
+            date = "(No date found)"
+    return date
+
+def get_sample_ids(samplesheet_csv):
+    """Get sample id from csv."""
+    df = pd.read_csv(samplesheet_csv)
+    sample_ids = df['sample'].tolist()
+    return sample_ids
+
+def generate_master_html(template_html_fpath, sample_ids, seqrun_date, timestamp):
+    """Read the template from an HTML file."""
+    with open(template_html_fpath, "r") as file:
+        master_template = file.read()
+    template = Template(master_template)
+    rendered_html = template.render(sample_ids=sample_ids, seqrun_date=seqrun_date, timestamp=timestamp)
+    return rendered_html
+
+def main():
+    sample_ids = get_sample_ids(args.csv)
+    seqrun_date = find_date_in_string(args.csv, r'/(\d{8})_')
+    rendered_html = generate_master_html(args.html, sample_ids, seqrun_date, args.timestamp)
+    with open("master.html", "w") as fout:
+        fout.write(rendered_html)
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,30 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for the CMD high-performance profile
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines the executor, queue, database path and resource limits used by this profile.
+
+    Use as follows (assuming the profile is registered as `cmd` in nextflow.config):
+        nextflow run nf-core/gmsemu -profile cmd,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+// Scheduler settings belong to the `process` scope. Declared inside `params`
+// they only create `params.process.*` values and do not configure the executor.
+process {
+    executor                    = 'slurm'
+    queue                       = 'low'
+}
+
+params {
+    config_profile_name         = 'cmd profile'
+    config_profile_description  = 'CMD High performance profile'
+
+    // Databases
+    db                          = '/fs1/pipelines/gms_16S-dev/assets/databases/emu_database'
+
+    // Resource ceilings for this profile
+    max_cpus                    = 60
+    max_memory                  = '300.GB'
+    max_time                    = '48.h'
+
+    // Reads
+    save_merged_reads           = false
+
+}
@@ -22,17 +22,17 @@ process {
         publishDir = [
             path: { "${params.outdir}/fastq_pass_merged" },
             mode: params.publish_dir_mode,
-            pattern: 'fastq_pass_merged'
+            pattern: 'fastq_pass_merged',
+            enabled: params.save_merged_reads
         ]
     }
 
-
     withName: MERGE_BARCODES_SAMPLESHEET {
         publishDir = [
             path: { "${params.outdir}/fastq_pass_merged" },
             mode: params.publish_dir_mode,
-            pattern: 'fastq_pass_merged'
-//            pattern: '*fastq.gz'
+            pattern: 'fastq_pass_merged',
+            enabled: params.save_merged_reads
         ]
     }
 
@@ -44,6 +44,14 @@ process {
         ]
     }
 
+    // Publish only master.html, directly into the top-level output directory.
+    withName: GENERATE_MASTER_HTML {
+        publishDir = [
+            path: { "${params.outdir}/" },
+            mode: params.publish_dir_mode,
+            pattern: 'master.html'
+        ]
+    }
+
     withName: NANOPLOT1 {
         publishDir = [
             path: { "${params.outdir}/nanoplot" },

@@ -0,0 +1,19 @@
+// Runs generate_master_html.py on the samplesheet and emits the resulting master.html.
+process GENERATE_MASTER_HTML {
+    // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
+    // For Conda, the build (i.e. "pyhdfd78af_1") must be EXCLUDED to support installation on different operating systems.
+    conda "conda-forge::nf-core=3.0.2"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/nf-core:3.0.2--pyhdfd78af_1':
+        'quay.io/biocontainers/nf-core:3.0.2' }"
+
+    input:
+        // Samplesheet CSV, handed to the script through --csv
+        path csv
+
+    output:
+        // generate_master_html.py writes master.html into its current directory
+        path 'master.html', emit: master_html
+
+    // NOTE(review): the template is referenced through params.master_template rather than a staged
+    // `path` input — confirm the file is reachable from inside the container / work directory.
+    script:
+    """
+    generate_master_html.py --csv ${csv} --html ${params.master_template} --timestamp ${params.trace_timestamp}
+    """
+}
@@ -0,0 +1,54 @@
+# Module metadata for emu_abundance.
+name: "emu_abundance"
+description: A taxonomic profiler for metagenomic 16S data optimized for error prone long reads.
+keywords:
+  - Metagenomics
+  - 16S
+  - Nanopore
+
+tools:
+  - emu:
+      description: "Emu is a relative abundance estimator for 16s genomic data."
+      homepage: "https://gitlab.com/treangenlab/emu"
+      documentation: "https://gitlab.com/treangenlab/emu"
+      # Development happens in the same GitLab repository as the homepage.
+      tool_dev_url: "https://gitlab.com/treangenlab/emu"
+      doi: "https://doi.org/10.1038/s41592-022-01520-4"
+      # A real list of licence identifiers, not a stringified list.
+      licence: ["MIT"]
+
+input:
+  # Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: fastq.gz file containing metagenomic 16S data
+      pattern: "*.fastq.gz"
+
+output:
+  # Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - report:
+      type: file
+      description: Report (tsv file) over detected species and estimated number of reads and relative abundance
+      pattern: "*.tsv"
+
+authors:
+  - "@ryanjameskennedy"