From 4fbed83e800ef0e9ae64886b69d4d09912e992c9 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Mon, 9 Dec 2024 00:53:26 +0000
Subject: [PATCH 01/23] Added variable that checks if data is ONT, based on
 fastq file name.

---
 configs/read_type.config | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/configs/read_type.config b/configs/read_type.config
index cd55388a..d4b9b988 100644
--- a/configs/read_type.config
+++ b/configs/read_type.config
@@ -3,4 +3,5 @@
 params {
     // Whether the underlying data is paired-end or single-end
     single_end = new File(params.sample_sheet).text.readLines()[0].contains('fastq_2') ? false : true
-}
\ No newline at end of file
+    ont = new File(params.sample_sheet).text.readLines()[1].contains('ONT') ? true : false
+}

From 71757bc6ba5fd14b2ca04cec2492253c7442a294 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Mon, 9 Dec 2024 23:27:21 +0000
Subject: [PATCH 02/23] Adding the filtlong process and docker image.

---
 configs/containers.config      |  3 +++
 modules/local/filtlong/main.nf | 15 +++++++++++++++
 2 files changed, 18 insertions(+)
 create mode 100644 modules/local/filtlong/main.nf

diff --git a/configs/containers.config b/configs/containers.config
index dbff9cab..9e2ca712 100644
--- a/configs/containers.config
+++ b/configs/containers.config
@@ -73,4 +73,7 @@ process {
     withLabel: fastp {
         container = "staphb/fastp:0.23.4"
     }
+    withLabel: filtlong {
+        container = "staphb/filtlong:0.2.1"
+    }
 }
diff --git a/modules/local/filtlong/main.nf b/modules/local/filtlong/main.nf
new file mode 100644
index 00000000..230521d4
--- /dev/null
+++ b/modules/local/filtlong/main.nf
@@ -0,0 +1,15 @@
+process FILTLONG {
+    label "small"
+    label "filtlong"
+    input:
+        tuple val(sample), path(reads)
+    output:
+        tuple val(sample), path("${sample}_filtlong.fastq.gz"), emit: reads
+    shell:
+        // Filter reads based on length (min 100 bp) and mean quality (min 99%, i.e, a Phred score of 20)
+        '''
+        o=!{sample}_filtlong.fastq.gz
+        i=!{reads[0]}
+        filtlong --min_length 100 --min_mean_q 90 --verbose ${i} | gzip > ${o}
+        '''
+}
\ No newline at end of file

From 66569ad86c2995df4bc87ea9cd2a4cb73fd6e3fe Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Mon, 9 Dec 2024 23:31:21 +0000
Subject: [PATCH 03/23] Added code to pull out length data from multiqc.

---
 modules/local/summarizeMultiqcPair/main.nf    |  2 +-
 .../usr/bin/summarize-multiqc-pair.R          | 25 +++++++++++++++++--
 subworkflows/local/processOutput/main.nf      |  5 +++-
 subworkflows/local/qc/main.nf                 |  7 ++++--
 workflows/run_dev_se.nf                       |  3 ++-
 5 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/modules/local/summarizeMultiqcPair/main.nf b/modules/local/summarizeMultiqcPair/main.nf
index 18f3fd8c..d5a6b976 100644
--- a/modules/local/summarizeMultiqcPair/main.nf
+++ b/modules/local/summarizeMultiqcPair/main.nf
@@ -6,7 +6,7 @@ process SUMMARIZE_MULTIQC_PAIR {
         tuple val(stage), val(sample), path(multiqc_data)
         val(single_end)
     output:
-        tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz")
+        tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz"), path("${stage}_${sample}_qc_length_stats.tsv.gz")
     shell:
         '''
         summarize-multiqc-pair.R -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD}
diff --git a/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R b/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R
index 6c1193c1..9b1519d0 100755
--- a/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R
+++ b/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R
@@ -41,7 +41,7 @@ out_path_basic <- file.path(opt$output_dir, paste0(id_out, "_qc_basic_stats.tsv.
 out_path_adapters <- file.path(opt$output_dir, paste0(id_out, "_qc_adapter_stats.tsv.gz"))
 out_path_quality_base <- file.path(opt$output_dir, paste0(id_out, "_qc_quality_base_stats.tsv.gz"))
 out_path_quality_sequence <- file.path(opt$output_dir, paste0(id_out, "_qc_quality_sequence_stats.tsv.gz"))
-
+out_path_lengths <- file.path(opt$output_dir, paste0(id_out, "_qc_length_stats.tsv.gz"))
 #=====================#
 # AUXILIARY FUNCTIONS #
 #=====================#
@@ -96,6 +96,18 @@ extract_adapter_data_single <- function(adapter_dataset){
     separate_wider_delim("filename", " - ", names=c("file", "adapter"))
   return(data)
 }
+
+extract_length_data_single <- function(length_dataset){
+  # Convert a single JSON length dataset into a tibble
+  data <- lapply(1:length(length_dataset$name), function(n)
+    length_dataset$data[[n]] %>% as.data.frame %>%
+      mutate(filename=length_dataset$name[n])) %>%
+    bind_rows() %>% as_tibble %>%
+    rename(length=V1, n_sequences=V2) %>%
+    rename(file = filename)
+  return(data)
+}
+
 # NB: Current paired version can't distinguish or annotate forward vs reverse reads in these plots.
 # TODO: Restore this functionality (will require workflow restructuring).
 
@@ -106,6 +118,13 @@ extract_adapter_data <- function(multiqc_json){
   return(data_out)
 }
 
+extract_length_data <- function(multiqc_json){
+  # Extract length data from multiqc JSON
+  datasets <- multiqc_json$report_plot_data$fastqc_sequence_length_distribution_plot$datasets$lines
+  data_out <- lapply(datasets, extract_length_data_single) %>% bind_rows()
+  return(data_out)
+}
+
 extract_per_base_quality_single <- function(per_base_quality_dataset){
   # Convert a single JSON per-base-quality dataset into a tibble
   data <- lapply(1:length(per_base_quality_dataset$name), function(n)
@@ -153,7 +172,8 @@ fastqc_tsv <- readr::read_tsv(fastqc_tsv_path, show_col_types = FALSE)
 # Process
 add_info <- function(tab) mutate(tab, stage=opt$stage, sample=opt$sample)
 basic_info <- basic_info_fastqc(fastqc_tsv, multiqc_json) %>% add_info
-adapters <- extract_adapter_data(multiqc_json) %>% add_info
+adapters <- extract_adapter_data(multiqc_json) %>% add_info()
+lengths <- extract_length_data(multiqc_json) %>% add_info()
 per_base_quality <- extract_per_base_quality(multiqc_json) %>% add_info
 per_sequence_quality <- extract_per_sequence_quality(multiqc_json) %>% add_info
 
@@ -162,3 +182,4 @@ write_tsv(basic_info, out_path_basic)
 write_tsv(adapters, out_path_adapters)
 write_tsv(per_base_quality, out_path_quality_base)
 write_tsv(per_sequence_quality, out_path_quality_sequence)
+write_tsv(lengths, out_path_lengths)
diff --git a/subworkflows/local/processOutput/main.nf b/subworkflows/local/processOutput/main.nf
index c9b4cc4c..9c273a4a 100644
--- a/subworkflows/local/processOutput/main.nf
+++ b/subworkflows/local/processOutput/main.nf
@@ -6,7 +6,7 @@ include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/merge
 include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_QSEQS } from "../../../modules/local/mergeTsvs"
-
+include { MERGE_TSVS as MERGE_MULTIQC_LENGTHS } from "../../../modules/local/mergeTsvs"
 /***********
 | WORKFLOW |
 ***********/
@@ -20,14 +20,17 @@ workflow PROCESS_OUTPUT {
         multiqc_adapt_ch = multiqc_ch.map{ it[1] }.collect().ifEmpty([])
         multiqc_qbase_ch = multiqc_ch.map{ it[2] }.collect().ifEmpty([])
         multiqc_qseqs_ch = multiqc_ch.map{ it[3] }.collect().ifEmpty([])
+        multiqc_lengths_ch = multiqc_ch.map{ it[4] }.collect().ifEmpty([])
         // Merge MultiQC outputs
         basic_out_ch = MERGE_MULTIQC_BASIC(multiqc_basic_ch, "qc_basic_stats")
         adapt_out_ch = MERGE_MULTIQC_ADAPT(multiqc_adapt_ch, "qc_adapter_stats")
         qbase_out_ch = MERGE_MULTIQC_QBASE(multiqc_qbase_ch, "qc_quality_base_stats")
         qseqs_out_ch = MERGE_MULTIQC_QSEQS(multiqc_qseqs_ch, "qc_quality_sequence_stats")
+        lengths_out_ch = MERGE_MULTIQC_LENGTHS(multiqc_lengths_ch, "qc_length_stats")
     emit:
         basic = basic_out_ch
         adapt = adapt_out_ch
         qbase = qbase_out_ch
         qseqs = qseqs_out_ch
+        lengths = lengths_out_ch
 }
diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf
index 8823136f..8a4895e5 100644
--- a/subworkflows/local/qc/main.nf
+++ b/subworkflows/local/qc/main.nf
@@ -9,7 +9,7 @@ include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/merge
 include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_QSEQS } from "../../../modules/local/mergeTsvs"
-
+include { MERGE_TSVS as MERGE_MULTIQC_LENGTHS } from "../../../modules/local/mergeTsvs"
 /***********
 | WORKFLOW |
 ***********/
@@ -33,15 +33,18 @@ workflow QC {
         multiqc_adapt_ch = process_ch.map{ it[1] }.collect().ifEmpty([])
         multiqc_qbase_ch = process_ch.map{ it[2] }.collect().ifEmpty([])
         multiqc_qseqs_ch = process_ch.map{ it[3] }.collect().ifEmpty([])
+        multiqc_lengths_ch = process_ch.map{ it[4] }.collect().ifEmpty([])
         // 5. Merge MultiQC outputs
         basic_out_ch = MERGE_MULTIQC_BASIC(multiqc_basic_ch, "${stage_label}_qc_basic_stats")
         adapt_out_ch = MERGE_MULTIQC_ADAPT(multiqc_adapt_ch, "${stage_label}_qc_adapter_stats")
         qbase_out_ch = MERGE_MULTIQC_QBASE(multiqc_qbase_ch, "${stage_label}_qc_quality_base_stats")
         qseqs_out_ch = MERGE_MULTIQC_QSEQS(multiqc_qseqs_ch, "${stage_label}_qc_quality_sequence_stats")
+        lengths_out_ch = MERGE_MULTIQC_LENGTHS(multiqc_lengths_ch, "${stage_label}_qc_length_stats")
         // 6. Combine outputs into a single output channel
         out_ch = basic_out_ch.combine(adapt_out_ch)
             .combine(qbase_out_ch).combine(qseqs_out_ch)
-            .map({file1, file2, file3, file4 -> tuple(file1, file2, file3, file4)})
+            .combine(lengths_out_ch)
+            .map({file1, file2, file3, file4, file5 -> tuple(file1, file2, file3, file4, file5)})
     emit:
         qc = out_ch
 }
diff --git a/workflows/run_dev_se.nf b/workflows/run_dev_se.nf
index 0274e05e..541f308a 100644
--- a/workflows/run_dev_se.nf
+++ b/workflows/run_dev_se.nf
@@ -93,4 +93,5 @@ workflow RUN_DEV_SE {
         PROCESS_OUTPUT.out.adapt >> "results"
         PROCESS_OUTPUT.out.qbase >> "results"
         PROCESS_OUTPUT.out.qseqs >> "results"
-}
+        PROCESS_OUTPUT.out.lengths >> "results"
+}
\ No newline at end of file

From 09eaf8f62781c3d54ec02e3db266a16358fa178d Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Mon, 9 Dec 2024 23:32:14 +0000
Subject: [PATCH 04/23] Adding filtlong to the Cleaning step.

---
 subworkflows/local/clean/main.nf | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/clean/main.nf b/subworkflows/local/clean/main.nf
index 9f1e638e..4436b5a7 100644
--- a/subworkflows/local/clean/main.nf
+++ b/subworkflows/local/clean/main.nf
@@ -7,7 +7,11 @@
 ***************************/
 
 include { QC } from "../../../subworkflows/local/qc"
-include { FASTP } from "../../../modules/local/fastp"
+if (params.ont) {
+    include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong"
+} else {
+    include { FASTP as FILTER_READS } from "../../../modules/local/fastp"
+}
 
 /***********
 | WORKFLOW |
@@ -22,9 +26,13 @@ workflow CLEAN {
         stage_label
         single_end
     main:
-        fastp_ch = FASTP(reads_ch, adapter_path, single_end)
-        qc_ch = QC(fastp_ch.reads, fastqc_cpus, fastqc_mem, stage_label, single_end)
+        if (params.ont) {
+            filter_ch = FILTER_READS(reads_ch)
+        } else {
+            filter_ch = FILTER_READS(reads_ch, adapter_path, single_end)
+        }
+        qc_ch = QC(filter_ch.reads, fastqc_cpus, fastqc_mem, stage_label, single_end)
     emit:
-        reads = fastp_ch.reads
+        reads = filter_ch.reads
         qc = qc_ch.qc
 }
\ No newline at end of file

From 2e039dff6b7f9ec28c6d64c93f511a8bbd6a2297 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <58591538+simonleandergrimm@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:26:59 -0500
Subject: [PATCH 05/23] Correct filtlong comment to match --min_mean_q 90

---
 modules/local/filtlong/main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/filtlong/main.nf b/modules/local/filtlong/main.nf
index 230521d4..eda013d0 100644
--- a/modules/local/filtlong/main.nf
+++ b/modules/local/filtlong/main.nf
@@ -6,10 +6,10 @@ process FILTLONG {
     output:
         tuple val(sample), path("${sample}_filtlong.fastq.gz"), emit: reads
     shell:
-        // Filter reads based on length (min 100 bp) and mean quality (min 99%, i.e, a Phred score of 20)
+        // Filter reads based on length (min 100 bp) and mean average base quality (min 90%, i.e, a Phred score of 10)
         '''
         o=!{sample}_filtlong.fastq.gz
         i=!{reads[0]}
         filtlong --min_length 100 --min_mean_q 90 --verbose ${i} | gzip > ${o}
         '''
-}
\ No newline at end of file
+}

From e1459018323645cf5b724c4d98d94923555f9961 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Wed, 18 Dec 2024 20:48:32 +0000
Subject: [PATCH 06/23] edited FASTP logic to take into account fastp_single
 and fastp_paired

---
 subworkflows/local/clean/main.nf | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/clean/main.nf b/subworkflows/local/clean/main.nf
index 4436b5a7..8de37022 100644
--- a/subworkflows/local/clean/main.nf
+++ b/subworkflows/local/clean/main.nf
@@ -10,7 +10,11 @@ include { QC } from "../../../subworkflows/local/qc"
 if (params.ont) {
     include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong"
 } else {
-    include { FASTP as FILTER_READS } from "../../../modules/local/fastp"
+    if (params.single_end) {
+        include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp"
+    } else {
+        include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp"
+    }
 }
 
 /***********

From 34793cbd1fcff0ecfc01d1972c3a1720ff8993c5 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Wed, 18 Dec 2024 20:55:12 +0000
Subject: [PATCH 07/23] Renamed summarize_multiqc processes/folders/scripts to
 not have pair in the name.

---
 .../local/{summarizeMultiqcPair => summarizeMultiqc}/main.nf  | 4 ++--
 .../resources/usr/bin/summarize-multiqc.R}                    | 0
 subworkflows/local/qc/main.nf                                 | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename modules/local/{summarizeMultiqcPair => summarizeMultiqc}/main.nf (78%)
 rename modules/local/{summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R => summarizeMultiqc/resources/usr/bin/summarize-multiqc.R} (100%)

diff --git a/modules/local/summarizeMultiqcPair/main.nf b/modules/local/summarizeMultiqc/main.nf
similarity index 78%
rename from modules/local/summarizeMultiqcPair/main.nf
rename to modules/local/summarizeMultiqc/main.nf
index d5a6b976..e224349a 100644
--- a/modules/local/summarizeMultiqcPair/main.nf
+++ b/modules/local/summarizeMultiqc/main.nf
@@ -1,5 +1,5 @@
 // Extract paired MultiQC data into a more usable form
-process SUMMARIZE_MULTIQC_PAIR {
+process SUMMARIZE_MULTIQC {
     label "R"
     label "single"
     input:
@@ -9,6 +9,6 @@ process SUMMARIZE_MULTIQC_PAIR {
         tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz"), path("${stage}_${sample}_qc_length_stats.tsv.gz")
     shell:
         '''
-        summarize-multiqc-pair.R -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD}
+        summarize-multiqcR -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD}
         '''
 }
\ No newline at end of file
diff --git a/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R b/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R
similarity index 100%
rename from modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R
rename to modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R
diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf
index 8a4895e5..065c6b93 100644
--- a/subworkflows/local/qc/main.nf
+++ b/subworkflows/local/qc/main.nf
@@ -4,7 +4,7 @@
 
 include { FASTQC_LABELED } from "../../../modules/local/fastqc"
 include { MULTIQC_LABELED } from "../../../modules/local/multiqc"
-include { SUMMARIZE_MULTIQC_PAIR } from "../../../modules/local/summarizeMultiqcPair"
+include { SUMMARIZE_MULTIQC } from "../../../modules/local/summarizeMultiqc"
 include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs"

From 22073c19b6707968719a673fe6d9042c40158ca9 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Wed, 18 Dec 2024 21:04:07 +0000
Subject: [PATCH 08/23] Adopting testing set up from profile ont branch

---
 test-data/ont-samplesheet.csv |  2 ++
 tests/main.nf.test            | 10 ++++++++++
 tests/run_dev_ont.config      | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+)
 create mode 100644 test-data/ont-samplesheet.csv
 create mode 100644 tests/run_dev_ont.config

diff --git a/test-data/ont-samplesheet.csv b/test-data/ont-samplesheet.csv
new file mode 100644
index 00000000..52e157af
--- /dev/null
+++ b/test-data/ont-samplesheet.csv
@@ -0,0 +1,2 @@
+sample,fastq
+NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-restricted/NAO-ONT-20240710-WW-RNA1/raw/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz
\ No newline at end of file
diff --git a/tests/main.nf.test b/tests/main.nf.test
index 925af3cc..be793356 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -27,6 +27,16 @@ nextflow_pipeline {
             assert workflow.success
         }
     }
+
+    test("Test Oxford Nanopore run workflow") {
+        config "tests/run_dev_ont.config"
+        tag "run_dev_ont"
+
+        then {
+            assert workflow.success
+        }
+    }
+
     test("Test validation workflow") {
         config "tests/run_validation.config"
         tag "validation"
diff --git a/tests/run_dev_ont.config b/tests/run_dev_ont.config
new file mode 100644
index 00000000..4e1f84b2
--- /dev/null
+++ b/tests/run_dev_ont.config
@@ -0,0 +1,34 @@
+/************************************************
+| CONFIGURATION FILE FOR NAO VIRAL MGS WORKFLOW |
+************************************************/
+
+params {
+    mode = "run_dev_se"
+
+    // Directories
+    base_dir = "./" // Parent for working and output directories (can be S3)
+    ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow)
+
+    // Files
+    sample_sheet = "${projectDir}/test-data/ont-samplesheet.csv" // Path to library TSV
+    adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming
+
+    // Numerical
+    human_read_filtering = true // Whether to filter human reads
+    grouping = false // Whether to group samples by 'group' column in samplesheet
+    n_reads_trunc = 0 // Number of reads per sample to run through pipeline (0 = all reads)
+    n_reads_profile = 1000000 // Number of reads per sample to run through taxonomic profiling
+    bt2_score_threshold = 20 // Normalized score threshold for HV calling (typically 15 or 20)
+    blast_hv_fraction = 0 // Fraction of putative HV reads to BLAST vs nt (0 = don't run BLAST)
+    kraken_memory = "128 GB" // Memory needed to safely load Kraken DB
+    quality_encoding = "phred33" // FASTQ quality encoding (probably phred33, maybe phred64)
+    fuzzy_match_alignment_duplicates = 0 // Fuzzy matching the start coordinate of reads for identification of duplicates through alignment (0 = exact matching; options are 0, 1, or 2)
+    host_taxon = "vertebrate"
+
+    blast_db_prefix = "nt_others"
+}
+
+includeConfig "${projectDir}/configs/containers.config"
+includeConfig "${projectDir}/configs/profiles.config"
+includeConfig "${projectDir}/configs/read_type.config"
+includeConfig "${projectDir}/configs/output.config"

From 3ce9cc68cac07b1601e237d1afb7a2794b761b94 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Wed, 18 Dec 2024 21:28:32 +0000
Subject: [PATCH 09/23] Fixed summarize multiqc typo and unneeded single-end
 variable for fastp

---
 modules/local/summarizeMultiqc/main.nf | 2 +-
 subworkflows/local/clean/main.nf       | 2 +-
 subworkflows/local/qc/main.nf          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/summarizeMultiqc/main.nf b/modules/local/summarizeMultiqc/main.nf
index e224349a..b8cc845c 100644
--- a/modules/local/summarizeMultiqc/main.nf
+++ b/modules/local/summarizeMultiqc/main.nf
@@ -9,6 +9,6 @@ process SUMMARIZE_MULTIQC {
         tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz"), path("${stage}_${sample}_qc_length_stats.tsv.gz")
     shell:
         '''
-        summarize-multiqcR -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD}
+        summarize-multiqc.R -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD}
         '''
 }
\ No newline at end of file
diff --git a/subworkflows/local/clean/main.nf b/subworkflows/local/clean/main.nf
index 8de37022..c9f844b7 100644
--- a/subworkflows/local/clean/main.nf
+++ b/subworkflows/local/clean/main.nf
@@ -33,7 +33,7 @@ workflow CLEAN {
         if (params.ont) {
             filter_ch = FILTER_READS(reads_ch)
         } else {
-            filter_ch = FILTER_READS(reads_ch, adapter_path, single_end)
+            filter_ch = FILTER_READS(reads_ch, adapter_path)
         }
         qc_ch = QC(filter_ch.reads, fastqc_cpus, fastqc_mem, stage_label, single_end)
     emit:
diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf
index 065c6b93..4474bcf7 100644
--- a/subworkflows/local/qc/main.nf
+++ b/subworkflows/local/qc/main.nf
@@ -27,7 +27,7 @@ workflow QC {
         // 2. Extract data with MultiQC for each read file / pair of read files
         multiqc_ch = MULTIQC_LABELED(stage_label, fastqc_ch.zip)
         // 3. Summarize MultiQC information for each read file / pair of read files
-        process_ch = SUMMARIZE_MULTIQC_PAIR(multiqc_ch.data, single_end)
+        process_ch = SUMMARIZE_MULTIQC(multiqc_ch.data, single_end)
         // 4. Collate MultiQC outputs
         multiqc_basic_ch = process_ch.map{ it[0] }.collect().ifEmpty([])
         multiqc_adapt_ch = process_ch.map{ it[1] }.collect().ifEmpty([])

From cfd0ddbee89ea74ecc909eaa375d1c0844b7f8a5 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Wed, 18 Dec 2024 21:31:59 +0000
Subject: [PATCH 10/23] dropped unneeded brackets.

---
 .../summarizeMultiqc/resources/usr/bin/summarize-multiqc.R    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R b/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R
index 9b1519d0..a6eba02e 100755
--- a/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R
+++ b/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R
@@ -172,8 +172,8 @@ fastqc_tsv <- readr::read_tsv(fastqc_tsv_path, show_col_types = FALSE)
 # Process
 add_info <- function(tab) mutate(tab, stage=opt$stage, sample=opt$sample)
 basic_info <- basic_info_fastqc(fastqc_tsv, multiqc_json) %>% add_info
-adapters <- extract_adapter_data(multiqc_json) %>% add_info()
-lengths <- extract_length_data(multiqc_json) %>% add_info()
+adapters <- extract_adapter_data(multiqc_json) %>% add_info
+lengths <- extract_length_data(multiqc_json) %>% add_info
 per_base_quality <- extract_per_base_quality(multiqc_json) %>% add_info
 per_sequence_quality <- extract_per_sequence_quality(multiqc_json) %>% add_info
 

From bbd108bc34626211bc37a45318cb1754d4e0f4d9 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Fri, 20 Dec 2024 15:32:53 +0000
Subject: [PATCH 11/23] Removed adapters param from ONT test config

---
 tests/run_dev_ont.config | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/run_dev_ont.config b/tests/run_dev_ont.config
index 4e1f84b2..9ab5031d 100644
--- a/tests/run_dev_ont.config
+++ b/tests/run_dev_ont.config
@@ -11,7 +11,6 @@ params {
 
     // Files
     sample_sheet = "${projectDir}/test-data/ont-samplesheet.csv" // Path to library TSV
-    adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming
 
     // Numerical
     human_read_filtering = true // Whether to filter human reads

From c9b72c43fb0ca39e49bc58ca060e5ad447f0fa6c Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Fri, 20 Dec 2024 15:33:41 +0000
Subject: [PATCH 12/23] Added ONT run to dev

---
 .github/workflows/end-to-end-se.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/end-to-end-se.yml b/.github/workflows/end-to-end-se.yml
index d723dd68..031074cd 100644
--- a/.github/workflows/end-to-end-se.yml
+++ b/.github/workflows/end-to-end-se.yml
@@ -28,5 +28,8 @@ jobs:
           wget -qO- https://get.nf-test.com | bash
           sudo mv nf-test /usr/local/bin/
 
-      - name: Run run_dev_se workflow
-        run: nf-test test --tag run_dev_se --verbose
\ No newline at end of file
+      - name: Run run_dev_se workflow on single-end data
+        run: nf-test test --tag run_dev_se --verbose
+
+      - name: Run run_dev_se workflow on ONT data
+      run: nf-test test --tag run_dev_se_ont --verbose
\ No newline at end of file

From 5b877edd07cd35f49b63967cf22363d027d9266e Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Tue, 7 Jan 2025 22:06:30 +0000
Subject: [PATCH 13/23] testing new tidyverse container

---
 configs/containers.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/containers.config b/configs/containers.config
index 9e2ca712..b0324890 100644
--- a/configs/containers.config
+++ b/configs/containers.config
@@ -55,7 +55,7 @@ process {
         container = "securebio/nao-pypkg"
     }
     withLabel: tidyverse {
-        container = "rocker/tidyverse:4.4.1"
+        container = "rocker/tidyverse:4.4.2"
     }
     withLabel: R {
         container = "securebio/nao-rpkg"

From f75db731f535ebb287f8e5feb139d72a142fe076 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Tue, 7 Jan 2025 22:56:56 +0000
Subject: [PATCH 14/23] fixed indent in end-to-end-se.yml

---
 .github/workflows/end-to-end-se.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/end-to-end-se.yml b/.github/workflows/end-to-end-se.yml
index 031074cd..66b41924 100644
--- a/.github/workflows/end-to-end-se.yml
+++ b/.github/workflows/end-to-end-se.yml
@@ -32,4 +32,4 @@ jobs:
         run: nf-test test --tag run_dev_se --verbose
 
       - name: Run run_dev_se workflow on ONT data
-      run: nf-test test --tag run_dev_se_ont --verbose
\ No newline at end of file
+        run: nf-test test --tag run_dev_se_ont --verbose
\ No newline at end of file

From 7a4191698b07a26f7102dc0d635f93c91bb149f0 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Wed, 15 Jan 2025 23:50:35 +0000
Subject: [PATCH 15/23] Dropping ont from read_type.config

---
 configs/read_type.config | 1 -
 1 file changed, 1 deletion(-)

diff --git a/configs/read_type.config b/configs/read_type.config
index d4b9b988..88aea81a 100644
--- a/configs/read_type.config
+++ b/configs/read_type.config
@@ -3,5 +3,4 @@
 params {
     // Whether the underlying data is paired-end or single-end
     single_end = new File(params.sample_sheet).text.readLines()[0].contains('fastq_2') ? false : true
-    ont = new File(params.sample_sheet).text.readLines()[1].contains('ONT') ? true : false
 }

From 1df469943bb13a6bd9cdfb404e121d5f8af85dfd Mon Sep 17 00:00:00 2001
From: simonleandergrimm <58591538+simonleandergrimm@users.noreply.github.com>
Date: Fri, 10 Jan 2025 15:26:13 -0500
Subject: [PATCH 16/23] Remove read_type.config include from run_validation.config

---
 configs/run_validation.config | 1 -
 1 file changed, 1 deletion(-)

diff --git a/configs/run_validation.config b/configs/run_validation.config
index 9d351024..1ebdebdb 100644
--- a/configs/run_validation.config
+++ b/configs/run_validation.config
@@ -24,5 +24,4 @@ includeConfig "${projectDir}/configs/containers.config"
 includeConfig "${projectDir}/configs/resources.config"
 includeConfig "${projectDir}/configs/profiles.config"
 includeConfig "${projectDir}/configs/output.config"
-includeConfig "${projectDir}/configs/read_type.config"
 process.queue = "will-batch-queue" // AWS Batch job queue

From 0f5f68df93179f5c51f066dd01ed2c5b9852d7a8 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Thu, 16 Jan 2025 00:07:25 +0000
Subject: [PATCH 17/23] Fixed tag in end-to-end-se.yml for the ont test run.
 Added a manual parameter for data being ONT. Added adapters to the ONT file
 so it doesn't break.

---
 .github/workflows/end-to-end-se.yml | 2 +-
 configs/run.config                  | 3 +++
 configs/run_dev_se.config           | 3 +++
 test-data/nextflow.config           | 5 +++--
 tests/run_dev_ont.config            | 4 ++++
 tests/run_dev_se.config             | 3 +++
 6 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/end-to-end-se.yml b/.github/workflows/end-to-end-se.yml
index 66b41924..ac04e944 100644
--- a/.github/workflows/end-to-end-se.yml
+++ b/.github/workflows/end-to-end-se.yml
@@ -32,4 +32,4 @@ jobs:
         run: nf-test test --tag run_dev_se --verbose
 
       - name: Run run_dev_se workflow on ONT data
-        run: nf-test test --tag run_dev_se_ont --verbose
\ No newline at end of file
+        run: nf-test test --tag run_dev_ont --verbose
\ No newline at end of file
diff --git a/configs/run.config b/configs/run.config
index 09cbd5d3..34777c3a 100644
--- a/configs/run.config
+++ b/configs/run.config
@@ -5,6 +5,9 @@
 params {
     mode = "run"
 
+    // Sequencing platform
+    ont = false // Whether the sequencing is ONT (true) or Illumina (false)
+
     // Directories
     base_dir = "s3://nao-mgs-wb/test-batch" // Parent for working and output directories (can be S3)
     ref_dir = "s3://nao-mgs-wb/index/20241209/output" // Reference/index directory (generated by index workflow)
diff --git a/configs/run_dev_se.config b/configs/run_dev_se.config
index 7414ea35..58371e4e 100644
--- a/configs/run_dev_se.config
+++ b/configs/run_dev_se.config
@@ -5,6 +5,9 @@
 params {
     mode = "run_dev_se"
 
+    // Sequencing platform
+    ont = false // Whether the sequencing is ONT (true) or Illumina (false)
+
     // Directories
     base_dir = "s3://nao-mgs-simon/test_single_read" // Parent for working and output directories (can be S3)
     ref_dir = "s3://nao-mgs-wb/index/20241209/output" // Reference/index directory (generated by index workflow)
diff --git a/test-data/nextflow.config b/test-data/nextflow.config
index 191f7a20..34777c3a 100644
--- a/test-data/nextflow.config
+++ b/test-data/nextflow.config
@@ -5,12 +5,13 @@
 params {
     mode = "run"
 
+    // Sequencing platform
+    ont = false // Whether the sequencing is ONT (true) or Illumina (false)
+
     // Directories
     base_dir = "s3://nao-mgs-wb/test-batch" // Parent for working and output directories (can be S3)
     ref_dir = "s3://nao-mgs-wb/index/20241209/output" // Reference/index directory (generated by index workflow)
 
-
-
     // Files
     sample_sheet = "${launchDir}/samplesheet.csv" // Path to library TSV
     adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming
diff --git a/tests/run_dev_ont.config b/tests/run_dev_ont.config
index 9ab5031d..e36229cc 100644
--- a/tests/run_dev_ont.config
+++ b/tests/run_dev_ont.config
@@ -5,12 +5,16 @@
 params {
     mode = "run_dev_se"
 
+    // Sequencing platform
+    ont = true // Whether the sequencing is ONT (true) or Illumina (false)
+
     // Directories
     base_dir = "./" // Parent for working and output directories (can be S3)
     ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow)
 
     // Files
     sample_sheet = "${projectDir}/test-data/ont-samplesheet.csv" // Path to library TSV
+    adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming. Not used for ONT.
 
     // Numerical
     human_read_filtering = true // Whether to filter human reads
diff --git a/tests/run_dev_se.config b/tests/run_dev_se.config
index 34b72954..4d7ff50e 100644
--- a/tests/run_dev_se.config
+++ b/tests/run_dev_se.config
@@ -5,6 +5,9 @@
 params {
     mode = "run_dev_se"
 
+    // Sequencing platform
+    ont = false // Whether the sequencing is ONT (true) or Illumina (false)
+
     // Directories
     base_dir = "./" // Parent for working and output directories (can be S3)
     ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow)

From 919b3c48ee504dc471d489213baec07b6beb52fe Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Fri, 17 Jan 2025 20:26:39 +0000
Subject: [PATCH 18/23] moved location of ONT WW test data

---
 test-data/ont-samplesheet.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test-data/ont-samplesheet.csv b/test-data/ont-samplesheet.csv
index 52e157af..3cd3ac05 100644
--- a/test-data/ont-samplesheet.csv
+++ b/test-data/ont-samplesheet.csv
@@ -1,2 +1,2 @@
 sample,fastq
-NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-restricted/NAO-ONT-20240710-WW-RNA1/raw/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz
\ No newline at end of file
+NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-mgs-simon/ont-ww-test/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz
\ No newline at end of file

From 58416f0e6e285825ee7c55495be397730920d547 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Fri, 17 Jan 2025 20:37:30 +0000
Subject: [PATCH 19/23] Adding qc lengths to runQc. Adding filtlong to
 subsetTrim.

---
 subworkflows/local/runQc/main.nf      |  4 ++++
 subworkflows/local/subsetTrim/main.nf | 23 ++++++++++++++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/runQc/main.nf b/subworkflows/local/runQc/main.nf
index 3db2f274..fa71c1ad 100644
--- a/subworkflows/local/runQc/main.nf
+++ b/subworkflows/local/runQc/main.nf
@@ -8,6 +8,7 @@ include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/merge
 include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs"
 include { MERGE_TSVS as MERGE_MULTIQC_QSEQS } from "../../../modules/local/mergeTsvs"
+include { MERGE_TSVS as MERGE_MULTIQC_LENGTHS } from "../../../modules/local/mergeTsvs"
 
 /***********
 | WORKFLOW |
@@ -31,14 +32,17 @@ workflow RUN_QC {
       multiqc_adapt_ch = qc_ch.map{ it[1] }.collect().ifEmpty([])
       multiqc_qbase_ch = qc_ch.map{ it[2] }.collect().ifEmpty([])
       multiqc_qseqs_ch = qc_ch.map{ it[3] }.collect().ifEmpty([])
+      multiqc_lengths_ch = qc_ch.map{ it[4] }.collect().ifEmpty([])
       // 4. Merge MultiQC outputs
       basic_out_ch = MERGE_MULTIQC_BASIC(multiqc_basic_ch, "qc_basic_stats")
       adapt_out_ch = MERGE_MULTIQC_ADAPT(multiqc_adapt_ch, "qc_adapter_stats")
       qbase_out_ch = MERGE_MULTIQC_QBASE(multiqc_qbase_ch, "qc_quality_base_stats")
       qseqs_out_ch = MERGE_MULTIQC_QSEQS(multiqc_qseqs_ch, "qc_quality_sequence_stats")
+      lengths_out_ch = MERGE_MULTIQC_LENGTHS(multiqc_lengths_ch, "qc_length_stats")
     emit:
       qc_basic = basic_out_ch
       qc_adapt = adapt_out_ch
       qc_qbase = qbase_out_ch
       qc_qseqs = qseqs_out_ch
+      qc_lengths = lengths_out_ch
 }
diff --git a/subworkflows/local/subsetTrim/main.nf b/subworkflows/local/subsetTrim/main.nf
index 0fcfc130..06148d39 100644
--- a/subworkflows/local/subsetTrim/main.nf
+++ b/subworkflows/local/subsetTrim/main.nf
@@ -6,14 +6,31 @@ if (params.single_end) {
     include { SUBSET_READS_SINGLE_TARGET as SUBSET_READS_TARGET } from "../../../modules/local/subsetReads"
     include { CONCAT_GROUP_SINGLE as CONCAT_GROUP } from "../../../modules/local/concatGroup"
     include { SUBSET_READS_SINGLE_TARGET; SUBSET_READS_SINGLE_TARGET as SUBSET_READS_TARGET_GROUP } from "../../../modules/local/subsetReads"
-    include { FASTP_SINGLE as FASTP } from "../../../modules/local/fastp"
+    if (params.ont) {
+        include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong"
+    } else {
+        include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp"
+    }
+
 } else {
     include { SUBSET_READS_PAIRED_TARGET as SUBSET_READS_TARGET } from "../../../modules/local/subsetReads"
     include { SUBSET_READS_PAIRED_TARGET; SUBSET_READS_PAIRED_TARGET as SUBSET_READS_TARGET_GROUP } from "../../../modules/local/subsetReads"
     include { CONCAT_GROUP_PAIRED as CONCAT_GROUP } from "../../../modules/local/concatGroup"
-    include { FASTP_PAIRED as FASTP } from "../../../modules/local/fastp"
+    include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp"
+}
+
+
+if (params.ont) {
+    include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong"
+} else {
+    if (params.single_end) {
+        include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp"
+    } else {
+        include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp"
+    }
 }
 
+
 /***********
 | WORKFLOW |
 ***********/
@@ -60,7 +77,7 @@ workflow SUBSET_TRIM {
         }
 
         // Call fastp adapter trimming
-        fastp_ch = FASTP(grouped_ch, adapter_path)
+        fastp_ch = FILTER_READS(grouped_ch, adapter_path)
     emit:
         subset_reads = grouped_ch
         trimmed_subset_reads = fastp_ch.reads

From 2224711bc23c48f8350f1072c8e90c6548ac1fc7 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Fri, 17 Jan 2025 21:06:37 +0000
Subject: [PATCH 20/23] fixed multiqc output styling.

---
 subworkflows/local/subsetTrim/main.nf | 19 ++++++-------------
 workflows/run.nf                      |  1 +
 workflows/run_dev_se.nf               |  7 +------
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/subworkflows/local/subsetTrim/main.nf b/subworkflows/local/subsetTrim/main.nf
index 06148d39..b609cf3a 100644
--- a/subworkflows/local/subsetTrim/main.nf
+++ b/subworkflows/local/subsetTrim/main.nf
@@ -20,17 +20,6 @@ if (params.single_end) {
 }
 
 
-if (params.ont) {
-    include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong"
-} else {
-    if (params.single_end) {
-        include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp"
-    } else {
-        include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp"
-    }
-}
-
-
 /***********
 | WORKFLOW |
 ***********/
@@ -77,8 +66,12 @@ workflow SUBSET_TRIM {
         }
 
         // Call fastp adapter trimming
-        fastp_ch = FILTER_READS(grouped_ch, adapter_path)
+        if (params.ont) {
+            trimmed_ch = FILTER_READS(grouped_ch)
+        } else {
+            trimmed_ch = FILTER_READS(grouped_ch, adapter_path)
+        }
     emit:
         subset_reads = grouped_ch
-        trimmed_subset_reads = fastp_ch.reads
+        trimmed_subset_reads = trimmed_ch.reads
 }
diff --git a/workflows/run.nf b/workflows/run.nf
index 53fdd070..fb7a0f0a 100644
--- a/workflows/run.nf
+++ b/workflows/run.nf
@@ -90,6 +90,7 @@ workflow RUN {
         RUN_QC.out.qc_adapt >> "results"
         RUN_QC.out.qc_qbase >> "results"
         RUN_QC.out.qc_qseqs >> "results"
+        RUN_QC.out.qc_lengths >> "results"
         // Final results
         EXTRACT_VIRAL_READS.out.tsv >> "results"
         EXTRACT_VIRAL_READS.out.counts >> "results"
diff --git a/workflows/run_dev_se.nf b/workflows/run_dev_se.nf
index ff3d54b5..f04bc2ab 100644
--- a/workflows/run_dev_se.nf
+++ b/workflows/run_dev_se.nf
@@ -62,18 +62,13 @@ workflow RUN_DEV_SE {
         time_ch >> "logging"
         version_ch >> "logging"
         // QC
-        PROCESS_OUTPUT.out.basic >> "results"
-        PROCESS_OUTPUT.out.adapt >> "results"
-        PROCESS_OUTPUT.out.qbase >> "results"
-        PROCESS_OUTPUT.out.qseqs >> "results"
-        PROCESS_OUTPUT.out.lengths >> "results"
         COUNT_TOTAL_READS.out.read_counts >> "results"
         RUN_QC.out.qc_basic >> "results"
         RUN_QC.out.qc_adapt >> "results"
         RUN_QC.out.qc_qbase >> "results"
         RUN_QC.out.qc_qseqs >> "results"
+        RUN_QC.out.qc_lengths >> "results"
         // Final results
         PROFILE.out.bracken >> "results"
         PROFILE.out.kraken >> "results"
 }
-

From b6868c1d775db404e3a59ffe316a18816b919869 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Mon, 20 Jan 2025 21:24:41 +0000
Subject: [PATCH 21/23] fixed address of ont ww test data.

---
 test-data/ont-samplesheet.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test-data/ont-samplesheet.csv b/test-data/ont-samplesheet.csv
index 3cd3ac05..60339aff 100644
--- a/test-data/ont-samplesheet.csv
+++ b/test-data/ont-samplesheet.csv
@@ -1,2 +1,2 @@
 sample,fastq
-NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-mgs-simon/ont-ww-test/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz
\ No newline at end of file
+NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-testing/ont-ww-test/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz
\ No newline at end of file

From 0b9672d46efd5387ee500a112a90b335968c4c66 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Tue, 21 Jan 2025 16:29:46 +0000
Subject: [PATCH 22/23] Increased filtlong mean quality score to 99%

---
 modules/local/filtlong/main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/filtlong/main.nf b/modules/local/filtlong/main.nf
index eda013d0..04aa34c1 100644
--- a/modules/local/filtlong/main.nf
+++ b/modules/local/filtlong/main.nf
@@ -6,10 +6,10 @@ process FILTLONG {
     output:
         tuple val(sample), path("${sample}_filtlong.fastq.gz"), emit: reads
     shell:
-        // Filter reads based on length (min 100 bp) and mean average base quality (min 90%, i.e, a Phred score of 10)
+        // Filter reads based on length (min 100 bp) and mean base quality (min 99%, i.e., a Phred score of 20)
         '''
         o=!{sample}_filtlong.fastq.gz
         i=!{reads[0]}
-        filtlong --min_length 100 --min_mean_q 90 --verbose ${i} | gzip > ${o}
+        filtlong --min_length 100 --min_mean_q 99 --verbose ${i} | gzip > ${o}
         '''
 }

From 7fd1ee563a81e866545b63ddcd7a19af31ebb205 Mon Sep 17 00:00:00 2001
From: simonleandergrimm <simonleandergrimm@gmail.com>
Date: Tue, 21 Jan 2025 19:09:09 +0000
Subject: [PATCH 23/23] missed adding ont param to tests/run.config. now fixed.

---
 tests/run.config | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/run.config b/tests/run.config
index 6efa41d7..dc1ef93d 100644
--- a/tests/run.config
+++ b/tests/run.config
@@ -7,6 +7,9 @@
 params {
     mode = "run"
 
+    // Sequencing platform
+    ont = false // Whether the sequencing is ONT (true) or Illumina (false)
+
     // Directories
     base_dir = "./" // Parent for working and output directories (can be S3)
     ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow)