From 4fbed83e800ef0e9ae64886b69d4d09912e992c9 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Mon, 9 Dec 2024 00:53:26 +0000 Subject: [PATCH 01/23] Added variable that checks if data is ONT, based on fastq file name. --- configs/read_type.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/read_type.config b/configs/read_type.config index cd55388a..d4b9b988 100644 --- a/configs/read_type.config +++ b/configs/read_type.config @@ -3,4 +3,5 @@ params { // Whether the underlying data is paired-end or single-end single_end = new File(params.sample_sheet).text.readLines()[0].contains('fastq_2') ? false : true -} \ No newline at end of file + ont = new File(params.sample_sheet).text.readLines()[1].contains('ONT') ? true : false +} From 71757bc6ba5fd14b2ca04cec2492253c7442a294 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Mon, 9 Dec 2024 23:27:21 +0000 Subject: [PATCH 02/23] Adding the filtlong process and docker image, --- configs/containers.config | 3 +++ modules/local/filtlong/main.nf | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 modules/local/filtlong/main.nf diff --git a/configs/containers.config b/configs/containers.config index dbff9cab..9e2ca712 100644 --- a/configs/containers.config +++ b/configs/containers.config @@ -73,4 +73,7 @@ process { withLabel: fastp { container = "staphb/fastp:0.23.4" } + withLabel: filtlong { + container = "staphb/filtlong:0.2.1" + } } diff --git a/modules/local/filtlong/main.nf b/modules/local/filtlong/main.nf new file mode 100644 index 00000000..230521d4 --- /dev/null +++ b/modules/local/filtlong/main.nf @@ -0,0 +1,15 @@ +process FILTLONG { + label "small" + label "filtlong" + input: + tuple val(sample), path(reads) + output: + tuple val(sample), path("${sample}_filtlong.fastq.gz"), emit: reads + shell: + // Filter reads based on length (min 100 bp) and mean quality (min 99%, i.e, a Phred score of 20) + ''' + o=!{sample}_filtlong.fastq.gz + i=!{reads[0]} + 
filtlong --min_length 100 --min_mean_q 90 --verbose ${i} | gzip > ${o} ''' +} \ No newline at end of file From 66569ad86c2995df4bc87ea9cd2a4cb73fd6e3fe Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Mon, 9 Dec 2024 23:31:21 +0000 Subject: [PATCH 03/23] Added code to pull out length data from multiqc. --- modules/local/summarizeMultiqcPair/main.nf | 2 +- .../usr/bin/summarize-multiqc-pair.R | 25 +++++++++++++++++-- subworkflows/local/processOutput/main.nf | 5 +++- subworkflows/local/qc/main.nf | 7 ++++-- workflows/run_dev_se.nf | 3 ++- 5 files changed, 35 insertions(+), 7 deletions(-) diff --git a/modules/local/summarizeMultiqcPair/main.nf b/modules/local/summarizeMultiqcPair/main.nf index 18f3fd8c..d5a6b976 100644 --- a/modules/local/summarizeMultiqcPair/main.nf +++ b/modules/local/summarizeMultiqcPair/main.nf @@ -6,7 +6,7 @@ process SUMMARIZE_MULTIQC_PAIR { tuple val(stage), val(sample), path(multiqc_data) val(single_end) output: - tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz") + tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz"), path("${stage}_${sample}_qc_length_stats.tsv.gz") shell: ''' summarize-multiqc-pair.R -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD} diff --git a/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R b/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R index 6c1193c1..9b1519d0 100755 --- a/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R +++ b/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R @@ -41,7 +41,7 @@ out_path_basic <- file.path(opt$output_dir, 
paste0(id_out, "_qc_basic_stats.tsv. out_path_adapters <- file.path(opt$output_dir, paste0(id_out, "_qc_adapter_stats.tsv.gz")) out_path_quality_base <- file.path(opt$output_dir, paste0(id_out, "_qc_quality_base_stats.tsv.gz")) out_path_quality_sequence <- file.path(opt$output_dir, paste0(id_out, "_qc_quality_sequence_stats.tsv.gz")) - +out_path_lengths <- file.path(opt$output_dir, paste0(id_out, "_qc_length_stats.tsv.gz")) #=====================# # AUXILIARY FUNCTIONS # #=====================# @@ -96,6 +96,18 @@ extract_adapter_data_single <- function(adapter_dataset){ separate_wider_delim("filename", " - ", names=c("file", "adapter")) return(data) } + +extract_length_data_single <- function(length_dataset){ + # Convert a single JSON length dataset into a tibble + data <- lapply(1:length(length_dataset$name), function(n) + length_dataset$data[[n]] %>% as.data.frame %>% + mutate(filename=length_dataset$name[n])) %>% + bind_rows() %>% as_tibble %>% + rename(length=V1, n_sequences=V2) %>% + rename(file = filename) + return(data) +} + # NB: Current paired version can't distinguish or annotate forward vs reverse reads in these plots. # TODO: Restore this functionality (will require workflow restructuring). 
@@ -106,6 +118,13 @@ extract_adapter_data <- function(multiqc_json){ return(data_out) } +extract_length_data <- function(multiqc_json){ + # Extract length data from multiqc JSON + datasets <- multiqc_json$report_plot_data$fastqc_sequence_length_distribution_plot$datasets$lines + data_out <- lapply(datasets, extract_length_data_single) %>% bind_rows() + return(data_out) +} + extract_per_base_quality_single <- function(per_base_quality_dataset){ # Convert a single JSON per-base-quality dataset into a tibble data <- lapply(1:length(per_base_quality_dataset$name), function(n) @@ -153,7 +172,8 @@ fastqc_tsv <- readr::read_tsv(fastqc_tsv_path, show_col_types = FALSE) # Process add_info <- function(tab) mutate(tab, stage=opt$stage, sample=opt$sample) basic_info <- basic_info_fastqc(fastqc_tsv, multiqc_json) %>% add_info -adapters <- extract_adapter_data(multiqc_json) %>% add_info +adapters <- extract_adapter_data(multiqc_json) %>% add_info() +lengths <- extract_length_data(multiqc_json) %>% add_info() per_base_quality <- extract_per_base_quality(multiqc_json) %>% add_info per_sequence_quality <- extract_per_sequence_quality(multiqc_json) %>% add_info @@ -162,3 +182,4 @@ write_tsv(basic_info, out_path_basic) write_tsv(adapters, out_path_adapters) write_tsv(per_base_quality, out_path_quality_base) write_tsv(per_sequence_quality, out_path_quality_sequence) +write_tsv(lengths, out_path_lengths) diff --git a/subworkflows/local/processOutput/main.nf b/subworkflows/local/processOutput/main.nf index c9b4cc4c..9c273a4a 100644 --- a/subworkflows/local/processOutput/main.nf +++ b/subworkflows/local/processOutput/main.nf @@ -6,7 +6,7 @@ include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/merge include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_QSEQS } from "../../../modules/local/mergeTsvs" - +include { 
MERGE_TSVS as MERGE_MULTIQC_LENGTHS } from "../../../modules/local/mergeTsvs" /*********** | WORKFLOW | ***********/ @@ -20,14 +20,17 @@ workflow PROCESS_OUTPUT { multiqc_adapt_ch = multiqc_ch.map{ it[1] }.collect().ifEmpty([]) multiqc_qbase_ch = multiqc_ch.map{ it[2] }.collect().ifEmpty([]) multiqc_qseqs_ch = multiqc_ch.map{ it[3] }.collect().ifEmpty([]) + multiqc_lengths_ch = multiqc_ch.map{ it[4] }.collect().ifEmpty([]) // Merge MultiQC outputs basic_out_ch = MERGE_MULTIQC_BASIC(multiqc_basic_ch, "qc_basic_stats") adapt_out_ch = MERGE_MULTIQC_ADAPT(multiqc_adapt_ch, "qc_adapter_stats") qbase_out_ch = MERGE_MULTIQC_QBASE(multiqc_qbase_ch, "qc_quality_base_stats") qseqs_out_ch = MERGE_MULTIQC_QSEQS(multiqc_qseqs_ch, "qc_quality_sequence_stats") + lengths_out_ch = MERGE_MULTIQC_LENGTHS(multiqc_lengths_ch, "qc_length_stats") emit: basic = basic_out_ch adapt = adapt_out_ch qbase = qbase_out_ch qseqs = qseqs_out_ch + lengths = lengths_out_ch } diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf index 8823136f..8a4895e5 100644 --- a/subworkflows/local/qc/main.nf +++ b/subworkflows/local/qc/main.nf @@ -9,7 +9,7 @@ include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/merge include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_QSEQS } from "../../../modules/local/mergeTsvs" - +include { MERGE_TSVS as MERGE_MULTIQC_LENGTHS } from "../../../modules/local/mergeTsvs" /*********** | WORKFLOW | ***********/ @@ -33,15 +33,18 @@ workflow QC { multiqc_adapt_ch = process_ch.map{ it[1] }.collect().ifEmpty([]) multiqc_qbase_ch = process_ch.map{ it[2] }.collect().ifEmpty([]) multiqc_qseqs_ch = process_ch.map{ it[3] }.collect().ifEmpty([]) + multiqc_lengths_ch = process_ch.map{ it[4] }.collect().ifEmpty([]) // 5. 
Merge MultiQC outputs basic_out_ch = MERGE_MULTIQC_BASIC(multiqc_basic_ch, "${stage_label}_qc_basic_stats") adapt_out_ch = MERGE_MULTIQC_ADAPT(multiqc_adapt_ch, "${stage_label}_qc_adapter_stats") qbase_out_ch = MERGE_MULTIQC_QBASE(multiqc_qbase_ch, "${stage_label}_qc_quality_base_stats") qseqs_out_ch = MERGE_MULTIQC_QSEQS(multiqc_qseqs_ch, "${stage_label}_qc_quality_sequence_stats") + lengths_out_ch = MERGE_MULTIQC_LENGTHS(multiqc_lengths_ch, "${stage_label}_qc_length_stats") // 6. Combine outputs into a single output channel out_ch = basic_out_ch.combine(adapt_out_ch) .combine(qbase_out_ch).combine(qseqs_out_ch) - .map({file1, file2, file3, file4 -> tuple(file1, file2, file3, file4)}) + .combine(lengths_out_ch) + .map({file1, file2, file3, file4, file5 -> tuple(file1, file2, file3, file4, file5)}) emit: qc = out_ch } diff --git a/workflows/run_dev_se.nf b/workflows/run_dev_se.nf index 0274e05e..541f308a 100644 --- a/workflows/run_dev_se.nf +++ b/workflows/run_dev_se.nf @@ -93,4 +93,5 @@ workflow RUN_DEV_SE { PROCESS_OUTPUT.out.adapt >> "results" PROCESS_OUTPUT.out.qbase >> "results" PROCESS_OUTPUT.out.qseqs >> "results" -} + PROCESS_OUTPUT.out.lengths >> "results" +} \ No newline at end of file From 09eaf8f62781c3d54ec02e3db266a16358fa178d Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Mon, 9 Dec 2024 23:32:14 +0000 Subject: [PATCH 04/23] Adding filtlong to the Cleaning step. 
--- subworkflows/local/clean/main.nf | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/clean/main.nf b/subworkflows/local/clean/main.nf index 9f1e638e..4436b5a7 100644 --- a/subworkflows/local/clean/main.nf +++ b/subworkflows/local/clean/main.nf @@ -7,7 +7,11 @@ ***************************/ include { QC } from "../../../subworkflows/local/qc" -include { FASTP } from "../../../modules/local/fastp" +if (params.ont) { + include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong" +} else { + include { FASTP as FILTER_READS } from "../../../modules/local/fastp" +} /*********** | WORKFLOW | @@ -22,9 +26,13 @@ workflow CLEAN { stage_label single_end main: - fastp_ch = FASTP(reads_ch, adapter_path, single_end) - qc_ch = QC(fastp_ch.reads, fastqc_cpus, fastqc_mem, stage_label, single_end) + if (params.ont) { + filter_ch = FILTER_READS(reads_ch) + } else { + filter_ch = FILTER_READS(reads_ch, adapter_path, single_end) + } + qc_ch = QC(filter_ch.reads, fastqc_cpus, fastqc_mem, stage_label, single_end) emit: - reads = fastp_ch.reads + reads = filter_ch.reads qc = qc_ch.qc } \ No newline at end of file From 2e039dff6b7f9ec28c6d64c93f511a8bbd6a2297 Mon Sep 17 00:00:00 2001 From: simonleandergrimm <58591538+simonleandergrimm@users.noreply.github.com> Date: Mon, 16 Dec 2024 09:26:59 -0500 Subject: [PATCH 05/23] Update main.nf --- modules/local/filtlong/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/filtlong/main.nf b/modules/local/filtlong/main.nf index 230521d4..eda013d0 100644 --- a/modules/local/filtlong/main.nf +++ b/modules/local/filtlong/main.nf @@ -6,10 +6,10 @@ process FILTLONG { output: tuple val(sample), path("${sample}_filtlong.fastq.gz"), emit: reads shell: - // Filter reads based on length (min 100 bp) and mean quality (min 99%, i.e, a Phred score of 20) + // Filter reads based on length (min 100 bp) and mean average base quality (min 90%, i.e, a Phred 
score of 10) ''' o=!{sample}_filtlong.fastq.gz i=!{reads[0]} filtlong --min_length 100 --min_mean_q 90 --verbose ${i} | gzip > ${o} ''' -} \ No newline at end of file +} From e1459018323645cf5b724c4d98d94923555f9961 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Wed, 18 Dec 2024 20:48:32 +0000 Subject: [PATCH 06/23] edited FASTP logic to take into account fastp_single and fastp_paired --- subworkflows/local/clean/main.nf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/clean/main.nf b/subworkflows/local/clean/main.nf index 4436b5a7..8de37022 100644 --- a/subworkflows/local/clean/main.nf +++ b/subworkflows/local/clean/main.nf @@ -10,7 +10,11 @@ include { QC } from "../../../subworkflows/local/qc" if (params.ont) { include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong" } else { - include { FASTP as FILTER_READS } from "../../../modules/local/fastp" + if (params.single_end) { + include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp" + } else { + include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp" + } } /*********** From 34793cbd1fcff0ecfc01d1972c3a1720ff8993c5 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Wed, 18 Dec 2024 20:55:12 +0000 Subject: [PATCH 07/23] Renamed summarize_multiqc processes/folders/scripts to not have pair in the name. 
--- .../local/{summarizeMultiqcPair => summarizeMultiqc}/main.nf | 4 ++-- .../resources/usr/bin/summarize-multiqc.R} | 0 subworkflows/local/qc/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename modules/local/{summarizeMultiqcPair => summarizeMultiqc}/main.nf (78%) rename modules/local/{summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R => summarizeMultiqc/resources/usr/bin/summarize-multiqc.R} (100%) diff --git a/modules/local/summarizeMultiqcPair/main.nf b/modules/local/summarizeMultiqc/main.nf similarity index 78% rename from modules/local/summarizeMultiqcPair/main.nf rename to modules/local/summarizeMultiqc/main.nf index d5a6b976..e224349a 100644 --- a/modules/local/summarizeMultiqcPair/main.nf +++ b/modules/local/summarizeMultiqc/main.nf @@ -1,5 +1,5 @@ // Extract paired MultiQC data into a more usable form -process SUMMARIZE_MULTIQC_PAIR { +process SUMMARIZE_MULTIQC { label "R" label "single" input: @@ -9,6 +9,6 @@ process SUMMARIZE_MULTIQC_PAIR { tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz"), path("${stage}_${sample}_qc_length_stats.tsv.gz") shell: ''' - summarize-multiqc-pair.R -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD} + summarize-multiqcR -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD} ''' } \ No newline at end of file diff --git a/modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R b/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R similarity index 100% rename from modules/local/summarizeMultiqcPair/resources/usr/bin/summarize-multiqc-pair.R rename to modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf index 8a4895e5..065c6b93 100644 --- 
a/subworkflows/local/qc/main.nf +++ b/subworkflows/local/qc/main.nf @@ -4,7 +4,7 @@ include { FASTQC_LABELED } from "../../../modules/local/fastqc" include { MULTIQC_LABELED } from "../../../modules/local/multiqc" -include { SUMMARIZE_MULTIQC_PAIR } from "../../../modules/local/summarizeMultiqcPair" +include { SUMMARIZE_MULTIQC } from "../../../modules/local/summarizeMultiqc" include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs" From 22073c19b6707968719a673fe6d9042c40158ca9 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Wed, 18 Dec 2024 21:04:07 +0000 Subject: [PATCH 08/23] Adopting testing set up from profile ont branch --- test-data/ont-samplesheet.csv | 2 ++ tests/main.nf.test | 10 ++++++++++ tests/run_dev_ont.config | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 test-data/ont-samplesheet.csv create mode 100644 tests/run_dev_ont.config diff --git a/test-data/ont-samplesheet.csv b/test-data/ont-samplesheet.csv new file mode 100644 index 00000000..52e157af --- /dev/null +++ b/test-data/ont-samplesheet.csv @@ -0,0 +1,2 @@ +sample,fastq +NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-restricted/NAO-ONT-20240710-WW-RNA1/raw/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz \ No newline at end of file diff --git a/tests/main.nf.test b/tests/main.nf.test index 925af3cc..be793356 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -27,6 +27,16 @@ nextflow_pipeline { assert workflow.success } } + + test("Test Oxford Nanopore run workflow") { + config "tests/run_dev_ont.config" + tag "run_dev_ont" + + then { + assert workflow.success + } + } + test("Test validation workflow") { config "tests/run_validation.config" tag "validation" diff --git a/tests/run_dev_ont.config b/tests/run_dev_ont.config new file mode 100644 index 
00000000..4e1f84b2 --- /dev/null +++ b/tests/run_dev_ont.config @@ -0,0 +1,34 @@ +/************************************************ +| CONFIGURATION FILE FOR NAO VIRAL MGS WORKFLOW | +************************************************/ + +params { + mode = "run_dev_se" + + // Directories + base_dir = "./" // Parent for working and output directories (can be S3) + ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) + + // Files + sample_sheet = "${projectDir}/test-data/ont-samplesheet.csv" // Path to library TSV + adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming + + // Numerical + human_read_filtering = true // Whether to filter human reads + grouping = false // Whether to group samples by 'group' column in samplesheet + n_reads_trunc = 0 // Number of reads per sample to run through pipeline (0 = all reads) + n_reads_profile = 1000000 // Number of reads per sample to run through taxonomic profiling + bt2_score_threshold = 20 // Normalized score threshold for HV calling (typically 15 or 20) + blast_hv_fraction = 0 // Fraction of putative HV reads to BLAST vs nt (0 = don't run BLAST) + kraken_memory = "128 GB" // Memory needed to safely load Kraken DB + quality_encoding = "phred33" // FASTQ quality encoding (probably phred33, maybe phred64) + fuzzy_match_alignment_duplicates = 0 // Fuzzy matching the start coordinate of reads for identification of duplicates through alignment (0 = exact matching; options are 0, 1, or 2) + host_taxon = "vertebrate" + + blast_db_prefix = "nt_others" +} + +includeConfig "${projectDir}/configs/containers.config" +includeConfig "${projectDir}/configs/profiles.config" +includeConfig "${projectDir}/configs/read_type.config" +includeConfig "${projectDir}/configs/output.config" From 3ce9cc68cac07b1601e237d1afb7a2794b761b94 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Wed, 18 Dec 2024 21:28:32 +0000 Subject: [PATCH 09/23] Fixed summarize 
multiqc typo and unneeded single-end variable for fastp --- modules/local/summarizeMultiqc/main.nf | 2 +- subworkflows/local/clean/main.nf | 2 +- subworkflows/local/qc/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/summarizeMultiqc/main.nf b/modules/local/summarizeMultiqc/main.nf index e224349a..b8cc845c 100644 --- a/modules/local/summarizeMultiqc/main.nf +++ b/modules/local/summarizeMultiqc/main.nf @@ -9,6 +9,6 @@ process SUMMARIZE_MULTIQC { tuple path("${stage}_${sample}_qc_basic_stats.tsv.gz"), path("${stage}_${sample}_qc_adapter_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_base_stats.tsv.gz"), path("${stage}_${sample}_qc_quality_sequence_stats.tsv.gz"), path("${stage}_${sample}_qc_length_stats.tsv.gz") shell: ''' - summarize-multiqcR -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD} + summarize-multiqc.R -i !{multiqc_data} -s !{stage} -S !{sample} -r !{single_end} -o ${PWD} ''' } \ No newline at end of file diff --git a/subworkflows/local/clean/main.nf b/subworkflows/local/clean/main.nf index 8de37022..c9f844b7 100644 --- a/subworkflows/local/clean/main.nf +++ b/subworkflows/local/clean/main.nf @@ -33,7 +33,7 @@ workflow CLEAN { if (params.ont) { filter_ch = FILTER_READS(reads_ch) } else { - filter_ch = FILTER_READS(reads_ch, adapter_path, single_end) + filter_ch = FILTER_READS(reads_ch, adapter_path) } qc_ch = QC(filter_ch.reads, fastqc_cpus, fastqc_mem, stage_label, single_end) emit: diff --git a/subworkflows/local/qc/main.nf b/subworkflows/local/qc/main.nf index 065c6b93..4474bcf7 100644 --- a/subworkflows/local/qc/main.nf +++ b/subworkflows/local/qc/main.nf @@ -27,7 +27,7 @@ workflow QC { // 2. Extract data with MultiQC for each read file / pair of read files multiqc_ch = MULTIQC_LABELED(stage_label, fastqc_ch.zip) // 3. 
Summarize MultiQC information for each read file / pair of read files - process_ch = SUMMARIZE_MULTIQC_PAIR(multiqc_ch.data, single_end) + process_ch = SUMMARIZE_MULTIQC(multiqc_ch.data, single_end) // 4. Collate MultiQC outputs multiqc_basic_ch = process_ch.map{ it[0] }.collect().ifEmpty([]) multiqc_adapt_ch = process_ch.map{ it[1] }.collect().ifEmpty([]) From cfd0ddbee89ea74ecc909eaa375d1c0844b7f8a5 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Wed, 18 Dec 2024 21:31:59 +0000 Subject: [PATCH 10/23] dropped unneeded brackets. --- .../summarizeMultiqc/resources/usr/bin/summarize-multiqc.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R b/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R index 9b1519d0..a6eba02e 100755 --- a/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R +++ b/modules/local/summarizeMultiqc/resources/usr/bin/summarize-multiqc.R @@ -172,8 +172,8 @@ fastqc_tsv <- readr::read_tsv(fastqc_tsv_path, show_col_types = FALSE) # Process add_info <- function(tab) mutate(tab, stage=opt$stage, sample=opt$sample) basic_info <- basic_info_fastqc(fastqc_tsv, multiqc_json) %>% add_info -adapters <- extract_adapter_data(multiqc_json) %>% add_info() -lengths <- extract_length_data(multiqc_json) %>% add_info() +adapters <- extract_adapter_data(multiqc_json) %>% add_info +lengths <- extract_length_data(multiqc_json) %>% add_info per_base_quality <- extract_per_base_quality(multiqc_json) %>% add_info per_sequence_quality <- extract_per_sequence_quality(multiqc_json) %>% add_info From bbd108bc34626211bc37a45318cb1754d4e0f4d9 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Fri, 20 Dec 2024 15:32:53 +0000 Subject: [PATCH 11/23] Added test config for ONT --- tests/run_dev_ont.config | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/run_dev_ont.config b/tests/run_dev_ont.config index 4e1f84b2..9ab5031d 100644 --- 
a/tests/run_dev_ont.config +++ b/tests/run_dev_ont.config @@ -11,7 +11,6 @@ params { // Files sample_sheet = "${projectDir}/test-data/ont-samplesheet.csv" // Path to library TSV - adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming // Numerical human_read_filtering = true // Whether to filter human reads From c9b72c43fb0ca39e49bc58ca060e5ad447f0fa6c Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Fri, 20 Dec 2024 15:33:41 +0000 Subject: [PATCH 12/23] Added ONT run to dev --- .github/workflows/end-to-end-se.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/end-to-end-se.yml b/.github/workflows/end-to-end-se.yml index d723dd68..031074cd 100644 --- a/.github/workflows/end-to-end-se.yml +++ b/.github/workflows/end-to-end-se.yml @@ -28,5 +28,8 @@ jobs: wget -qO- https://get.nf-test.com | bash sudo mv nf-test /usr/local/bin/ - - name: Run run_dev_se workflow - run: nf-test test --tag run_dev_se --verbose \ No newline at end of file + - name: Run run_dev_se workflow on single-end data + run: nf-test test --tag run_dev_se --verbose + + - name: Run run_dev_se workflow on ONT data + run: nf-test test --tag run_dev_se_ont --verbose \ No newline at end of file From 5b877edd07cd35f49b63967cf22363d027d9266e Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Tue, 7 Jan 2025 22:06:30 +0000 Subject: [PATCH 13/23] testing new tidyverse container --- configs/containers.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/containers.config b/configs/containers.config index 9e2ca712..b0324890 100644 --- a/configs/containers.config +++ b/configs/containers.config @@ -55,7 +55,7 @@ process { container = "securebio/nao-pypkg" } withLabel: tidyverse { - container = "rocker/tidyverse:4.4.1" + container = "rocker/tidyverse:4.4.2" } withLabel: R { container = "securebio/nao-rpkg" From f75db731f535ebb287f8e5feb139d72a142fe076 Mon Sep 17 00:00:00 2001 From: 
simonleandergrimm Date: Tue, 7 Jan 2025 22:56:56 +0000 Subject: [PATCH 14/23] fixed indent in end-to-end-se.yml --- .github/workflows/end-to-end-se.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/end-to-end-se.yml b/.github/workflows/end-to-end-se.yml index 031074cd..66b41924 100644 --- a/.github/workflows/end-to-end-se.yml +++ b/.github/workflows/end-to-end-se.yml @@ -32,4 +32,4 @@ jobs: run: nf-test test --tag run_dev_se --verbose - name: Run run_dev_se workflow on ONT data - run: nf-test test --tag run_dev_se_ont --verbose \ No newline at end of file + run: nf-test test --tag run_dev_se_ont --verbose \ No newline at end of file From 7a4191698b07a26f7102dc0d635f93c91bb149f0 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Wed, 15 Jan 2025 23:50:35 +0000 Subject: [PATCH 15/23] Dropping ont from read_type.config --- configs/read_type.config | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/read_type.config b/configs/read_type.config index d4b9b988..88aea81a 100644 --- a/configs/read_type.config +++ b/configs/read_type.config @@ -3,5 +3,4 @@ params { // Whether the underlying data is paired-end or single-end single_end = new File(params.sample_sheet).text.readLines()[0].contains('fastq_2') ? false : true - ont = new File(params.sample_sheet).text.readLines()[1].contains('ONT') ? 
true : false } From 1df469943bb13a6bd9cdfb404e121d5f8af85dfd Mon Sep 17 00:00:00 2001 From: simonleandergrimm <58591538+simonleandergrimm@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:26:13 -0500 Subject: [PATCH 16/23] Update run_validation.config --- configs/run_validation.config | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/run_validation.config b/configs/run_validation.config index 9d351024..1ebdebdb 100644 --- a/configs/run_validation.config +++ b/configs/run_validation.config @@ -24,5 +24,4 @@ includeConfig "${projectDir}/configs/containers.config" includeConfig "${projectDir}/configs/resources.config" includeConfig "${projectDir}/configs/profiles.config" includeConfig "${projectDir}/configs/output.config" -includeConfig "${projectDir}/configs/read_type.config" process.queue = "will-batch-queue" // AWS Batch job queue From 0f5f68df93179f5c51f066dd01ed2c5b9852d7a8 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Thu, 16 Jan 2025 00:07:25 +0000 Subject: [PATCH 17/23] Fixed tag in end-to-end-se.yml for the ont test run. Added a manual parameter for data being ONT. Added adapters to the ONT file so it doesn't break. 
--- .github/workflows/end-to-end-se.yml | 2 +- configs/run.config | 3 +++ configs/run_dev_se.config | 3 +++ test-data/nextflow.config | 5 +++-- tests/run_dev_ont.config | 4 ++++ tests/run_dev_se.config | 3 +++ 6 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/end-to-end-se.yml b/.github/workflows/end-to-end-se.yml index 66b41924..ac04e944 100644 --- a/.github/workflows/end-to-end-se.yml +++ b/.github/workflows/end-to-end-se.yml @@ -32,4 +32,4 @@ jobs: run: nf-test test --tag run_dev_se --verbose - name: Run run_dev_se workflow on ONT data - run: nf-test test --tag run_dev_se_ont --verbose \ No newline at end of file + run: nf-test test --tag run_dev_ont --verbose \ No newline at end of file diff --git a/configs/run.config b/configs/run.config index 09cbd5d3..34777c3a 100644 --- a/configs/run.config +++ b/configs/run.config @@ -5,6 +5,9 @@ params { mode = "run" + // Sequencing platform + ont = false // Whether the sequencing is ONT (true) or Illumina (false) + // Directories base_dir = "s3://nao-mgs-wb/test-batch" // Parent for working and output directories (can be S3) ref_dir = "s3://nao-mgs-wb/index/20241209/output" // Reference/index directory (generated by index workflow) diff --git a/configs/run_dev_se.config b/configs/run_dev_se.config index 7414ea35..58371e4e 100644 --- a/configs/run_dev_se.config +++ b/configs/run_dev_se.config @@ -5,6 +5,9 @@ params { mode = "run_dev_se" + // Sequencing platform + ont = false // Whether the sequencing is ONT (true) or Illumina (false) + // Directories base_dir = "s3://nao-mgs-simon/test_single_read" // Parent for working and output directories (can be S3) ref_dir = "s3://nao-mgs-wb/index/20241209/output" // Reference/index directory (generated by index workflow) diff --git a/test-data/nextflow.config b/test-data/nextflow.config index 191f7a20..34777c3a 100644 --- a/test-data/nextflow.config +++ b/test-data/nextflow.config @@ -5,12 +5,13 @@ params { mode = "run" + // Sequencing platform + ont 
= false // Whether the sequencing is ONT (true) or Illumina (false) + // Directories base_dir = "s3://nao-mgs-wb/test-batch" // Parent for working and output directories (can be S3) ref_dir = "s3://nao-mgs-wb/index/20241209/output" // Reference/index directory (generated by index workflow) - - // Files sample_sheet = "${launchDir}/samplesheet.csv" // Path to library TSV adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming diff --git a/tests/run_dev_ont.config b/tests/run_dev_ont.config index 9ab5031d..e36229cc 100644 --- a/tests/run_dev_ont.config +++ b/tests/run_dev_ont.config @@ -5,12 +5,16 @@ params { mode = "run_dev_se" + // Sequencing platform + ont = true // Whether the sequencing is ONT (true) or Illumina (false) + // Directories base_dir = "./" // Parent for working and output directories (can be S3) ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) // Files sample_sheet = "${projectDir}/test-data/ont-samplesheet.csv" // Path to library TSV + adapters = "${projectDir}/ref/adapters.fasta" // Path to adapter file for adapter trimming. Not used for ONT. 
// Numerical human_read_filtering = true // Whether to filter human reads diff --git a/tests/run_dev_se.config b/tests/run_dev_se.config index 34b72954..4d7ff50e 100644 --- a/tests/run_dev_se.config +++ b/tests/run_dev_se.config @@ -5,6 +5,9 @@ params { mode = "run_dev_se" + // Sequencing platform + ont = false // Whether the sequencing is ONT (true) or Illumina (false) + // Directories base_dir = "./" // Parent for working and output directories (can be S3) ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) From 919b3c48ee504dc471d489213baec07b6beb52fe Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Fri, 17 Jan 2025 20:26:39 +0000 Subject: [PATCH 18/23] moved location of ONT WW test data --- test-data/ont-samplesheet.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-data/ont-samplesheet.csv b/test-data/ont-samplesheet.csv index 52e157af..3cd3ac05 100644 --- a/test-data/ont-samplesheet.csv +++ b/test-data/ont-samplesheet.csv @@ -1,2 +1,2 @@ sample,fastq -NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-restricted/NAO-ONT-20240710-WW-RNA1/raw/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz \ No newline at end of file +NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-mgs-simon/ont-ww-test/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz \ No newline at end of file From 58416f0e6e285825ee7c55495be397730920d547 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Fri, 17 Jan 2025 20:37:30 +0000 Subject: [PATCH 19/23] Adding qc lengths to runQc. Adding filtlong to subsetTrim. 
--- subworkflows/local/runQc/main.nf | 4 ++++ subworkflows/local/subsetTrim/main.nf | 23 ++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/runQc/main.nf b/subworkflows/local/runQc/main.nf index 3db2f274..fa71c1ad 100644 --- a/subworkflows/local/runQc/main.nf +++ b/subworkflows/local/runQc/main.nf @@ -8,6 +8,7 @@ include { MERGE_TSVS as MERGE_MULTIQC_BASIC } from "../../../modules/local/merge include { MERGE_TSVS as MERGE_MULTIQC_ADAPT } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_QBASE } from "../../../modules/local/mergeTsvs" include { MERGE_TSVS as MERGE_MULTIQC_QSEQS } from "../../../modules/local/mergeTsvs" +include { MERGE_TSVS as MERGE_MULTIQC_LENGTHS } from "../../../modules/local/mergeTsvs" /*********** | WORKFLOW | @@ -31,14 +32,17 @@ workflow RUN_QC { multiqc_adapt_ch = qc_ch.map{ it[1] }.collect().ifEmpty([]) multiqc_qbase_ch = qc_ch.map{ it[2] }.collect().ifEmpty([]) multiqc_qseqs_ch = qc_ch.map{ it[3] }.collect().ifEmpty([]) + multiqc_lengths_ch = qc_ch.map{ it[4] }.collect().ifEmpty([]) // 4. 
Merge MultiQC outputs basic_out_ch = MERGE_MULTIQC_BASIC(multiqc_basic_ch, "qc_basic_stats") adapt_out_ch = MERGE_MULTIQC_ADAPT(multiqc_adapt_ch, "qc_adapter_stats") qbase_out_ch = MERGE_MULTIQC_QBASE(multiqc_qbase_ch, "qc_quality_base_stats") qseqs_out_ch = MERGE_MULTIQC_QSEQS(multiqc_qseqs_ch, "qc_quality_sequence_stats") + lengths_out_ch = MERGE_MULTIQC_LENGTHS(multiqc_lengths_ch, "qc_length_stats") emit: qc_basic = basic_out_ch qc_adapt = adapt_out_ch qc_qbase = qbase_out_ch qc_qseqs = qseqs_out_ch + qc_lengths = lengths_out_ch } diff --git a/subworkflows/local/subsetTrim/main.nf b/subworkflows/local/subsetTrim/main.nf index 0fcfc130..06148d39 100644 --- a/subworkflows/local/subsetTrim/main.nf +++ b/subworkflows/local/subsetTrim/main.nf @@ -6,14 +6,31 @@ if (params.single_end) { include { SUBSET_READS_SINGLE_TARGET as SUBSET_READS_TARGET } from "../../../modules/local/subsetReads" include { CONCAT_GROUP_SINGLE as CONCAT_GROUP } from "../../../modules/local/concatGroup" include { SUBSET_READS_SINGLE_TARGET; SUBSET_READS_SINGLE_TARGET as SUBSET_READS_TARGET_GROUP } from "../../../modules/local/subsetReads" - include { FASTP_SINGLE as FASTP } from "../../../modules/local/fastp" + if (params.ont) { + include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong" + } else { + include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp" + } + } else { include { SUBSET_READS_PAIRED_TARGET as SUBSET_READS_TARGET } from "../../../modules/local/subsetReads" include { SUBSET_READS_PAIRED_TARGET; SUBSET_READS_PAIRED_TARGET as SUBSET_READS_TARGET_GROUP } from "../../../modules/local/subsetReads" include { CONCAT_GROUP_PAIRED as CONCAT_GROUP } from "../../../modules/local/concatGroup" - include { FASTP_PAIRED as FASTP } from "../../../modules/local/fastp" + include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp" +} + + +if (params.ont) { + include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong" +} else { + 
if (params.single_end) { + include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp" + } else { + include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp" + } } + /*********** | WORKFLOW | ***********/ @@ -60,7 +77,7 @@ workflow SUBSET_TRIM { } // Call fastp adapter trimming - fastp_ch = FASTP(grouped_ch, adapter_path) + fastp_ch = FILTER_READS(grouped_ch, adapter_path) emit: subset_reads = grouped_ch trimmed_subset_reads = fastp_ch.reads From 2224711bc23c48f8350f1072c8e90c6548ac1fc7 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Fri, 17 Jan 2025 21:06:37 +0000 Subject: [PATCH 20/23] fixed multiqc output styling. --- subworkflows/local/subsetTrim/main.nf | 19 ++++++------------- workflows/run.nf | 1 + workflows/run_dev_se.nf | 7 +------ 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/subworkflows/local/subsetTrim/main.nf b/subworkflows/local/subsetTrim/main.nf index 06148d39..b609cf3a 100644 --- a/subworkflows/local/subsetTrim/main.nf +++ b/subworkflows/local/subsetTrim/main.nf @@ -20,17 +20,6 @@ if (params.single_end) { } -if (params.ont) { - include { FILTLONG as FILTER_READS } from "../../../modules/local/filtlong" -} else { - if (params.single_end) { - include { FASTP_SINGLE as FILTER_READS } from "../../../modules/local/fastp" - } else { - include { FASTP_PAIRED as FILTER_READS } from "../../../modules/local/fastp" - } -} - - /*********** | WORKFLOW | ***********/ @@ -77,8 +66,12 @@ workflow SUBSET_TRIM { } // Call fastp adapter trimming - fastp_ch = FILTER_READS(grouped_ch, adapter_path) + if (params.ont) { + trimmed_ch = FILTER_READS(grouped_ch) + } else { + trimmed_ch = FILTER_READS(grouped_ch, adapter_path) + } emit: subset_reads = grouped_ch - trimmed_subset_reads = fastp_ch.reads + trimmed_subset_reads = trimmed_ch.reads } diff --git a/workflows/run.nf b/workflows/run.nf index 53fdd070..fb7a0f0a 100644 --- a/workflows/run.nf +++ b/workflows/run.nf @@ -90,6 +90,7 @@ workflow RUN { 
RUN_QC.out.qc_adapt >> "results" RUN_QC.out.qc_qbase >> "results" RUN_QC.out.qc_qseqs >> "results" + RUN_QC.out.qc_lengths >> "results" // Final results EXTRACT_VIRAL_READS.out.tsv >> "results" EXTRACT_VIRAL_READS.out.counts >> "results" diff --git a/workflows/run_dev_se.nf b/workflows/run_dev_se.nf index ff3d54b5..f04bc2ab 100644 --- a/workflows/run_dev_se.nf +++ b/workflows/run_dev_se.nf @@ -62,18 +62,13 @@ workflow RUN_DEV_SE { time_ch >> "logging" version_ch >> "logging" // QC - PROCESS_OUTPUT.out.basic >> "results" - PROCESS_OUTPUT.out.adapt >> "results" - PROCESS_OUTPUT.out.qbase >> "results" - PROCESS_OUTPUT.out.qseqs >> "results" - PROCESS_OUTPUT.out.lengths >> "results" COUNT_TOTAL_READS.out.read_counts >> "results" RUN_QC.out.qc_basic >> "results" RUN_QC.out.qc_adapt >> "results" RUN_QC.out.qc_qbase >> "results" RUN_QC.out.qc_qseqs >> "results" + RUN_QC.out.qc_lengths >> "results" // Final results PROFILE.out.bracken >> "results" PROFILE.out.kraken >> "results" } - From b6868c1d775db404e3a59ffe316a18816b919869 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Mon, 20 Jan 2025 21:24:41 +0000 Subject: [PATCH 21/23] fixed address of ont ww test data. 
--- test-data/ont-samplesheet.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-data/ont-samplesheet.csv b/test-data/ont-samplesheet.csv index 3cd3ac05..60339aff 100644 --- a/test-data/ont-samplesheet.csv +++ b/test-data/ont-samplesheet.csv @@ -1,2 +1,2 @@ sample,fastq -NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-mgs-simon/ont-ww-test/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz \ No newline at end of file +NAO-ONT-20240710-WW-RNA1-div0000,s3://nao-testing/ont-ww-test/NAO-ONT-20240710-WW-RNA1-div0000.fastq.gz \ No newline at end of file From 0b9672d46efd5387ee500a112a90b335968c4c66 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Tue, 21 Jan 2025 16:29:46 +0000 Subject: [PATCH 22/23] Increased filtlong mean quality score to 99% --- modules/local/filtlong/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/filtlong/main.nf b/modules/local/filtlong/main.nf index eda013d0..04aa34c1 100644 --- a/modules/local/filtlong/main.nf +++ b/modules/local/filtlong/main.nf @@ -6,10 +6,10 @@ process FILTLONG { output: tuple val(sample), path("${sample}_filtlong.fastq.gz"), emit: reads shell: - // Filter reads based on length (min 100 bp) and mean average base quality (min 90%, i.e, a Phred score of 10) + // Filter reads based on length (min 100 bp) and mean average base quality (min 99%, i.e, a Phred score of 20) ''' o=!{sample}_filtlong.fastq.gz i=!{reads[0]} - filtlong --min_length 100 --min_mean_q 90 --verbose ${i} | gzip > ${o} + filtlong --min_length 100 --min_mean_q 99 --verbose ${i} | gzip > ${o} ''' } From 7fd1ee563a81e866545b63ddcd7a19af31ebb205 Mon Sep 17 00:00:00 2001 From: simonleandergrimm Date: Tue, 21 Jan 2025 19:09:09 +0000 Subject: [PATCH 23/23] missed adding ont param to tests/run.config. now fixed.
--- tests/run.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/run.config b/tests/run.config index 6efa41d7..dc1ef93d 100644 --- a/tests/run.config +++ b/tests/run.config @@ -7,6 +7,9 @@ params { mode = "run" + // Sequencing platform + ont = false // Whether the sequencing is ONT (true) or Illumina (false) + // Directories base_dir = "./" // Parent for working and output directories (can be S3) ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow)