From ad8917263f96b1ad453761d5e9ea6f65b9e3a4e3 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 14 Oct 2024 08:19:18 +0200 Subject: [PATCH 01/14] install chopper from nf-core/chopper --- modules.json | 5 ++ modules/nf-core/chopper/environment.yml | 5 ++ modules/nf-core/chopper/main.nf | 42 +++++++++++++ modules/nf-core/chopper/meta.yml | 60 +++++++++++++++++++ modules/nf-core/chopper/tests/main.nf.test | 45 ++++++++++++++ .../nf-core/chopper/tests/main.nf.test.snap | 16 +++++ modules/nf-core/chopper/tests/tags.yml | 2 + 7 files changed, 175 insertions(+) create mode 100644 modules/nf-core/chopper/environment.yml create mode 100644 modules/nf-core/chopper/main.nf create mode 100644 modules/nf-core/chopper/meta.yml create mode 100644 modules/nf-core/chopper/tests/main.nf.test create mode 100644 modules/nf-core/chopper/tests/main.nf.test.snap create mode 100644 modules/nf-core/chopper/tests/tags.yml diff --git a/modules.json b/modules.json index a72556a7..40573cbc 100644 --- a/modules.json +++ b/modules.json @@ -62,6 +62,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "chopper": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "concoct/concoct": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", diff --git a/modules/nf-core/chopper/environment.yml b/modules/nf-core/chopper/environment.yml new file mode 100644 index 00000000..e80840e1 --- /dev/null +++ b/modules/nf-core/chopper/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::chopper=0.3.0 diff --git a/modules/nf-core/chopper/main.nf b/modules/nf-core/chopper/main.nf new file mode 100644 index 00000000..06f79849 --- /dev/null +++ b/modules/nf-core/chopper/main.nf @@ -0,0 +1,42 @@ +process CHOPPER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/chopper:0.3.0--hd03093a_0': + 'biocontainers/chopper:0.3.0--hd03093a_0' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.fastq.gz") , emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if ("$fastq" == "${prefix}.fastq.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + zcat \\ + $args \\ + $fastq | \\ + chopper \\ + --threads $task.cpus \\ + $args2 | \\ + gzip \\ + $args3 > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/chopper/meta.yml b/modules/nf-core/chopper/meta.yml new file mode 100644 index 00000000..916c865e --- /dev/null +++ b/modules/nf-core/chopper/meta.yml @@ -0,0 +1,60 @@ +name: "chopper" +description: Filter and trim long read data. +keywords: + - filter + - trimming + - fastq + - nanopore + - qc +tools: + - "zcat": + description: "zcat uncompresses either a list of files on the command line or + its standard input and writes the uncompressed data on standard output." + documentation: "https://linux.die.net/man/1/zcat" + args_id: "$args" + identifier: "" + - "chopper": + description: "A rust command line for filtering and trimming long reads." + homepage: "https://github.com/wdecoster/chopper" + documentation: "https://github.com/wdecoster/chopper" + tool_dev_url: "https://github.com/wdecoster/chopper" + doi: "10.1093/bioinformatics/bty149" + licence: ["MIT"] + args_id: "$args2" + identifier: "" + - "gzip": + description: "Gzip reduces the size of the named files using Lempel-Ziv coding + (LZ77)." 
+ documentation: "https://linux.die.net/man/1/gzip" + args_id: "$args3" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: FastQ with reads from long read sequencing e.g. PacBio or ONT + pattern: "*.{fastq.gz}" +output: + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Filtered and trimmed FastQ file + pattern: "*.{fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FynnFreyer" +maintainers: + - "@FynnFreyer" diff --git a/modules/nf-core/chopper/tests/main.nf.test b/modules/nf-core/chopper/tests/main.nf.test new file mode 100644 index 00000000..ee195b5f --- /dev/null +++ b/modules/nf-core/chopper/tests/main.nf.test @@ -0,0 +1,45 @@ +nextflow_process { + + name "Test Process CHOPPER" + script "../main.nf" + process "CHOPPER" + tag "chopper" + tag "modules" + tag "modules_nfcore" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test_out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + + def fastq_content = path(process.out.fastq.get(0).get(1)).linesGzip + + assertAll( + { assert process.success }, + // original pytest checks + { assert process.out.fastq.get(0).get(1) ==~ ".*/test_out.fastq.gz" }, + { assert fastq_content.contains("@2109d790-67ec-4fd1-8931-6c7e61908ff3 runid=97ca62ca093ff43533aa34c38a10b1d6325e7e7b read=52274 ch=243 start_time=2021-02-05T23:27:30Z flow_cell_id=FAP51364 protocol_group_id=data sample_id=RN20097 barcode=barcode01 barcode_alias=barcode01")}, + // additional nf-test checks + // Order of reads is 
not deterministic, so only assess whether the number of reads is correct + { assert snapshot(fastq_content.size()).match("number_of_lines") }, + { assert snapshot(process.out.versions).match("versions") } + + ) + } + + } + +} diff --git a/modules/nf-core/chopper/tests/main.nf.test.snap b/modules/nf-core/chopper/tests/main.nf.test.snap new file mode 100644 index 00000000..d2587e66 --- /dev/null +++ b/modules/nf-core/chopper/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5fe28ea455482c9fe88603ddcc461881" + ] + ], + "timestamp": "2023-10-20T08:27:24.592662298" + }, + "number_of_lines": { + "content": [ + 400 + ], + "timestamp": "2023-10-20T08:27:24.581289647" + } +} \ No newline at end of file diff --git a/modules/nf-core/chopper/tests/tags.yml b/modules/nf-core/chopper/tests/tags.yml new file mode 100644 index 00000000..89b6233b --- /dev/null +++ b/modules/nf-core/chopper/tests/tags.yml @@ -0,0 +1,2 @@ +chopper: + - modules/nf-core/chopper/** From c614c0130fbdf8db8182ddb5920568a78b0bc6d7 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 14 Oct 2024 11:37:14 +0200 Subject: [PATCH 02/14] Add nanoq from nf-core/nanoq, and add code in longread_preprocessing subworkflow for alternative use of chopper for lambda-removal instead of nanolyse, and nanoq for longread filtering instead of filtlong --- modules.json | 5 + modules/nf-core/nanoq/environment.yml | 7 + modules/nf-core/nanoq/main.nf | 49 ++++ modules/nf-core/nanoq/meta.yml | 63 +++++ modules/nf-core/nanoq/tests/main.nf.test | 122 ++++++++ modules/nf-core/nanoq/tests/main.nf.test.snap | 267 ++++++++++++++++++ modules/nf-core/nanoq/tests/tags.yml | 2 + subworkflows/local/longread_preprocessing.nf | 43 ++- 8 files changed, 546 insertions(+), 12 deletions(-) create mode 100644 modules/nf-core/nanoq/environment.yml create mode 100644 modules/nf-core/nanoq/main.nf create mode 100644 modules/nf-core/nanoq/meta.yml create mode 100644 
modules/nf-core/nanoq/tests/main.nf.test create mode 100644 modules/nf-core/nanoq/tests/main.nf.test.snap create mode 100644 modules/nf-core/nanoq/tests/tags.yml diff --git a/modules.json b/modules.json index 40573cbc..99e6d2e2 100644 --- a/modules.json +++ b/modules.json @@ -212,6 +212,11 @@ "git_sha": "3135090b46f308a260fc9d5991d7d2f9c0785309", "installed_by": ["modules"] }, + "nanoq": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "porechop/abi": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", diff --git a/modules/nf-core/nanoq/environment.yml b/modules/nf-core/nanoq/environment.yml new file mode 100644 index 00000000..1a95d24e --- /dev/null +++ b/modules/nf-core/nanoq/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::nanoq=0.10.0" diff --git a/modules/nf-core/nanoq/main.nf b/modules/nf-core/nanoq/main.nf new file mode 100644 index 00000000..6d35a407 --- /dev/null +++ b/modules/nf-core/nanoq/main.nf @@ -0,0 +1,49 @@ +process NANOQ { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/nanoq:0.10.0--h031d066_2' : + 'biocontainers/nanoq:0.10.0--h031d066_2'}" + + input: + tuple val(meta), path(ontreads) + val(output_format) //One of the following: fastq, fastq.gz, fastq.bz2, fastq.lzma, fasta, fasta.gz, fasta.bz2, fasta.lzma. 
+ + output: + tuple val(meta), path("*.{stats,json}") , emit: stats + tuple val(meta), path("*_filtered.${output_format}") , emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_filtered" + """ + nanoq -i $ontreads \\ + ${args} \\ + -r ${prefix}.stats \\ + -o ${prefix}.$output_format + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoq: \$(nanoq --version | sed -e 's/nanoq //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_filtered" + """ + echo "" | gzip > ${prefix}.$output_format + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoq: \$(nanoq --version | sed -e 's/nanoq //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/nanoq/meta.yml b/modules/nf-core/nanoq/meta.yml new file mode 100644 index 00000000..0ff2b9b4 --- /dev/null +++ b/modules/nf-core/nanoq/meta.yml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "nanoq" +description: Nanoq implements ultra-fast read filters and summary reports for high-throughput + nanopore reads. +keywords: + - nanoq + - Read filters + - Read trimming + - Read report +tools: + - "nanoq": + description: "Ultra-fast quality control and summary reports for nanopore reads" + homepage: "https://github.com/esteinig/nanoq" + documentation: "https://github.com/esteinig/nanoq" + tool_dev_url: "https://github.com/esteinig/nanoq" + doi: "10.21105/joss.02991" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ontreads: + type: file + description: Compressed or uncompressed nanopore reads in fasta or fastq formats. 
+ pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz,.bz2,.xz}" + - - output_format: + type: string + description: "Specifies the output format. One of these formats: fasta, fastq; + fasta.gz, fastq.gz; fasta.bz2, fastq.bz2; fasta.lzma, fastq.lzma." +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.{stats,json}": + type: file + description: Summary report of reads statistics. + pattern: "*.{stats,json}" + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*_filtered.${output_format}": + type: file + description: Filtered reads. + pattern: "*.{fasta,fastq}{,.gz,.bz2,.lzma}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@LilyAnderssonLee" +maintainers: + - "@LilyAnderssonLee" diff --git a/modules/nf-core/nanoq/tests/main.nf.test b/modules/nf-core/nanoq/tests/main.nf.test new file mode 100644 index 00000000..ef63d12f --- /dev/null +++ b/modules/nf-core/nanoq/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process NANOQ" + script "../main.nf" + process "NANOQ" + + tag "modules" + tag "modules_nfcore" + tag "nanoq" + + test("sarscov2 - nanopore_uncompressed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + + input[1] = 'fastq' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - nanopore_compressed_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 
'fastq.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("sarscov2 - nanopore_compressed_bz2") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.bz2' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("sarscov2 - nanopore_compressed_lzma") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.lzma' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - nanopore_compressed_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/nanoq/tests/main.nf.test.snap b/modules/nf-core/nanoq/tests/main.nf.test.snap new file mode 100644 index 00000000..b5dda2a7 --- /dev/null +++ b/modules/nf-core/nanoq/tests/main.nf.test.snap @@ -0,0 +1,267 @@ +{ + "sarscov2 - nanopore_compressed_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "2": [ + 
"versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:32.117229" + }, + "sarscov2 - nanopore_compressed_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:42:06.039307" + }, + "sarscov2 - nanopore_compressed_bz2": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:36.674647" + }, + "sarscov2 - nanopore_compressed_lzma": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:41.51344" + }, + "sarscov2 - nanopore_uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:26.868897" + } +} \ No newline at end of file 
diff --git a/modules/nf-core/nanoq/tests/tags.yml b/modules/nf-core/nanoq/tests/tags.yml new file mode 100644 index 00000000..37457df1 --- /dev/null +++ b/modules/nf-core/nanoq/tests/tags.yml @@ -0,0 +1,2 @@ +nanoq: + - "modules/nf-core/nanoq/**" diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index ec434858..76c0fa45 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -8,6 +8,8 @@ include { NANOLYSE } from '../../mo include { PORECHOP_PORECHOP } from '../../modules/nf-core/porechop/porechop/main' include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main' include { FILTLONG } from '../../modules/nf-core/filtlong' +include { CHOPPER } from '../../modules/nf-core/chopper' +include { NANOQ } from '../../modules/nf-core/nanoq' workflow LONGREAD_PREPROCESSING { take: @@ -52,12 +54,21 @@ workflow LONGREAD_PREPROCESSING { } if (!params.keep_lambda) { - NANOLYSE ( - ch_long_reads, - ch_nanolyse_db - ) - ch_long_reads = NANOLYSE.out.fastq - ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) + if (params.longread_phageremoval_tool == 'chopper') { + CHOPPER ( + ch_long_reads + ) + ch_long_reads = CHOPPER.out.fastq + ch_versions = ch_versions.mix(CHOPPER.out.versions.first()) + } else if (params.longread_phageremoval_tool == 'nanolyse') { + NANOLYSE ( + ch_long_reads, + ch_nanolyse_db + ) + ch_long_reads = NANOLYSE.out.fastq + ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) + } + } // join long and short reads by sample name @@ -69,12 +80,20 @@ workflow LONGREAD_PREPROCESSING { .join(ch_short_reads_tmp, by: 0) .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end - FILTLONG ( - ch_short_and_long_reads - ) - ch_long_reads = FILTLONG.out.reads - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - ch_multiqc_files = 
ch_multiqc_files.mix( FILTLONG.out.log ) + if (params.longread_filtering_tool == 'filtlong') { + FILTLONG ( + ch_short_and_long_reads + ) + ch_long_reads = FILTLONG.out.reads + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) + } else if (params.longread_filtering_tool == 'nanoq') { + NANOQ ( + ch_long_reads + ) + ch_long_reads = NANOQ.out.reads + ch_versions = ch_versions.mix(NANOQ.out.versions.first()) + } NANOPLOT_FILTERED ( ch_long_reads From 7102af43231abcca40d30e0c4968d7b927976277 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 14 Oct 2024 15:18:40 +0200 Subject: [PATCH 03/14] Fix config for modules nanoq and filtlong in modules.config --- conf/modules.config | 47 +++++++++++++++++++- nextflow.config | 7 ++- nextflow_schema.json | 19 +++++++- subworkflows/local/longread_preprocessing.nf | 20 +++++---- 4 files changed, 79 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b226ba01..c3fb6a44 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -192,17 +192,40 @@ process { "--min_length ${params.longreads_min_length}", "--keep_percent ${params.longreads_keep_percent}", "--trim", - "--length_weight ${params.longreads_length_weight}" + "--length_weight ${params.longreads_length_weight}", + params.longreads_min_quality ? "--min_mean_q ${params.longreads_min_quality}" : '', ].join(' ').trim() publishDir = [ path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, pattern: "*_filtlong.fastq.gz", - enabled: params.save_filtlong_reads + enabled: params.save_filtered_reads ] ext.prefix = { "${meta.id}_run${meta.run}_filtlong" } } + withName: NANOQ { + ext.args = [ + "--min-len ${params.longreads_min_length}", + params.longreads_min_quality ? 
"--min-qual ${params.longreads_min_quality}": '', + "-vv" + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/QC_longreads/Nanoq" }, + mode: params.publish_dir_mode, + pattern: "*_nanoq_filtered.fastq.gz", + enabled: params.save_filtered_reads + ], + [ + path: { "${params.outdir}/QC_longreads/Nanoq" }, + mode: params.publish_dir_mode, + pattern: "*_nanoq_filtered.stats" + ] + ] + ext.prefix = { "${meta.id}_run${meta.run}_nanoq_filtered" } + } + withName: NANOLYSE { publishDir = [ [ @@ -220,6 +243,26 @@ process { ext.prefix = { "${meta.id}_run${meta.run}_lambdafiltered" } } + withName: CHOPPER { + ext.args2 = [ + "--contam ${params.lambda_reference}" + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/QC_longreads/Chopper" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/QC_longreads/Chopper" }, + mode: params.publish_dir_mode, + pattern: "*_chopper.fastq.gz", + enabled: params.save_lambdaremoved_reads + ] + ] + ext.prefix = { "${meta.id}_run${meta.run}_chopper" } + } + withName: NANOPLOT_RAW { ext.prefix = 'raw' ext.args = { diff --git a/nextflow.config b/nextflow.config index b6d281d0..822656b3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,9 @@ params { adapterremoval_trim_quality_stretch = false keep_phix = false // long read preprocessing options - longread_adaptertrimming_tool = "porechop_abi" + longread_adaptertrimming_tool = "porechop_abi" + longread_phageremoval_tool = "chopper" + longread_filtering_tool = "filtlong" // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" save_phixremoved_reads = false @@ -102,6 +104,7 @@ params { // long read preprocessing options skip_adapter_trimming = false keep_lambda = false + 
longreads_min_quality = null longreads_min_length = 1000 longreads_keep_percent = 90 longreads_length_weight = 10 @@ -109,7 +112,7 @@ params { lambda_reference = "${baseDir}/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz" save_lambdaremoved_reads = false save_porechop_reads = false - save_filtlong_reads = false + save_filtered_reads = false // binning options skip_metabat2 = false diff --git a/nextflow_schema.json b/nextflow_schema.json index b4809d15..6f81582e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -453,6 +453,11 @@ "default": 1000, "description": "Discard any read which is shorter than this value." }, + "longreads_min_quality": { + "type": "integer", + "default": null, + "description": "Discard any read which has a mean quality score lower than this value." + }, "longreads_keep_percent": { "type": "integer", "default": 90, @@ -482,7 +487,7 @@ "type": "boolean", "description": "Specify to save the resulting clipped FASTQ files to --outdir." }, - "save_filtlong_reads": { + "save_filtered_reads": { "type": "boolean", "description": "Specify to save the resulting length filtered FASTQ files to --outdir." 
}, @@ -491,6 +496,18 @@ "description": "Specify which long read adapter trimming tool to use.", "enum": ["porechop", "porechop_abi"], "default": "porechop_abi" + }, + "longread_phageremoval_tool": { + "type": "string", + "description": "Specify which long read phage removal tool to use.", + "enum": ["nanolyse", "chopper"], + "default": "chopper" + }, + "longread_filtering_tool": { + "type": "string", + "description": "Specify which long read filtering tool to use.", + "enum": ["filtlong", "nanoq"], + "default": "filtlong" } } }, diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 76c0fa45..9e4cc0c1 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -71,16 +71,16 @@ workflow LONGREAD_PREPROCESSING { } - // join long and short reads by sample name - ch_short_reads_tmp = ch_short_reads - .map { meta, sr -> [ meta.id, meta, sr ] } + if (params.longread_filtering_tool == 'filtlong') { + // join long and short reads by sample name + ch_short_reads_tmp = ch_short_reads + .map { meta, sr -> [ meta.id, meta, sr ] } - ch_short_and_long_reads = ch_long_reads - .map { meta, lr -> [ meta.id, meta, lr ] } - .join(ch_short_reads_tmp, by: 0) - .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + ch_short_and_long_reads = ch_long_reads + .map { meta, lr -> [ meta.id, meta, lr ] } + .join(ch_short_reads_tmp, by: 0) + .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end - if (params.longread_filtering_tool == 'filtlong') { FILTLONG ( ch_short_and_long_reads ) @@ -89,10 +89,12 @@ workflow LONGREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) } else if (params.longread_filtering_tool == 'nanoq') { NANOQ ( - ch_long_reads + ch_long_reads, + 'fastq.gz' 
) ch_long_reads = NANOQ.out.reads ch_versions = ch_versions.mix(NANOQ.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(NANOQ.out.stats) } NANOPLOT_FILTERED ( From 9bc8156ac712b2631c720a24465e252f06320795 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Tue, 15 Oct 2024 10:35:37 +0200 Subject: [PATCH 04/14] Make it possible to use chopper as filtering tool as well --- conf/modules.config | 4 ++- nextflow.config | 1 - nextflow_schema.json | 8 +----- subworkflows/local/longread_preprocessing.nf | 29 +++++++++----------- 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c3fb6a44..0adc8846 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -245,7 +245,9 @@ process { withName: CHOPPER { ext.args2 = [ - "--contam ${params.lambda_reference}" + !params.keep_lambda ? "--contam ${params.lambda_reference}": "", + params.longreads_min_quality ? "--quality ${params.longreads_min_quality}": "", + params.longreads_min_length ? 
"--minlength ${params.longreads_min_length}": "", ].join(' ').trim() publishDir = [ [ diff --git a/nextflow.config b/nextflow.config index 822656b3..f8bf1a4e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,7 +29,6 @@ params { keep_phix = false // long read preprocessing options longread_adaptertrimming_tool = "porechop_abi" - longread_phageremoval_tool = "chopper" longread_filtering_tool = "filtlong" // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" diff --git a/nextflow_schema.json b/nextflow_schema.json index 6f81582e..38faad2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -497,16 +497,10 @@ "enum": ["porechop", "porechop_abi"], "default": "porechop_abi" }, - "longread_phageremoval_tool": { - "type": "string", - "description": "Specify which long read phage removal tool to use.", - "enum": ["nanolyse", "chopper"], - "default": "chopper" - }, "longread_filtering_tool": { "type": "string", "description": "Specify which long read filtering tool to use.", - "enum": ["filtlong", "nanoq"], + "enum": ["filtlong", "nanoq", "chopper"], "default": "filtlong" } } diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 9e4cc0c1..76865c25 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -53,22 +53,13 @@ workflow LONGREAD_PREPROCESSING { } } - if (!params.keep_lambda) { - if (params.longread_phageremoval_tool == 'chopper') { - CHOPPER ( - ch_long_reads - ) - ch_long_reads = CHOPPER.out.fastq - ch_versions = ch_versions.mix(CHOPPER.out.versions.first()) - } else if (params.longread_phageremoval_tool == 'nanolyse') { - NANOLYSE ( - ch_long_reads, - ch_nanolyse_db - ) - ch_long_reads = 
NANOLYSE.out.fastq - ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) - } - + if (!params.keep_lambda && params.longread_filtering_tool != 'chopper') { + NANOLYSE ( + ch_long_reads, + ch_nanolyse_db + ) + ch_long_reads = NANOLYSE.out.fastq + ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) } if (params.longread_filtering_tool == 'filtlong') { @@ -95,6 +86,12 @@ workflow LONGREAD_PREPROCESSING { ch_long_reads = NANOQ.out.reads ch_versions = ch_versions.mix(NANOQ.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(NANOQ.out.stats) + } else if (params.longread_filtering_tool == 'chopper') { + CHOPPER ( + ch_long_reads + ) + ch_long_reads = CHOPPER.out.fastq + ch_versions = ch_versions.mix(CHOPPER.out.versions.first()) } NANOPLOT_FILTERED ( From cce1ada15adfc52e4a16966e04eb0ecadcb32f13 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 1 Nov 2024 08:15:27 +0100 Subject: [PATCH 05/14] Update chopper module. Pass on fasta file when invoking chopper process --- conf/modules.config | 5 +- modules.json | 2 +- modules/nf-core/chopper/environment.yml | 2 +- modules/nf-core/chopper/main.nf | 18 ++++- modules/nf-core/chopper/meta.yml | 4 + modules/nf-core/chopper/tests/main.nf.test | 74 +++++++++++++++++-- .../nf-core/chopper/tests/main.nf.test.snap | 60 +++++++++++++-- subworkflows/local/longread_preprocessing.nf | 7 +- workflows/mag.nf | 6 +- 9 files changed, 154 insertions(+), 24 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0adc8846..f829be7b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -245,9 +245,8 @@ process { withName: CHOPPER { ext.args2 = [ - !params.keep_lambda ? "--contam ${params.lambda_reference}": "", - params.longreads_min_quality ? "--quality ${params.longreads_min_quality}": "", - params.longreads_min_length ? "--minlength ${params.longreads_min_length}": "", + params.longreads_min_quality ? "--quality ${params.longreads_min_quality}": '', + params.longreads_min_length ? 
"--minlength ${params.longreads_min_length}": '' ].join(' ').trim() publishDir = [ [ diff --git a/modules.json b/modules.json index 99e6d2e2..967fa112 100644 --- a/modules.json +++ b/modules.json @@ -64,7 +64,7 @@ }, "chopper": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "22737835af2db3dd0d5b6b332e75e160d0199fae", "installed_by": ["modules"] }, "concoct/concoct": { diff --git a/modules/nf-core/chopper/environment.yml b/modules/nf-core/chopper/environment.yml index e80840e1..2195b5ed 100644 --- a/modules/nf-core/chopper/environment.yml +++ b/modules/nf-core/chopper/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::chopper=0.3.0 + - bioconda::chopper=0.9.0 diff --git a/modules/nf-core/chopper/main.nf b/modules/nf-core/chopper/main.nf index 06f79849..6fc0b2d2 100644 --- a/modules/nf-core/chopper/main.nf +++ b/modules/nf-core/chopper/main.nf @@ -4,11 +4,12 @@ process CHOPPER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/chopper:0.3.0--hd03093a_0': - 'biocontainers/chopper:0.3.0--hd03093a_0' }" + 'https://depot.galaxyproject.org/singularity/chopper:0.9.0--hdcf5f25_0': + 'biocontainers/chopper:0.9.0--hdcf5f25_0' }" input: tuple val(meta), path(fastq) + path fasta output: tuple val(meta), path("*.fastq.gz") , emit: fastq @@ -22,6 +23,7 @@ process CHOPPER { def args2 = task.ext.args2 ?: '' def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def fasta_filtering = fasta ? "--contam ${fasta}" : "" if ("$fastq" == "${prefix}.fastq.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
""" @@ -30,6 +32,7 @@ process CHOPPER { $fastq | \\ chopper \\ --threads $task.cpus \\ + $fasta_filtering \\ $args2 | \\ gzip \\ $args3 > ${prefix}.fastq.gz @@ -39,4 +42,15 @@ process CHOPPER { chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2) + END_VERSIONS + """ } diff --git a/modules/nf-core/chopper/meta.yml b/modules/nf-core/chopper/meta.yml index 916c865e..049cf62d 100644 --- a/modules/nf-core/chopper/meta.yml +++ b/modules/nf-core/chopper/meta.yml @@ -38,6 +38,10 @@ input: type: file description: FastQ with reads from long read sequencing e.g. PacBio or ONT pattern: "*.{fastq.gz}" + - - fasta: + type: file + description: An optional reference fasta file against which to remove reads that align to it. + pattern: "*.fasta" output: - fastq: - meta: diff --git a/modules/nf-core/chopper/tests/main.nf.test b/modules/nf-core/chopper/tests/main.nf.test index ee195b5f..e611fa9f 100644 --- a/modules/nf-core/chopper/tests/main.nf.test +++ b/modules/nf-core/chopper/tests/main.nf.test @@ -7,7 +7,43 @@ nextflow_process { tag "modules" tag "modules_nfcore" - test("Should run without failures") { + test("test with lambda reference") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test_out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + """ + } + } + + then { + + def fastq_content = path(process.out.fastq.get(0).get(1)).linesGzip + + assertAll( + { assert process.success }, + // original pytest checks + { assert process.out.fastq.get(0).get(1) ==~ ".*/test_out.fastq.gz" }, + { assert 
!fastq_content.contains("@a52a642e-88d0-4584-babd-414ea84db484 runid=71c83ae0021f873e29b130c6562a4c27185f93b8 read=2768 ch=489 start_time=2021-08-11T12:07:39Z flow_cell_id=FAQ57606 protocol_group_id=210811_47CoV_SA sample_id=CS5 barcode=barcode04 barcode_alias=barcode04")}, + // additional nf-test checks + // Order of reads is not deterministic, so only assess whether the number of reads is correct + { assert snapshot( + fastq_content.size(), + process.out.versions + ).match() } + ) + } + } + + test("test without lambda reference") { when { params { @@ -19,6 +55,7 @@ nextflow_process { [id:'test_out' ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + input[1] = [] """ } } @@ -34,12 +71,37 @@ nextflow_process { { assert fastq_content.contains("@2109d790-67ec-4fd1-8931-6c7e61908ff3 runid=97ca62ca093ff43533aa34c38a10b1d6325e7e7b read=52274 ch=243 start_time=2021-02-05T23:27:30Z flow_cell_id=FAP51364 protocol_group_id=data sample_id=RN20097 barcode=barcode01 barcode_alias=barcode01")}, // additional nf-test checks // Order of reads is not deterministic, so only assess whether the number of reads is correct - { assert snapshot(fastq_content.size()).match("number_of_lines") }, - { assert snapshot(process.out.versions).match("versions") } - + { assert snapshot( + fastq_content.size(), + process.out.versions + ).match() } ) } - } -} + test("test-chopper-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [id:'test_out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + process.out.versions + ).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/chopper/tests/main.nf.test.snap b/modules/nf-core/chopper/tests/main.nf.test.snap index 
d2587e66..60522256 100644 --- a/modules/nf-core/chopper/tests/main.nf.test.snap +++ b/modules/nf-core/chopper/tests/main.nf.test.snap @@ -1,16 +1,64 @@ { - "versions": { + "test without lambda reference": { "content": [ + 400, [ - "versions.yml:md5,5fe28ea455482c9fe88603ddcc461881" + "versions.yml:md5,74a27493c09d0c481f6e52b517e12023" ] ], - "timestamp": "2023-10-20T08:27:24.592662298" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-31T15:14:36.37897815" }, - "number_of_lines": { + "test with lambda reference": { "content": [ - 400 + 15984, + [ + "versions.yml:md5,74a27493c09d0c481f6e52b517e12023" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-31T15:14:31.324993049" + }, + "test-chopper-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_out" + }, + "test_out.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,74a27493c09d0c481f6e52b517e12023" + ], + "fastq": [ + [ + { + "id": "test_out" + }, + "test_out.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,74a27493c09d0c481f6e52b517e12023" + ] + }, + [ + "versions.yml:md5,74a27493c09d0c481f6e52b517e12023" + ] ], - "timestamp": "2023-10-20T08:27:24.581289647" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-31T15:29:08.715579423" } } \ No newline at end of file diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 76865c25..7de6dd25 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -15,7 +15,7 @@ workflow LONGREAD_PREPROCESSING { take: ch_raw_long_reads // [ [meta] , fastq] (mandatory) ch_short_reads // [ [meta] , fastq1, fastq2] (mandatory) - ch_nanolyse_db // [fasta] + ch_lambda_db // [fasta] main: ch_versions = Channel.empty() @@ -56,7 +56,7 @@ workflow LONGREAD_PREPROCESSING { if 
(!params.keep_lambda && params.longread_filtering_tool != 'chopper') { NANOLYSE ( ch_long_reads, - ch_nanolyse_db + ch_lambda_db ) ch_long_reads = NANOLYSE.out.fastq ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) @@ -88,7 +88,8 @@ workflow LONGREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix(NANOQ.out.stats) } else if (params.longread_filtering_tool == 'chopper') { CHOPPER ( - ch_long_reads + ch_long_reads, + ch_lambda_db.ifEmpty([]) ) ch_long_reads = CHOPPER.out.fastq ch_versions = ch_versions.mix(CHOPPER.out.versions.first()) diff --git a/workflows/mag.nf b/workflows/mag.nf index 49314024..c3ddf65b 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -135,8 +135,10 @@ if(!params.keep_phix) { } if (!params.keep_lambda) { - ch_nanolyse_db = Channel + ch_lambda_db = Channel .value(file( "${params.lambda_reference}" )) +} else { + ch_lambda_db = Channel.empty() } if (params.genomad_db){ @@ -361,7 +363,7 @@ workflow MAG { LONGREAD_PREPROCESSING ( ch_raw_long_reads, ch_short_reads, - ch_nanolyse_db + ch_lambda_db ) ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions) From 7ec9a740caa996058d708038a36e1bf9836a05da Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 1 Nov 2024 09:37:44 +0100 Subject: [PATCH 06/14] fix save_filtered_longreads parameter --- conf/modules.config | 18 ++++++++++++++---- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 77968e8d..e87a9732 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -190,7 +190,7 @@ process { path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, pattern: "*_filtlong.fastq.gz", - enabled: params.save_filtlong_reads + enabled: params.save_filtered_longreads ] ext.prefix = { "${meta.id}_run${meta.run}_filtlong" } } @@ -206,7 +206,7 @@ process { path: { "${params.outdir}/QC_longreads/Nanoq" }, mode: params.publish_dir_mode, 
pattern: "*_nanoq_filtered.fastq.gz", - enabled: params.save_filtered_reads + enabled: params.save_filtered_longreads ], [ path: { "${params.outdir}/QC_longreads/Nanoq" }, @@ -218,7 +218,17 @@ process { } withName: NANOLYSE { - publishDir = [[path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, pattern: "*.log"], [path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, pattern: "*_nanolyse.fastq.gz", enabled: params.save_lambdaremoved_reads]] + publishDir = [ + [ + path: { "${params.outdir}/QC_longreads/NanoLyse" }, + mode: params.publish_dir_mode, pattern: "*.log" + ], + [ + path: { "${params.outdir}/QC_longreads/NanoLyse" }, + mode: params.publish_dir_mode, pattern: "*_nanolyse.fastq.gz", + enabled: params.save_lambdaremoved_reads + ] + ] ext.prefix = { "${meta.id}_run${meta.run}_lambdafiltered" } } @@ -237,7 +247,7 @@ process { path: { "${params.outdir}/QC_longreads/Chopper" }, mode: params.publish_dir_mode, pattern: "*_chopper.fastq.gz", - enabled: params.save_lambdaremoved_reads + enabled: params.save_lambdaremoved_reads || params.save_filtered_longreads ] ] ext.prefix = { "${meta.id}_run${meta.run}_chopper" } diff --git a/nextflow.config b/nextflow.config index 295924fb..a5637f85 100644 --- a/nextflow.config +++ b/nextflow.config @@ -111,7 +111,7 @@ params { lambda_reference = "${baseDir}/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz" save_lambdaremoved_reads = false save_porechop_reads = false - save_filtered_reads = false + save_filtered_longreads = false // binning options skip_metabat2 = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 00d87a65..2a7744d6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -424,9 +424,9 @@ "type": "boolean", "description": "Specify to save the resulting clipped FASTQ files to --outdir." 
}, - "save_filtered_reads": { + "save_filtered_longreads": { "type": "boolean", - "description": "Specify to save the resulting length filtered FASTQ files to --outdir." + "description": "Specify to save the resulting length filtered long read FASTQ files to --outdir." }, "longread_adaptertrimming_tool": { "type": "string", From 4ceacae5ea26b8c3695db45fb6a06b0fcae97665 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 1 Nov 2024 10:51:18 +0100 Subject: [PATCH 07/14] Update changelog, citations, and output --- CHANGELOG.md | 8 ++++++++ CITATIONS.md | 8 ++++++++ docs/output.md | 24 +++++++++++++++++++----- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a902cd7b..89c6d53a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#692](https://github.com/nf-core/mag/pull/692) - Added Nanoq as optional longread filtering tool +- [#692](https://github.com/nf-core/mag/pull/692) - Added chopper as optional longread filtering tool and/or phage lambda removal tool + ### `Changed` ### `Fixed` ### `Dependencies` +| Tool | Previous version | New version | +| ------- | ---------------- | ----------- | +| chopper | | 0.8.0 | +| nanoq | | 0.10.0 | + ### `Deprecated` ## 3.2.0 [2024-10-27] diff --git a/CITATIONS.md b/CITATIONS.md index 52caa1e6..74138f6c 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,6 +40,10 @@ > Parks, D. H., Imelfort, M., Skennerton, C. T., Hugenholtz, P., & Tyson, G. W. (2015). CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes. Genome Research, 25(7), 1043–1055. doi: 10.1101/gr.186072.114 +- [Chopper](https://doi.org/10.1093/bioinformatics/bty149) + + > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. 
doi: 10.1093/bioinformatics/bty149 + - [CONCOCT](https://doi.org/10.1038/nmeth.3103) > Alneberg, J., Bjarnason, B. S., de Bruijn, I., Schirmer, M., Quick, J., Ijaz, U. Z., Lahti, L., Loman, N. J., Andersson, A. F., & Quince, C. (2014). Binning metagenomic contigs by coverage and composition. Nature Methods, 11(11), 1144–1146. doi: 10.1038/nmeth.3103 @@ -114,6 +118,10 @@ > De Coster, W., D’Hert, S., Schultz, D. T., Cruts, M., & Van Broeckhoven, C. (2018). NanoPack: visualizing and processing long-read sequencing data. Bioinformatics, 34(15), 2666-2669. doi: 10.1093/bioinformatics/bty149. +- [Nanoq](https://doi.org/10.21105/joss.02991) + + > Steinig, E., Coin, L. (2022). Nanoq: ultra-fast quality control for nanopore reads. Journal of Open Source Software, 7(69), 2991, doi: 10.21105/joss.02991 + - [Porechop](https://github.com/rrwick/Porechop) - [Porechop-abi](https://github.com/bonsai-team/Porechop_ABI) diff --git a/docs/output.md b/docs/output.md index 4e43ffb6..8eddd954 100644 --- a/docs/output.md +++ b/docs/output.md @@ -109,9 +109,9 @@ The pipeline uses Nanolyse to map the reads against the Lambda phage and removes -### Filtlong and porechop +### Long read adapter removal -The pipeline uses filtlong and porechop to perform quality control of the long reads that are eventually provided with the TSV input file. +The pipeline uses porechop_abi or porechop to perform adapter trimming of the long reads that are eventually provided with the TSV input file.
Output files @@ -119,15 +119,29 @@ The pipeline uses filtlong and porechop to perform quality control of the long r - `QC_longreads/porechop/` - `[sample]_[run]_porechop_trimmed.fastq.gz`: If `--longread_adaptertrimming_tool 'porechop'`, the adapter trimmed FASTQ files from porechop - `[sample]_[run]_porechop-abi_trimmed.fastq.gz`: If `--longread_adaptertrimming_tool 'porechop_abi'`, the adapter trimmed FASTQ files from porechop_ABI -- `QC_longreads/filtlong/` + +
+ +### Long read filtering + +The pipeline uses filtlong, chopper, or nanoq for quality filtering of long reads, specified with `--longread_filtering_tool filtlong|chopper|nanoq`. Only is capable of filtering long reads against short reads, and is therefor currently recommended in the hybrid mode. If chopper is selected as long read filtering tool, Phage Lambda removal will be performed with chopper as well, instead of nanolyse. + +
Output files + +- `QC_longreads/Filtlong/` - `[sample]_[run]_filtlong.fastq.gz`: The length and quality filtered reads in FASTQ from Filtlong +- `QC_longreads/Nanoq/` + - `[sample]_[run]_nanoq_filtered.fastq.gz`: The length and quality filtered reads in FASTQ from Nanoq +- `QC_longreads/Chopper/` + - `[sample]_[run]_chopper.fastq.gz`: The length and quality filtered, optionally phage lambda removed reads in FASTQ from Chopper
-Trimmed and filtered FASTQ output directories and files will only exist if `--save_porechop_reads` and/or `--save_filtlong_reads` (respectively) are provided to the run command . +Trimmed and filtered FASTQ output directories and files will only exist if `--save_porechop_reads` and/or `--save_filtered_longreads` (respectively) are provided to the run command . No direct host read removal is performed for long reads. -However, since within this pipeline filtlong uses a read quality based on k-mer matches to the already filtered short reads, reads not overlapping those short reads might be discarded. +However, since within this pipeline filtlong uses a read quality based on k-mer matches to the already filtered short reads, reads not overlapping those short reads might be discarded. Note that this only applies when using filtlong as long read filtering tool. The lower the parameter `--longreads_length_weight`, the higher the impact of the read qualities for filtering. For further documentation see the [filtlong online documentation](https://github.com/rrwick/Filtlong). From 03408c6400faa1769b9222330552ea0b51ab83c9 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 1 Nov 2024 11:22:04 +0100 Subject: [PATCH 08/14] fix linting --- nextflow_schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2a7744d6..6e16c49c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -392,7 +392,6 @@ }, "longreads_min_quality": { "type": "integer", - "default": null, "description": "Discard any read which has a mean quality score lower than this value." }, "longreads_keep_percent": { From fc9f7fc3083430534926dd46eb75f2f9dc49d607 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 22 Nov 2024 08:05:16 +0100 Subject: [PATCH 09/14] Update CHANGELOG.md Co-authored-by: James A. 
Fellows Yates --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ce7720f..6fe24902 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Tool | Previous version | New version | | ------- | ---------------- | ----------- | -| chopper | | 0.8.0 | +| chopper | | 0.9.0 | | nanoq | | 0.10.0 | ### `Deprecated` From 6e04c6e29827274eeb67604bb7b4b8ffcdec5ac9 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 22 Nov 2024 08:05:40 +0100 Subject: [PATCH 10/14] Update CHANGELOG.md Co-authored-by: James A. Fellows Yates --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fe24902..98c1efbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#692](https://github.com/nf-core/mag/pull/692) - Added Nanoq as optional longread filtering tool -- [#692](https://github.com/nf-core/mag/pull/692) - Added chopper as optional longread filtering tool and/or phage lambda removal tool +- [#692](https://github.com/nf-core/mag/pull/692) - Added Nanoq as optional longread filtering tool (added by @muabnezor) +- [#692](https://github.com/nf-core/mag/pull/692) - Added chopper as optional longread filtering tool and/or phage lambda removal tool (added by @muabnezor) - [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs) ### `Changed` From 6131b9125c99d18e2e67dcff7ba389b6d2603e6b Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 22 Nov 2024 08:06:01 +0100 Subject: [PATCH 11/14] Update conf/modules.config Co-authored-by: James A. 
Fellows Yates --- conf/modules.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b8831f0d..f5af5335 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -203,10 +203,10 @@ process { ].join(' ').trim() publishDir = [ [ - path: { "${params.outdir}/QC_longreads/Nanoq" }, - mode: params.publish_dir_mode, - pattern: "*_nanoq_filtered.fastq.gz", - enabled: params.save_filtered_longreads + path: { "${params.outdir}/QC_longreads/Nanoq" }, + mode: params.publish_dir_mode, + pattern: "*_nanoq_filtered.fastq.gz", + enabled: params.save_filtered_longreads ], [ path: { "${params.outdir}/QC_longreads/Nanoq" }, From b7c88eacf3cf4af9213aacd370f27867ffcae7d4 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 22 Nov 2024 08:08:13 +0100 Subject: [PATCH 12/14] Update docs/output.md Co-authored-by: James A. Fellows Yates --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 8eddd954..d827fa14 100644 --- a/docs/output.md +++ b/docs/output.md @@ -124,7 +124,7 @@ The pipeline uses porecho_abi or porechop to perform adaptertrimming of the long ### Long read filtering -The pipeline uses filtlong, chopper, or nanoq for quality filtering of long reads, specified with `--longread_filtering_tool filtlong|chopper|nanoq`. Only is capable of filtering long reads against short reads, and is therefor currently recommended in the hybrid mode. If chopper is selected as long read filtering tool, Phage Lambda removal will be performed with chopper as well, instead of nanolyse. +The pipeline uses filtlong, chopper, or nanoq for quality filtering of long reads, specified with `--longread_filtering_tool `. Only is capable of filtering long reads against short reads, and is therefore currently recommended in the hybrid mode. 
If chopper is selected as long read filtering tool, Lambda Phage removal will be performed with chopper as well, instead of nanolyse.
Output files From bd75811bf46bb82f2fd3b4965ac9f8e44d2b1359 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 22 Nov 2024 08:08:23 +0100 Subject: [PATCH 13/14] Update nextflow.config Co-authored-by: James A. Fellows Yates --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 3c540f32..774e2ad2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -111,7 +111,7 @@ params { lambda_reference = "${baseDir}/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz" save_lambdaremoved_reads = false save_porechop_reads = false - save_filtered_longreads = false + save_filtered_longreads = false // binning options skip_metabat2 = false From e978c23b5058d302a9314d836a306a6b327cd375 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 22 Nov 2024 08:14:44 +0100 Subject: [PATCH 14/14] Update output.md --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index d827fa14..987be311 100644 --- a/docs/output.md +++ b/docs/output.md @@ -124,7 +124,7 @@ The pipeline uses porecho_abi or porechop to perform adaptertrimming of the long ### Long read filtering -The pipeline uses filtlong, chopper, or nanoq for quality filtering of long reads, specified with `--longread_filtering_tool `. Only is capable of filtering long reads against short reads, and is therefore currently recommended in the hybrid mode. If chopper is selected as long read filtering tool, Lambda Phage removal will be performed with chopper as well, instead of nanolyse. +The pipeline uses filtlong, chopper, or nanoq for quality filtering of long reads, specified with `--longread_filtering_tool `. Only filtlong is capable of filtering long reads against short reads, and is therefore currently recommended in the hybrid mode. If chopper is selected as long read filtering tool, Lambda Phage removal will be performed with chopper as well, instead of nanolyse.
Output files