From b064eafe016bfa0869385c5d27e81f0614825565 Mon Sep 17 00:00:00 2001 From: Erin Young Date: Fri, 1 Mar 2024 16:34:41 -0700 Subject: [PATCH] removed errorStragy comments --- donut_falls.nf | 694 +++++++++++++++++++++++++++++++------------------ 1 file changed, 437 insertions(+), 257 deletions(-) diff --git a/donut_falls.nf b/donut_falls.nf index 7c1a3e4..b395173 100755 --- a/donut_falls.nf +++ b/donut_falls.nf @@ -43,8 +43,7 @@ params.sequencing_summary = '' params.sample_sheet = '' params.assembler = 'flye' params.outdir = 'donut_falls' -params.test = false -params.ontime = false +params.test = '' // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### @@ -79,10 +78,15 @@ paramCheck(params.keySet()) // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### -// Channel -// .fromPath(params.sequencing_summary, type:'file') -// .view { "Summary File : $it" } -// .set { ch_sequencing_summary } +if (params.sequencing_summary){ + Channel + .fromPath("${params.sequencing_summary}", type:'file') + .view { "Summary File : $it" } + .set { ch_sequencing_summary } +} else { + ch_sequencing_summary = Channel.empty() +} + // using a sample sheet with the column header of 'sample,fastq,fastq_1,fastq_2' // sample = meta.id @@ -90,19 +94,25 @@ paramCheck(params.keySet()) // fastq_1 = illumina fastq file // fastq_2 = illumina fastq file -Channel - .fromPath("${params.sample_sheet}", type: "file") - .splitCsv( header: true, sep: ',' ) - .map { it -> - meta = [id:it.sample] - tuple( meta, - "${it.fastq}", - "${it.fastq_1}", - "${it.fastq_2}" ) - } - .set{ ch_input_files } -// channel for illumina files +if (params.sample_sheet) { + Channel + .fromPath("${params.sample_sheet}", type: "file") + .splitCsv( header: true, sep: ',' ) + .map { it -> + meta = [id:it.sample] + tuple( meta, + "${it.fastq}", + "${it.fastq_1}", + "${it.fastq_2}" ) + } + .set{ ch_input_files } +} else { + ch_input_files = Channel.empty() +} + + +// channel for illumina files (paired-end only) ch_input_files .filter { it[2] != it[3] } .map { it -> tuple (it[0], [file(it[2], checkIfExists: true), file(it[3], checkIfExists: true)])} @@ -115,6 +125,275 @@ ch_input_files // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### +// TODO + +// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + +// process ontime { +// tag "${meta.id}" +// label "process_medium" +// publishDir "${params.outdir}/${meta.id}", mode: 'copy' +// container 'staphb/ontime:0.2.3' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '45m' +// +// input: +// tuple val(meta), file(reads) +// +// output: +// tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: fastq +// path "bbduk/*", emit: files +// path "bbduk/*.phix.stats.txt", emit: stats +// path "logs/${task.process}/*.log", emit: log +// path "versions.yml", emit: versions +// +// when: +// task.ext.when == null || task.ext.when +// +// shell: +// def args = task.ext.args ?: '' +// def prefix = task.ext.prefix ?: "${meta.id}" +// """ +// ontime --version +// +// ontime --help +// +// cat <<-END_VERSIONS > versions.yml +// "${task.process}": +// ontime: "\$(ontime --version | awk '{print \$NF}')" +// END_VERSIONS +// +// exit 1 +// """ +// } + + +// someday... +// process dragonflye { +// tag "${meta.id}" +// label "process_high" +// publishDir "${params.outdir}/${meta.id}", mode: 'copy' +// container 'staphb/dragonflye:1.1.2' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '10h' +// +// input: +// tuple val(meta), file(fastq) +// +// output: +// tuple val(meta), file("dragonflye/*_dragonflye.fasta"), optional: true, emit: fasta +// tuple val(meta), file("dragonflye/*_dragonflye.gfa"), optional: true, emit: gfa +// path "dragonflye/*_assembly_info.tsv", emit: summary +// path "dragonflye/*", emit: everything +// path "versions.yml", emit: versions +// +// when: +// task.ext.when == null || task.ext.when +// +// shell: +// def args = task.ext.args ?: '' +// def prefix = task.ext.prefix ?: "${meta.id}" +// """ +// dragonflye ${args} \ +// --reads ${fastq} \ +// --cpus ${task.cpus} \ +// --outdir dragonflye \ +// --prefix ${prefix} +// +// # renaming final files +// if [ -f "dragonflye/flye-unpolished.gfa" ] ; then cp dragonflye/flye-unpolished.gfa dragonflye/${prefix}_dragonflye.gfa ; fi +// if [ -f "dragonflye/flye.fasta" ] ; then cp dragonflye/flye.fasta dragonflye/${prefix}_dragonflye.fasta ; fi +// +// # getting a summary file +// head -n 1 dragonflye/flye-info.txt | awk '{print "sample\\t" \$0}' > dragonflye/${prefix}_assembly_info.tsv +// tail -n+2 dragonflye/flye-info.txt | awk -v sample=${prefix} '{print sample "\\t" \$0}' >> dragonflye/${prefix}_assembly_info.tsv +// +// cat <<-END_VERSIONS > versions.yml +// "${task.process}": +// dragonflye: \$(dragonflye --version | awk '{print \$NF}' ) +// END_VERSIONS +// """ +// } + +// someday... +// process hybracter { +// tag "${meta.id}" +// label "process_high" +// publishDir "${params.outdir}/${meta.id}", mode: 'copy' +// container 'quay.io/biocontainers/hybracter:0.6.0--pyhdfd78af_0' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '10h' +// +// input: +// tuple val(meta), file(reads), file(illumina) +// +// output: +// tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: fasta +// tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: gfa +// path "versions.yml", emit: versions +// +// when: +// task.ext.when == null || task.ext.when +// +// shell: +// def args = task.ext.args ?: '' +// def prefix = task.ext.prefix ?: "${meta.id}" +// """ +// hybracter -h +// +// hybracter version +// +// exit 1 +// +// cat <<-END_VERSIONS > versions.yml +// "${task.process}": +// hybracter: "\$(hybracter --version | awk '{print \$NF}')" +// END_VERSIONS +// exit 1 +// """ +// } + +// process test_nfcore { +// tag "Downloading subset15000" +// label "process_single" +// publishDir "${params.outdir}/test_files/nfcore", mode: 'copy' +// container 'staphb/multiqc:1.19' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '1h' + +// output: +// tuple val("nfcore-subset15000"), file("nfcore_subset15000.fa.gz"), emit: fastq + +// when: +// task.ext.when == null || task.ext.when + +// shell: +// """ +// wget -q https://github.com/nf-core/test-datasets/blob/23f5b889e4736798c8692e9b92810d9a3e37ee97/nanopore/subset15000.fq.gz?raw=true -O nfcore_subset15000.fa.gz +// """ +// } + +// process test_great_dataset { +// tag "Downloading the great dataset" +// label "process_single" +// publishDir "${params.outdir}/test_files/great", mode: 'copy' +// container 'staphb/multiqc:1.19' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '1h' + +// output: +// tuple val("great_dataset"), file("reads.fastq.gz"), emit: fastq + +// when: +// task.ext.when == null || task.ext.when + +// shell: +// """ +// wget -q https://bridges.monash.edu/ndownloader/files/23754659 -O dataset.tar.gz +// tar -xvf dataset.tar.gz + +// exit 1 +// """ +// } + + +// process test_good_dataset { +// tag "Downloading the good dataset" +// label "process_single" +// publishDir "${params.outdir}/test_files/good", mode: 'copy' +// container 'staphb/multiqc:1.19' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '1h' + +// output: +// tuple val("good_dataset"), file("reads.fastq.gz"), emit: fastq + +// when: +// task.ext.when == null || task.ext.when + +// shell: +// """ +// wget -q https://bridges.monash.edu/ndownloader/files/23754647 -O dataset.tar.gz +// tar -xvf dataset.tar.gz +// """ +// } + +// process test_mediocre_dataset { +// tag "Downloading the mediocre dataset" +// label "process_single" +// publishDir "${params.outdir}/test_files/mediocre", mode: 'copy' +// container 'staphb/multiqc:1.19' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '1h' + +// output: +// tuple val("mediocre_dataset"), file("reads.fastq.gz"), emit: fastq + +// when: +// task.ext.when == null || task.ext.when + +// shell: +// """ +// wget -q https://bridges.monash.edu/ndownloader/files/23754629 -O dataset.tar.gz +// tar -xvf dataset.tar.gz + +// exit 1 +// """ +// } + +// process test_bad_dataset { +// tag "Downloading the bad dataset" +// label "process_single" +// publishDir "${params.outdir}/test_files/bad", mode: 'copy' +// container 'staphb/multiqc:1.19' +// errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} +// time '1h' + +// output: +// tuple val("bad_dataset"), file("reads.fastq.gz"), emit: fastq + +// when: +// task.ext.when == null || task.ext.when + +// shell: +// """ +// wget -q https://bridges.monash.edu/ndownloader/files/23754623 -O dataset.tar.gz +// tar -xvf dataset.tar.gz + +// exit 1 +// """ +// } + + // in DONUT FALLS WORKFLOW + // hybracter and plassembler are on the to-do list + // if (params.assembler =~ /hybracter/ ) { + // hybracter(ch_nanopore_input.join(ch_illumina_input, by: 0 , remainder: true)) + // + // ch_gfa = ch_gfa.mix(hybracter.out.gfa) + // // no ch_summary + // ch_consensus = ch_consensus.mix(hybracter.out.fasta) + // ch_versions = ch_versions.mix(hybracter.out.versions.first()) + // } + + // if (params.assembler =~ /dragonflye/ ) { + // dragonflye(ch_nanopore_input) + // + // dragonflye.out.summary + // .collectFile( + // storeDir: "${params.outdir}/summary/", + // keepHeader: true, + // sort: { file -> file.text }, + // name: "dragonflye_summary.tsv") + // .set { dragonflye_summary } + // + // ch_gfa = dragonflye.out.gfa + // ch_summary = ch_summary.mix(dragonflye_summary) + // // no ch_consensus + // ch_versions = ch_versions.mix(dragonflye.out.versions.first()) + // } + +// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + // Processes // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### @@ -124,7 +403,7 @@ process bandage { label "process_low" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'quay.io/biocontainers/bandage:0.8.1--hc9558a2_2' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -159,7 +438,7 @@ process busco { label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/busco:5.6.1-prok-bacteria_odb10_2024-01-08' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '45m' input: @@ -195,7 +474,7 @@ process bwa { label 'process_high' // no publishDir because the sam files are too big container 'staphb/bwa:0.7.17' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '2h' input: @@ -230,7 +509,7 @@ process circulocov { label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'quay.io/uphl/circulocov:0.1.20240104-2024-02-21' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '1h' input: @@ -275,7 +554,7 @@ process copy { label "process_single" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/multiqc:1.19' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -378,7 +657,7 @@ process dnaapler { label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/dnaapler:0.7.0' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '1h' input: @@ -410,107 +689,12 @@ process dnaapler { """ } -process download { - tag "Downloading subset15000" - label "process_single" - publishDir "${params.outdir}/${meta.id}", mode: 'copy' - container 'staphb/multiqc:1.19' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '1h' - - output: - tuple val("subset15000"), file("nfcore_subset15000.fa.gz"), emit: fastq - - when: - task.ext.when == null || task.ext.when - - shell: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - wget -q https://github.com/nf-core/test-datasets/blob/23f5b889e4736798c8692e9b92810d9a3e37ee97/nanopore/subset15000.fq.gz?raw=true -O nfcore_subset15000.fa.gz - - exit 1 - """ -} - -process great_dataset { - tag "Downloading the great dataset" - label "process_single" - publishDir "${params.outdir}/${meta.id}", mode: 'copy' - container 'staphb/multiqc:1.19' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '1h' - - output: - tuple val("great_dataset"), file("reads.fastq.gz"), emit: fastq - - when: - task.ext.when == null || task.ext.when - - shell: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - wget -q https://bridges.monash.edu/ndownloader/files/23754659 -O great_dataset.tar.gz - tar -xvf great_dataset.tar.gz - - exit 1 - """ -} - -process dragonflye { - tag "${meta.id}" - label "process_high" - publishDir "${params.outdir}/${meta.id}", mode: 'copy' - container 'staphb/dragonflye:1.1.2' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - time '10h' - - input: - tuple val(meta), file(fastq) - - output: - tuple val(meta), file("dragonflye/*_dragonflye.fasta"), optional: true, emit: fasta - tuple val(meta), file("dragonflye/*_dragonflye.gfa"), optional: true, emit: gfa - path "dragonflye/*_assembly_info.tsv", emit: summary - path "dragonflye/*", emit: everything - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - shell: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - dragonflye ${args} \ - --reads ${fastq} \ - --cpus ${task.cpus} \ - --outdir dragonflye \ - --prefix ${prefix} - - # renaming final files - if [ -f "dragonflye/flye-unpolished.gfa" ] ; then cp dragonflye/flye-unpolished.gfa dragonflye/${prefix}_dragonflye.gfa ; fi - if [ -f "dragonflye/flye.fasta" ] ; then cp dragonflye/flye.fasta dragonflye/${prefix}_dragonflye.fasta ; fi - - # getting a summary file - head -n 1 dragonflye/flye-info.txt | awk '{print "sample\\t" \$0}' > dragonflye/${prefix}_assembly_info.tsv - tail -n+2 dragonflye/flye-info.txt | awk -v sample=${prefix} '{print sample "\\t" \$0}' >> dragonflye/${prefix}_assembly_info.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - dragonflye: \$(dragonflye --version | awk '{print \$NF}' ) - END_VERSIONS - """ -} - process fastp { tag "${meta.id}" label "process_low" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/fastp:0.23.4' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -573,7 +757,7 @@ process flye { label "process_high" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/flye:2.9.3' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10h' input: @@ -620,7 +804,7 @@ process gfastats { label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy', pattern: 'gfastats/*' container 'staphb/gfastats:1.3.6' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -664,7 +848,7 @@ process gfa_to_fasta { label "process_low" // no publishDir container 'staphb/multiqc:1.19' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -716,44 +900,6 @@ process gfa_to_fasta { """ } -// someday... -// process hybracter { -// tag "${meta.id}" -// label "process_high" -// publishDir "${params.outdir}/${meta.id}", mode: 'copy' -// container 'quay.io/biocontainers/hybracter:0.6.0--pyhdfd78af_0' -// //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} -// time '10h' - -// input: -// tuple val(meta), file(reads), file(illumina) - -// output: -// tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: fasta -// tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: gfa -// path "versions.yml", emit: versions - -// when: -// task.ext.when == null || task.ext.when - -// shell: -// def args = task.ext.args ?: '' -// def prefix = task.ext.prefix ?: "${meta.id}" -// """ -// hybracter -h - -// hybracter version - -// exit 1 - -// cat <<-END_VERSIONS > versions.yml -// "${task.process}": -// hybracter: "\$(hybracter --version | awk '{print \$NF}')" -// END_VERSIONS -// exit 1 -// """ -// } - // From https://github.com/nanoporetech/medaka // > It is not recommended to specify a value of --threads greater than 2 for medaka consensus since the compute scaling efficiency is poor beyond this. // > Note also that medaka consensus may been seen to use resources equivalent to + 4 as an additional 4 threads are used for reading and preparing input data. @@ -762,7 +908,7 @@ process medaka { label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'ontresearch/medaka:v1.11.3' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '30m' input: @@ -805,7 +951,7 @@ process multiqc { label "process_low" publishDir "${params.outdir}", mode: 'copy' container 'staphb/multiqc:1.19' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -988,10 +1134,10 @@ process multiqc { process nanoplot_summary { tag "${summary}" - label "process_medium" - publishDir "${params.outdir}/${meta.id}", mode: 'copy' + label "process_low" + publishDir "${params.outdir}/summary", mode: 'copy' container 'staphb/nanoplot:1.42.0' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -1007,12 +1153,12 @@ process nanoplot_summary { shell: def args = task.ext.args ?: '' """ - mkdir -p nanoplot/summary + mkdir -p nanoplot NanoPlot ${args} \ --summary ${summary} \ --threads ${task.cpus} \ - --outdir nanoplot/summary \ + --outdir nanoplot \ --tsv_stats cat <<-END_VERSIONS > versions.yml @@ -1029,7 +1175,7 @@ process nanoplot { label "process_low" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/nanoplot:1.42.0' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -1068,51 +1214,12 @@ process nanoplot { """ } -// someday... -// process ontime { -// tag "${meta.id}" -// label "process_medium" -// publishDir "${params.outdir}/${meta.id}", mode: 'copy' -// container 'staphb/ontime:0.2.3' -// //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} -// time '45m' - -// input: -// tuple val(meta), file(reads) - -// output: -// tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: fastq -// path "bbduk/*", emit: files -// path "bbduk/*.phix.stats.txt", emit: stats -// path "logs/${task.process}/*.log", emit: log -// path "versions.yml", emit: versions - -// when: -// task.ext.when == null || task.ext.when - -// shell: -// def args = task.ext.args ?: '' -// def prefix = task.ext.prefix ?: "${meta.id}" -// """ -// ontime --version - -// ontime --help - -// cat <<-END_VERSIONS > versions.yml -// "${task.process}": -// ontime: "\$(ontime --version | awk '{print \$NF}')" -// END_VERSIONS - -// exit 1 -// """ -// } - process polypolish { tag "${meta.id}" label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/polypolish:0.6.0' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '45m' input: @@ -1158,7 +1265,7 @@ process pypolca { label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/pypolca:0.3.1' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '30m' input: @@ -1214,7 +1321,7 @@ process rasusa { label "process_medium" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/rasusa:0.8.0' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -1249,7 +1356,7 @@ process raven { label "process_high" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/raven:1.8.3' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10h' input: @@ -1265,7 +1372,7 @@ process raven { task.ext.when == null || task.ext.when shell: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '--polishing-rounds 2' def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p raven @@ -1288,7 +1395,7 @@ process summary { label "process_single" publishDir "${params.outdir}/summary", mode: 'copy' container 'staphb/multiqc:1.19' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: @@ -1445,7 +1552,7 @@ process unicycler { label "process_high" publishDir "${params.outdir}/${meta.id}", mode: 'copy' container 'staphb/unicycler:0.5.0' - ////errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10h' input: @@ -1489,7 +1596,7 @@ process versions { publishDir "${params.outdir}/summary", mode: 'copy' container 'staphb/multiqc:1.19' time '10m' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: file(input) @@ -1586,11 +1693,66 @@ process versions { """ } +// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + +// Downloading files for testing + +// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + +process test_unicycler { + tag "Downloading Unicycler test files" + label "process_single" + publishDir "${params.outdir}/test_files/unicycler", mode: 'copy' + container 'staphb/multiqc:1.19' + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + time '1h' + + output: + tuple val("unicycler"), file("long_reads_low_depth.fastq.gz"), file("short_reads*.fastq.gz"), emit: fastq + + when: + task.ext.when == null || task.ext.when + + shell: + """ + wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/short_reads_1.fastq.gz + wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/short_reads_2.fastq.gz + wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/long_reads_low_depth.fastq.gz + """ +} + +process test_donut_falls { + tag "Downloading R10.4 reads" + label "process_single" + publishDir "${params.outdir}/test_files/df", mode: 'copy' + container 'staphb/multiqc:1.19' + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + time '1h' + + output: + tuple val("df"), file("test_files/test.fastq.gz"), file("test_files/test_{1,2}.fastq.gz"), emit: fastq + tuple val("dflr"), file("test_files/test.fastq.gz"), emit: lrfastq + + when: + task.ext.when == null || task.ext.when + + shell: + """ + wget --quiet https://zenodo.org/records/10733190/files/df_test_files.tar.gz?download=1 -O dataset.tar.gz + tar -xvf dataset.tar.gz + """ +} + +// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + +// Donut Falls + +// ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### + workflow DONUT_FALLS { take: ch_nanopore_input ch_illumina_input - //ch_gridion_summary main: // channel for gfa files for gfa stats @@ -1603,46 +1765,21 @@ workflow DONUT_FALLS { ch_versions = Channel.empty() if (params.assembler =~ /unicycler/ ) { - unicycler(ch_illumina_input.join(ch_nanopore_input, by: 0 , remainder: false)) + unicycler(ch_illumina_input.join(ch_nanopore_input, by: 0, remainder: false)) ch_gfa = ch_gfa.mix(unicycler.out.gfa) // no ch_summary ch_consensus = ch_consensus.mix(unicycler.out.fasta) ch_versions = ch_versions.mix(unicycler.out.versions.first()) } - - // hybracter and plassembler are on the to-do list - // if (params.assembler =~ /hybracter/ ) { - // hybracter(ch_nanopore_input.join(ch_illumina_input, by: 0 , remainder: true)) - - // ch_gfa = ch_gfa.mix(hybracter.out.gfa) - // // no ch_summary - // ch_consensus = ch_consensus.mix(hybracter.out.fasta) - // ch_versions = ch_versions.mix(hybracter.out.versions.first()) - // } - - if (params.assembler =~ /dragonflye/ ) { - dragonflye(ch_nanopore_input) - - dragonflye.out.summary - .collectFile( - storeDir: "${params.outdir}/summary/", - keepHeader: true, - sort: { file -> file.text }, - name: "dragonflye_summary.tsv") - .set { dragonflye_summary } - ch_gfa = dragonflye.out.gfa - ch_summary = ch_summary.mix(dragonflye_summary) - // no ch_consensus - ch_versions = ch_versions.mix(dragonflye.out.versions.first()) - } if (params.assembler.replaceAll('dragonflye','dragon') =~ /flye/ || params.assembler =~ /raven/ ) { - // quality filter - ch_illumina_input.map { it -> [it[0], it[1], "illumina"]} + ch_illumina_input + .map { it -> [it[0], it[1], "illumina"]} .mix(ch_nanopore_input.map { it -> [it[0], it[1], "nanopore"]}) + .filter{it[0]} .set { ch_input } fastp(ch_input) @@ -1850,21 +1987,64 @@ workflow DONUT_FALLS { // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### - workflow { -// if (params.ontime) { -// ontime(ch_nanopore_input) -// } + if (params.test) { + + test_unicycler() -// nanoplot_summary(ch_nanoplot_summary) + test_unicycler.out.fastq + .map { it -> + meta = [id:it[0]] + tuple( meta, + file("${it[1]}", checkIfExists: true), + [file("${it[2][0]}", checkIfExists: true), file("${it[2][1]}", checkIfExists: true)]) + } + .set{ ch_unicycler_out } + + test_donut_falls() + + test_donut_falls.out.fastq + .map { it -> + meta = [id:it[0]] + tuple( meta, + file("${it[1]}", checkIfExists: true), + [file("${it[2][0]}", checkIfExists: true), file("${it[2][1]}", checkIfExists: true)]) + } + .set{ ch_test_df_out } + + test_donut_falls.out.lrfastq + .map { it -> + meta = [id:it[0]] + tuple( meta, + file("${it[1]}", checkIfExists: true), + null ) + } + .set{ ch_test_dflr_out } + + ch_unicycler_out + .mix(ch_test_df_out) + .mix(ch_test_dflr_out) + .set { ch_test } + + ch_test + .map{it -> tuple(it[0], it[1])} + .set { ch_test_nanopore } + + ch_test + .filter{ it[2] } + .map{it -> tuple(it[0], it[2])} + .set { ch_test_illumina } + + ch_nanopore_input = ch_nanopore_input.mix(ch_test_nanopore) + ch_illumina_input = ch_illumina_input.mix(ch_test_illumina) + } + + if (params.sequencing_summary) { + nanoplot_summary(ch_nanoplot_summary) + } - DONUT_FALLS( - ch_nanopore_input, - ch_illumina_input.ifEmpty([]) - //, - //ch_nanoplot_summary.ifEmpty([]) - ) + DONUT_FALLS(ch_nanopore_input, ch_illumina_input.ifEmpty([])) } workflow.onComplete {