From f4b76b1207bb35defeca30b5210e3704cd41bffb Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 10 Dec 2024 11:35:20 +0000 Subject: [PATCH 01/14] Starting complement for umi factor-out --- subworkflows/local/bam_dedup_umi/main.nf | 118 +++++++++++++++++++++++ workflows/rnaseq/main.nf | 97 ++++--------------- workflows/rnaseq/nextflow.config | 10 +- 3 files changed, 142 insertions(+), 83 deletions(-) create mode 100644 subworkflows/local/bam_dedup_umi/main.nf diff --git a/subworkflows/local/bam_dedup_umi/main.nf b/subworkflows/local/bam_dedup_umi/main.nf new file mode 100644 index 000000000..3359fa7b6 --- /dev/null +++ b/subworkflows/local/bam_dedup_umi/main.nf @@ -0,0 +1,118 @@ +// +// BAM deduplication with UMI processing +// + +include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' +include { BAM_SORT_STATS_SAMTOOLS } from '../../../subworkflows/nf-core/bam_sort_stats_samtools' +include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' + +workflow BAM_DEDUP_UMI { + take: + ch_genome_bam // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ path(fasta) ] + umi_dedup_tool // string: 'umicollapse' or 'umitools' + umitools_dedup_stats // boolean: whether to generate UMI-tools dedup stats + bam_csi_index // boolean: whether to generate CSI index + ch_transcriptome_bam // + ch_transcript_fasta + + main: + ch_versions = Channel.empty() + + if (umi_dedup_tool == "umicollapse" && umi_dedup_tool != "umitools"){ + error("Unknown umi_dedup_tool '${umi_dedup_tool}'") + } + + // Genome BAM deduplication + if (umi_dedup_tool == "umicollapse") { + BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE ( + ch_genome_bam + ) + UMI_DEDUP_GENOME = 
BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE + ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats + + } else if (umi_dedup_tool == "umitools") { + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS ( + ch_genome_bam, + umitools_dedup_stats + ) + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS + ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog + } + + // Co-ordinate sort, index and run stats on transcriptome BAM. This takes + // some preparation- we have to coordinate sort the BAM, run the + // deduplication, then restore name sorting and run a script from umitools + // to prepare for rsem or salmon + + // 1. Coordinate sort + + BAM_SORT_STATS_SAMTOOLS ( + ch_transcriptome_bam, + ch_transcript_fasta.map { [ [:], it ] } + ) + ch_sorted_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam + .join(BAM_SORT_STATS_SAMTOOLS.out.bai) + + // 2. Transcriptome BAM deduplication + if (umi_dedup_tool == "umicollapse") { + BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE ( + ch_sorted_transcriptome_bam + ) + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE + ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats + + } else if (umi_dedup_tool == "umitools") { + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS ( + ch_sorted_transcriptome_bam, + umitools_dedup_stats + ) + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS + ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog + } + + // 3. Restore name sorting + SAMTOOLS_SORT ( + UMI_DEDUP_TRANSCRIPTOME.out.bam, + ch_fasta.map { [ [:], it ] } + ) + + // 4. 
Run prepare_for_rsem.py on paired-end BAM files + // This fixes paired-end reads in name sorted BAM files + // See: https://github.com/nf-core/rnaseq/issues/828 + ended_transcriptome_dedup_bam = SAMTOOLS_SORT.out.bam + .branch { + meta, bam -> + single_end: meta.single_end + return [ meta, bam ] + paired_end: !meta.single_end + return [ meta, bam ] + } + + UMITOOLS_PREPAREFORSALMON ( + ended_transcriptome_dedup_bam.paired_end + .map { meta, bam -> [ meta, bam, [] ] } + ) + + ch_dedup_transcriptome_bam = ch_transcriptome_bam + .single_end + .mix(UMITOOLS_PREPAREFORSALMON.out.bam) + + // Record versions + + ch_versions = UMI_DEDUP_GENOME.out.versions + .mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + .mix(UMITOOLS_PREPAREFORSALMON.out.versions) + + emit: + bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ] + bam_index = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ] + dedup_log = ch_dedup_log // channel: [ val(meta), path(log) ] + stats = UMI_DEDUP_GENOME.out.stats + flagstat = UMI_DEDUP_GENOME.out.flagstat + idxstats = UMI_DEDUP_GENOME.out.idxstats + transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 2e0d7d161..4328ec3a7 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -17,6 +17,7 @@ include { MULTIQC_CUSTOM_BIOTYPE } from '../../modules/local/multiqc // include { ALIGN_STAR } from '../../subworkflows/local/align_star' include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem' +include { BAM_DEDUP_UMI } from '../../subworkflows/local/bam_dedup_umi' include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { multiqcTsvFromList } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness' include { getStarPercentMapped } from 
'../../subworkflows/local/utils_nfcore_rnaseq_pipeline' @@ -218,88 +219,28 @@ workflow RNASEQ { // SUBWORKFLOW: Remove duplicate reads from BAM file based on UMIs // if (params.with_umi) { - // Deduplicate genome BAM file before downstream analysis - if (params.umi_dedup_tool == "umicollapse") { - BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME ( - ch_genome_bam.join(ch_genome_bam_index, by: [0]) - ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.dedup_stats.collect{it[1]}.ifEmpty([])) - } else if (params.umi_dedup_tool == "umitools") { - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( - ch_genome_bam.join(ch_genome_bam_index, by: [0]), - params.umitools_dedup_stats - ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.deduplog.collect{it[1]}) - } else { - error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'") - } - ch_genome_bam = UMI_DEDUP_GENOME.out.bam - ch_genome_bam_index = UMI_DEDUP_GENOME.out.bai - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.stats.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.flagstat.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.idxstats.collect{it[1]}) - if (params.bam_csi_index) { - ch_genome_bam_index = UMI_DEDUP_GENOME.out.csi - } - ch_versions = ch_versions.mix(UMI_DEDUP_GENOME.out.versions) - - // Co-ordinate sort, index and run stats on transcriptome BAM - BAM_SORT_STATS_SAMTOOLS ( - ch_transcriptome_bam, - ch_transcript_fasta.map { [ [:], it ] } + BAM_DEDUP_UMI( + ch_genome_bam.join(ch_genome_bam_index, by: [0]), + ch_fasta, + params.umi_dedup_tool, + params.umitools_dedup_stats, + params.bam_csi_index, + BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai, by: [0]) ) - ch_transcriptome_sorted_bam = BAM_SORT_STATS_SAMTOOLS.out.bam - ch_transcriptome_sorted_bai = 
BAM_SORT_STATS_SAMTOOLS.out.bai - // Deduplicate transcriptome BAM file before read counting with Salmon - if (params.umi_dedup_tool == "umicollapse") { - BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME ( - ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]) + ch_genome_bam = BAM_DEDUP_UMI.out.bam + ch_transcriptome_bam = BAM_DEDUP_UMI.out.bam + ch_genome_bam_index = BAM_DEDUP_UMI.out.bai + ch_versions = BAM_DEDUP_UMI.out.versions + + ch_multiqc_files = ch_multiqc_files + .mix( + BAM_DEDUP_UMI.dedup_log + .concat(BAM_DEDUP_UMI.out.stats) + .concat(BAM_DEDUP_UMI.out.flagstat) + .concat(BAM_DEDUP_UMI.out.idxstats) ) - UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME - } else if (params.umi_dedup_tool == "umitools") { - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( - ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]), - params.umitools_dedup_stats - ) - UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME - } else { - error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'") - } - - // Name sort BAM before passing to Salmon - SAMTOOLS_SORT ( - UMI_DEDUP_TRANSCRIPTOME.out.bam, - ch_fasta.map { [ [:], it ] } - ) - - // Only run prepare_for_rsem.py on paired-end BAM files - SAMTOOLS_SORT - .out - .bam - .branch { - meta, bam -> - single_end: meta.single_end - return [ meta, bam ] - paired_end: !meta.single_end - return [ meta, bam ] - } - .set { ch_dedup_bam } - - // Fix paired-end reads in name sorted BAM file - // See: https://github.com/nf-core/rnaseq/issues/828 - UMITOOLS_PREPAREFORSALMON ( - ch_dedup_bam.paired_end.map { meta, bam -> [ meta, bam, [] ] } - ) - ch_versions = ch_versions.mix(UMITOOLS_PREPAREFORSALMON.out.versions.first()) - - ch_dedup_bam - .single_end - .mix(UMITOOLS_PREPAREFORSALMON.out.bam) - .set { ch_transcriptome_bam } } // diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index 5c51aa2a9..02d60e7e3 100644 --- 
a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -102,7 +102,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { if (params.with_umi) { process { - withName: 'NFCORE_RNASEQ:RNASEQ:SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:SAMTOOLS_SORT' { ext.args = '-n' ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } publishDir = [ @@ -113,7 +113,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:UMITOOLS_PREPAREFORSALMON' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:UMITOOLS_PREPAREFORSALMON' { ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" } publishDir = [ [ @@ -130,7 +130,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.transcriptome.sorted" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, @@ -140,7 +140,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, @@ -149,7 +149,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? 
"${params.outdir}/${params.aligner}/samtools_stats" : params.outdir }, From 9603f3da79fd8cc63da18d31f8479e304c8b8819 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 10 Dec 2024 16:32:20 +0000 Subject: [PATCH 02/14] Fixes with local subworkflow --- subworkflows/local/bam_dedup_umi/main.nf | 78 ++++++++++++++--------- workflows/rnaseq/main.nf | 79 ++++++++++-------------- workflows/rnaseq/nextflow.config | 10 +-- 3 files changed, 88 insertions(+), 79 deletions(-) diff --git a/subworkflows/local/bam_dedup_umi/main.nf b/subworkflows/local/bam_dedup_umi/main.nf index 3359fa7b6..a7f76f9a3 100644 --- a/subworkflows/local/bam_dedup_umi/main.nf +++ b/subworkflows/local/bam_dedup_umi/main.nf @@ -2,21 +2,23 @@ // BAM deduplication with UMI processing // -include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' -include { BAM_SORT_STATS_SAMTOOLS } from '../../../subworkflows/nf-core/bam_sort_stats_samtools' -include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem' -include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' +include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' +include { BAM_SORT_STATS_SAMTOOLS } from 
'../../../subworkflows/nf-core/bam_sort_stats_samtools' +include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' workflow BAM_DEDUP_UMI { take: ch_genome_bam // channel: [ val(meta), path(bam), path(bai) ] - ch_fasta // channel: [ path(fasta) ] + ch_fasta // channel: [ val(meta), path(fasta) ] umi_dedup_tool // string: 'umicollapse' or 'umitools' umitools_dedup_stats // boolean: whether to generate UMI-tools dedup stats bam_csi_index // boolean: whether to generate CSI index - ch_transcriptome_bam // - ch_transcript_fasta + ch_transcriptome_bam // channel: [ val(meta), path(bam) ] + ch_transcript_fasta // channel: [ val(meta), path(fasta) ] main: ch_versions = Channel.empty() @@ -27,18 +29,18 @@ workflow BAM_DEDUP_UMI { // Genome BAM deduplication if (umi_dedup_tool == "umicollapse") { - BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE ( + BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME ( ch_genome_bam ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats } else if (umi_dedup_tool == "umitools") { - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS ( + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( ch_genome_bam, umitools_dedup_stats ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS + UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog } @@ -51,32 +53,32 @@ workflow BAM_DEDUP_UMI { BAM_SORT_STATS_SAMTOOLS ( ch_transcriptome_bam, - ch_transcript_fasta.map { [ [:], it ] } + ch_transcript_fasta ) ch_sorted_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam .join(BAM_SORT_STATS_SAMTOOLS.out.bai) // 2. 
Transcriptome BAM deduplication if (umi_dedup_tool == "umicollapse") { - BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE ( + BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME ( ch_sorted_transcriptome_bam ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE - ch_dedup_log = UMI_DEDUP_GENOME.out.dedup_stats + UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME + ch_dedup_log = dedup_log.mix(UMI_DEDUP_GENOME.out.dedup_stats) } else if (umi_dedup_tool == "umitools") { - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS ( + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( ch_sorted_transcriptome_bam, umitools_dedup_stats ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS - ch_dedup_log = UMI_DEDUP_GENOME.out.deduplog + UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME + ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_GENOME.out.deduplog) } // 3. Restore name sorting SAMTOOLS_SORT ( UMI_DEDUP_TRANSCRIPTOME.out.bam, - ch_fasta.map { [ [:], it ] } + ch_fasta ) // 4. 
Run prepare_for_rsem.py on paired-end BAM files @@ -91,28 +93,46 @@ workflow BAM_DEDUP_UMI { return [ meta, bam ] } - UMITOOLS_PREPAREFORSALMON ( + UMITOOLS_PREPAREFORRSEM ( ended_transcriptome_dedup_bam.paired_end .map { meta, bam -> [ meta, bam, [] ] } ) - ch_dedup_transcriptome_bam = ch_transcriptome_bam - .single_end - .mix(UMITOOLS_PREPAREFORSALMON.out.bam) + ch_dedup_transcriptome_bam = ended_transcriptome_dedup_bam.single_end + .mix(UMITOOLS_PREPAREFORRSEM.out.bam) + + // Collect files useful for MultiQC into one helpful emission + + ch_stats = UMI_DEDUP_GENOME.out.stats + .mix(UMI_DEDUP_TRANSCRIPTOME.out.stats) + + ch_flagstat = UMI_DEDUP_GENOME.out.flagstat + .mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat) + + ch_idxstats = UMI_DEDUP_GENOME.out.idxstats + .mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats) + + ch_multiqc_files = ch_dedup_log + .mix(ch_stats) + .mix(ch_flagstat) + .mix(ch_idxstats) + .transpose() + .map{it[1]} // Record versions ch_versions = UMI_DEDUP_GENOME.out.versions .mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - .mix(UMITOOLS_PREPAREFORSALMON.out.versions) + .mix(UMITOOLS_PREPAREFORRSEM.out.versions) emit: bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ] - bam_index = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ] + bai = bam_csi_index ? 
UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ] dedup_log = ch_dedup_log // channel: [ val(meta), path(log) ] - stats = UMI_DEDUP_GENOME.out.stats - flagstat = UMI_DEDUP_GENOME.out.flagstat - idxstats = UMI_DEDUP_GENOME.out.idxstats + stats = ch_stats + flagstat = ch_flagstat + idxstats = ch_idxstats + multiqc_files = ch_multiqc_files transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 4328ec3a7..6db5dfa15 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -15,9 +15,11 @@ include { MULTIQC_CUSTOM_BIOTYPE } from '../../modules/local/multiqc // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { ALIGN_STAR } from '../../subworkflows/local/align_star' -include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem' -include { BAM_DEDUP_UMI } from '../../subworkflows/local/bam_dedup_umi' +include { ALIGN_STAR } from '../../subworkflows/local/align_star' +include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem' +include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_STAR } from '../../subworkflows/local/bam_dedup_umi' +include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_HISAT } from '../../subworkflows/local/bam_dedup_umi' + include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { multiqcTsvFromList } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness' include { getStarPercentMapped } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' @@ -35,7 +37,6 @@ include { methodsDescriptionText } from '../../subworkflows/local/utils_ // MODULE: Installed directly from nf-core/modules // include { DUPRADAR } from '../../modules/nf-core/dupradar' -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort' include { PRESEQ_LCEXTRAP } from 
'../../modules/nf-core/preseq/lcextrap' include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq' include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie' @@ -43,7 +44,6 @@ include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featu include { KRAKEN2_KRAKEN2 as KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main' include { BRACKEN_BRACKEN as BRACKEN } from '../../modules/nf-core/bracken/bracken/main' include { MULTIQC } from '../../modules/nf-core/multiqc' -include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../../modules/nf-core/umitools/prepareforrsem' include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_FW } from '../../modules/nf-core/bedtools/genomecov' include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_REV } from '../../modules/nf-core/bedtools/genomecov' @@ -220,27 +220,23 @@ workflow RNASEQ { // if (params.with_umi) { - BAM_DEDUP_UMI( + BAM_DEDUP_UMI_STAR( ch_genome_bam.join(ch_genome_bam_index, by: [0]), - ch_fasta, + ch_fasta.map { [ [:], it ] }, params.umi_dedup_tool, params.umitools_dedup_stats, params.bam_csi_index, - BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai, by: [0]) + ch_transcriptome_bam, + ch_transcript_fasta.map { [ [:], it ] } ) - ch_genome_bam = BAM_DEDUP_UMI.out.bam - ch_transcriptome_bam = BAM_DEDUP_UMI.out.bam - ch_genome_bam_index = BAM_DEDUP_UMI.out.bai - ch_versions = BAM_DEDUP_UMI.out.versions - - ch_multiqc_files = ch_multiqc_files - .mix( - BAM_DEDUP_UMI.dedup_log - .concat(BAM_DEDUP_UMI.out.stats) - .concat(BAM_DEDUP_UMI.out.flagstat) - .concat(BAM_DEDUP_UMI.out.idxstats) - ) + ch_genome_bam = BAM_DEDUP_UMI_STAR.out.bam + ch_transcriptome_bam = BAM_DEDUP_UMI_STAR.out.transcriptome_bam + ch_genome_bam_index = BAM_DEDUP_UMI_STAR.out.bai + ch_versions = BAM_DEDUP_UMI_STAR.out.versions + + ch_multiqc_files = ch_multiqc_files + .mix(BAM_DEDUP_UMI_STAR.out.multiqc_files) } // @@ -335,32 +331,25 @@ workflow RNASEQ { // // SUBWORKFLOW: 
Remove duplicate reads from BAM file based on UMIs // + if (params.with_umi) { - if (params.umi_dedup_tool == "umicollapse") { - BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME ( - ch_genome_bam.join(ch_genome_bam_index, by: [0]), - ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.dedup_stats.collect{it[1]}.ifEmpty([])) - } else if (params.umi_dedup_tool == "umitools") { - BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME ( - ch_genome_bam.join(ch_genome_bam_index, by: [0]), - params.umitools_dedup_stats - ) - UMI_DEDUP_GENOME = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.deduplog.collect{it[1]}) - } else { - error("Unknown umi_dedup_tool '${params.umi_dedup_tool}'") - } - ch_genome_bam = UMI_DEDUP_GENOME.out.bam - ch_genome_bam_index = UMI_DEDUP_GENOME.out.bai - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.stats.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.flagstat.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(UMI_DEDUP_GENOME.out.idxstats.collect{it[1]}) - if (params.bam_csi_index) { - ch_genome_bam_index = UMI_DEDUP_GENOME.out.csi - } - ch_versions = ch_versions.mix(UMI_DEDUP_GENOME.out.versions) + + BAM_DEDUP_UMI_HISAT2( + ch_genome_bam.join(ch_genome_bam_index, by: [0]), + ch_fasta.map { [ [:], it ] }, + params.umi_dedup_tool, + params.umitools_dedup_stats, + params.bam_csi_index, + [[],[]], + [[],[]] + ) + + ch_genome_bam = BAM_DEDUP_UMI_HISAT2.out.bam + ch_genome_bam_index = BAM_DEDUP_UMI_HISAT2.out.bai + ch_versions = BAM_DEDUP_UMI_HISAT2.out.versions + + ch_multiqc_files = ch_multiqc_files + .mix(BAM_DEDUP_UMI_HISAT2.out.multiqc_files) } } diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index 02d60e7e3..2eef40849 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -102,7 +102,7 @@ if 
(!params.skip_alignment && params.aligner == 'star_salmon') { if (params.with_umi) { process { - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):SAMTOOLS_SORT' { ext.args = '-n' ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } publishDir = [ @@ -113,7 +113,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:UMITOOLS_PREPAREFORSALMON' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):UMITOOLS_PREPAREFORRSEM' { ext.prefix = { "${meta.id}.umi_dedup.transcriptome.filtered" } publishDir = [ [ @@ -130,7 +130,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.transcriptome.sorted" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, @@ -140,7 +140,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? 
"${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, @@ -149,7 +149,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}/samtools_stats" : params.outdir }, From d2fd88515d70ab1736b06f2382688628951cbcaa Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 10 Dec 2024 16:34:05 +0000 Subject: [PATCH 03/14] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5c417dd3..488ab2da7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Special thanks to the following for their contributions to the release: - [PR #1369](https://github.com/nf-core/rnaseq/pull/1369) - Add umicollapse as an alternative to umi-tools - [PR #1461](https://github.com/nf-core/rnaseq/pull/1461) - Add FASTQ linting during preprocessing - [PR #1463](https://github.com/nf-core/rnaseq/pull/1463) - Move channel operations outside of the onComplete() block +- [PR #1466](https://github.com/nf-core/rnaseq/pull/1466) - Factor out UMI handling ### Software dependencies From c01791630139c3e4948f1e18452ee3c4abec72fc Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 09:46:22 +0000 Subject: [PATCH 04/14] Fix subworkflow alias --- workflows/rnaseq/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 62764d343..131a86dd1 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -15,10 +15,10 @@ include { MULTIQC_CUSTOM_BIOTYPE } from '../../modules/local/multiqc // // SUBWORKFLOW: 
Consisting of a mix of local and nf-core/modules // -include { ALIGN_STAR } from '../../subworkflows/local/align_star' -include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem' -include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_STAR } from '../../subworkflows/local/bam_dedup_umi' -include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_HISAT } from '../../subworkflows/local/bam_dedup_umi' +include { ALIGN_STAR } from '../../subworkflows/local/align_star' +include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem' +include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_STAR } from '../../subworkflows/local/bam_dedup_umi' +include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_HISAT2 } from '../../subworkflows/local/bam_dedup_umi' include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { multiqcTsvFromList } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness' From 42f3fa7147846a9f6bfc53b2cd6f32d12431090a Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 09:49:57 +0000 Subject: [PATCH 05/14] Fix linting --- workflows/rnaseq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 131a86dd1..bece803c1 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -354,7 +354,7 @@ workflow RNASEQ { ch_multiqc_files = ch_multiqc_files .mix(BAM_DEDUP_UMI_HISAT2.out.multiqc_files) } else { - + // The deduplicated stats should take priority for MultiQC, but use // them straight out of the aligner otherwise ch_multiqc_files = ch_multiqc_files From d3cc50d77aa30379b257df16bd1ab25d60cf0d20 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 10:26:40 +0000 Subject: [PATCH 06/14] Fix version mixing --- workflows/rnaseq/main.nf | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index bece803c1..5d4085b81 100755 --- 
a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -181,6 +181,8 @@ workflow RNASEQ { ch_genome_bam_index = Channel.empty() ch_star_log = Channel.empty() ch_unaligned_sequences = Channel.empty() + ch_transcriptome_bam = Channel.empty() + if (!params.skip_alignment && params.aligner == 'star_salmon') { // Check if an AWS iGenome has been provided to use the appropriate version of STAR def is_aws_igenome = false @@ -230,7 +232,7 @@ workflow RNASEQ { ch_genome_bam = BAM_DEDUP_UMI_STAR.out.bam ch_transcriptome_bam = BAM_DEDUP_UMI_STAR.out.transcriptome_bam ch_genome_bam_index = BAM_DEDUP_UMI_STAR.out.bai - ch_versions = BAM_DEDUP_UMI_STAR.out.versions + ch_versions = ch_versions.mix(BAM_DEDUP_UMI_STAR.out.versions) ch_multiqc_files = ch_multiqc_files .mix(BAM_DEDUP_UMI_STAR.out.multiqc_files) @@ -343,13 +345,13 @@ workflow RNASEQ { params.umi_dedup_tool, params.umitools_dedup_stats, params.bam_csi_index, - [[],[]], - [[],[]] + ch_transcriptome_bam, + ch_transcript_fasta.map { [ [:], it ] } ) ch_genome_bam = BAM_DEDUP_UMI_HISAT2.out.bam ch_genome_bam_index = BAM_DEDUP_UMI_HISAT2.out.bai - ch_versions = BAM_DEDUP_UMI_HISAT2.out.versions + ch_versions = ch_versions.mix(BAM_DEDUP_UMI_HISAT2.out.versions) ch_multiqc_files = ch_multiqc_files .mix(BAM_DEDUP_UMI_HISAT2.out.multiqc_files) From 476902522a6d8cfa6590229a17c115110977949e Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 10:26:56 +0000 Subject: [PATCH 07/14] fix selector --- workflows/rnaseq/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index 2eef40849..38fa58c50 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -130,7 +130,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + withName: 
'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.transcriptome.sorted" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, From 742be2fc8e3d8f556ee7298b2a12199671a7c031 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 10:27:17 +0000 Subject: [PATCH 08/14] misc --- subworkflows/local/bam_dedup_umi/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_dedup_umi/main.nf b/subworkflows/local/bam_dedup_umi/main.nf index a7f76f9a3..9390a0c43 100644 --- a/subworkflows/local/bam_dedup_umi/main.nf +++ b/subworkflows/local/bam_dedup_umi/main.nf @@ -23,7 +23,7 @@ workflow BAM_DEDUP_UMI { main: ch_versions = Channel.empty() - if (umi_dedup_tool == "umicollapse" && umi_dedup_tool != "umitools"){ + if (umi_dedup_tool != "umicollapse" && umi_dedup_tool != "umitools"){ error("Unknown umi_dedup_tool '${umi_dedup_tool}'") } @@ -64,7 +64,7 @@ workflow BAM_DEDUP_UMI { ch_sorted_transcriptome_bam ) UMI_DEDUP_TRANSCRIPTOME = BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME - ch_dedup_log = dedup_log.mix(UMI_DEDUP_GENOME.out.dedup_stats) + ch_dedup_log = ch_dedup_log.mix(UMI_DEDUP_GENOME.out.dedup_stats) } else if (umi_dedup_tool == "umitools") { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( From 9f06d707012e243e2fa86dc11ce6f7182cacd187 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 10:52:28 +0000 Subject: [PATCH 09/14] Fix process name in snap --- tests/umi.nf.test.snap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/umi.nf.test.snap b/tests/umi.nf.test.snap index b12fa48d0..fd8700918 100644 --- a/tests/umi.nf.test.snap +++ b/tests/umi.nf.test.snap @@ -133,7 +133,7 @@ "UMITOOLS_EXTRACT": { "umitools": "1.1.5" }, - "UMITOOLS_PREPAREFORSALMON": { + "UMITOOLS_PREPAREFORRSEM": { "umitools": "1.1.5" }, 
"UNTAR_SALMON_INDEX": { @@ -2809,4 +2809,4 @@ }, "timestamp": "2024-12-11T18:08:48.404716766" } -} \ No newline at end of file +} From dbd3343b5077c2b9dfd033921f5a475e8323e6c1 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 10:52:47 +0000 Subject: [PATCH 10/14] Remove unneeded subworkflow include --- workflows/rnaseq/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 5d4085b81..63c3678de 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -55,7 +55,6 @@ include { samplesheetToList } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' include { FASTQ_ALIGN_HISAT2 } from '../../subworkflows/nf-core/fastq_align_hisat2' -include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' include { BAM_MARKDUPLICATES_PICARD } from '../../subworkflows/nf-core/bam_markduplicates_picard' include { BAM_RSEQC } from '../../subworkflows/nf-core/bam_rseqc' include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' From 1bc1b73bdf7ebd6a201402eb02c47a3976d8fec3 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 10:53:15 +0000 Subject: [PATCH 11/14] Fix more config selectors --- workflows/rnaseq/nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index 38fa58c50..f29916354 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -140,7 +140,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 
'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, @@ -149,7 +149,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } - withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_(STAR|HISAT2):BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_RNASEQ:RNASEQ:BAM_DEDUP_UMI_STAR:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } publishDir = [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}/samtools_stats" : params.outdir }, From 9196038ee8d06cbebde598ce7af37232c4833615 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 11:14:33 +0000 Subject: [PATCH 12/14] Don't mix transcriptome bam stats with genome ones for multiqc --- subworkflows/local/bam_dedup_umi/main.nf | 25 +++++++++--------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/subworkflows/local/bam_dedup_umi/main.nf b/subworkflows/local/bam_dedup_umi/main.nf index 9390a0c43..297267e1e 100644 --- a/subworkflows/local/bam_dedup_umi/main.nf +++ b/subworkflows/local/bam_dedup_umi/main.nf @@ -101,21 +101,14 @@ workflow BAM_DEDUP_UMI { ch_dedup_transcriptome_bam = ended_transcriptome_dedup_bam.single_end .mix(UMITOOLS_PREPAREFORRSEM.out.bam) - // Collect files useful for MultiQC into one helpful emission - - ch_stats = UMI_DEDUP_GENOME.out.stats - .mix(UMI_DEDUP_TRANSCRIPTOME.out.stats) - - ch_flagstat = UMI_DEDUP_GENOME.out.flagstat - .mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat) - - ch_idxstats = UMI_DEDUP_GENOME.out.idxstats - .mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats) + // Collect files useful for MultiQC into one helpful emission. 
Don't + // automatically add transcriptome stats- difficult to separate in multiqc + // without a bit more work ch_multiqc_files = ch_dedup_log - .mix(ch_stats) - .mix(ch_flagstat) - .mix(ch_idxstats) + .mix(UMI_DEDUP_GENOME.out.stats) + .mix(UMI_DEDUP_GENOME.out.flagstat) + .mix(UMI_DEDUP_GENOME.out.idxstats) .transpose() .map{it[1]} @@ -129,9 +122,9 @@ workflow BAM_DEDUP_UMI { bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ] bai = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ] dedup_log = ch_dedup_log // channel: [ val(meta), path(log) ] - stats = ch_stats - flagstat = ch_flagstat - idxstats = ch_idxstats + stats = UMI_DEDUP_GENOME.out.stats.mix(UMI_DEDUP_TRANSCRIPTOME.out.stats) + flagstat = UMI_DEDUP_GENOME.out.flagstat.mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat) + idxstats = UMI_DEDUP_GENOME.out.idxstats.mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats) multiqc_files = ch_multiqc_files transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ] versions = ch_versions // channel: [ path(versions.yml) ] From bc3cffbd2def663d1c37ccb7bb6ea0d1cf931c20 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 17:10:43 +0000 Subject: [PATCH 13/14] Remove method from config, tidy up --- workflows/rnaseq/nextflow.config | 45 +++++++++++++------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index f29916354..e7a6290fa 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -21,31 +21,6 @@ includeConfig "../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/nextf includeConfig "../../subworkflows/nf-core/fastq_subsample_fq_salmon/nextflow.config" includeConfig "../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/nextflow.config" -def umi_dedup_args() { - if (params.umi_dedup_tool == "umicollapse") { - def algo = 
params.umitools_grouping_method - if (params.umitools_grouping_method == 'directional') { - algo = 'dir' - } else if (params.umitools_grouping_method == 'adjacency') { - algo = 'adj' - } else if (params.umitools_grouping_method == 'cluster') { - algo = 'cc' - } - return { [ - '--two-pass', - meta.single_end ? '' : '--paired --remove-unpaired --remove-chimeric', - params.umitools_grouping_method ? "--algo '${algo}'" : '', - params.umitools_umi_separator ? "--umi-sep '${params.umitools_umi_separator}'" : '' - ].join(' ').trim() } - } else { - return { [ - meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', - params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', - params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' - ].join(' ').trim() } - } -} - // // STAR Salmon alignment options // @@ -160,7 +135,6 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { } withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:UMI(COLLAPSE|TOOLS_DEDUP)' { - ext.args = umi_dedup_args() ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } publishDir = [ [ @@ -249,8 +223,25 @@ if (!params.skip_alignment) { if (params.with_umi && ['star_salmon','hisat2'].contains(params.aligner)) { process { + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_(GEN|TRANSCRIPT)OME:UMITOOLS_DEDUP' { + ext.args = { [ + meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard', + params.umitools_grouping_method ? "--method='${params.umitools_grouping_method}'" : '', + params.umitools_umi_separator ? "--umi-separator='${params.umitools_umi_separator}'" : '' + ].join(' ').trim() } + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_(GEN|TRANSCRIPT)OME:UMICOLLAPSE' { + ext.args = { [ + '--two-pass', + meta.single_end ? '' : '--paired --remove-unpaired --remove-chimeric', + params.umitools_grouping_method ?
"--algo '" + ['directional':'dir','adjacency':'adj','cluster':'cc'].get(params.umitools_grouping_method, params.umitools_grouping_method) + "'" : '', // unmapped methods pass through to --algo unchanged, as the removed umi_dedup_args() helper did + params.umitools_umi_separator ? "--umi-sep '${params.umitools_umi_separator}'" : '', + ].join(' ').trim()} + } + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:UMI(COLLAPSE|TOOLS_DEDUP)' { - ext.args = umi_dedup_args() ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ [ From 83777206e000b0ed2e89357f3899a4e3ea9d222e Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 12 Dec 2024 17:11:13 +0000 Subject: [PATCH 14/14] umi workflow from nf-core --- modules.json | 13 +- .../{local => nf-core}/bam_dedup_umi/main.nf | 33 ++-- subworkflows/nf-core/bam_dedup_umi/meta.yml | 181 ++++++++++++++++++ .../nf-core/bam_dedup_umi/tests/main.nf.test | 99 ++++++++++ .../bam_dedup_umi/tests/main.nf.test.snap | 109 +++++++++++ .../bam_dedup_umi/tests/nextflow.config | 38 ++++ workflows/rnaseq/main.nf | 8 +- 7 files changed, 455 insertions(+), 26 deletions(-) rename subworkflows/{local => nf-core}/bam_dedup_umi/main.nf (75%) create mode 100644 subworkflows/nf-core/bam_dedup_umi/meta.yml create mode 100644 subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test create mode 100644 subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/bam_dedup_umi/tests/nextflow.config diff --git a/modules.json b/modules.json index 43fdc1f9a..8957c2ef4 100644 --- a/modules.json +++ b/modules.json @@ -212,7 +212,7 @@ "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", - "installed_by": ["bam_sort_stats_samtools"] + "installed_by": ["bam_dedup_umi", "bam_sort_stats_samtools"] }, "samtools/stats": { "branch": "master", @@ -290,7 +290,7 @@ "umitools/prepareforrsem": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": ["bam_dedup_umi", "modules"] }, "untar": { "branch": "master", @@ -304,11 +304,16
@@ "bam_dedup_stats_samtools_umicollapse": { "branch": "master", "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", - "installed_by": ["subworkflows"] + "installed_by": ["bam_dedup_umi", "subworkflows"] }, "bam_dedup_stats_samtools_umitools": { "branch": "master", "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", + "installed_by": ["bam_dedup_umi", "subworkflows"] + }, + "bam_dedup_umi": { + "branch": "master", + "git_sha": "b5828f47c17c41ce3a4c70b863c99207e3f6d37c", "installed_by": ["subworkflows"] }, "bam_markduplicates_picard": { @@ -324,7 +329,7 @@ "bam_sort_stats_samtools": { "branch": "master", "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", - "installed_by": ["fastq_align_hisat2"] + "installed_by": ["bam_dedup_umi", "fastq_align_hisat2"] }, "bam_stats_samtools": { "branch": "master", diff --git a/subworkflows/local/bam_dedup_umi/main.nf b/subworkflows/nf-core/bam_dedup_umi/main.nf similarity index 75% rename from subworkflows/local/bam_dedup_umi/main.nf rename to subworkflows/nf-core/bam_dedup_umi/main.nf index 297267e1e..d091a5824 100644 --- a/subworkflows/local/bam_dedup_umi/main.nf +++ b/subworkflows/nf-core/bam_dedup_umi/main.nf @@ -2,13 +2,14 @@ // BAM deduplication with UMI processing // -include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' -include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' -include { BAM_SORT_STATS_SAMTOOLS } from 
'../../../subworkflows/nf-core/bam_sort_stats_samtools' -include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem' -include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from '../bam_dedup_stats_samtools_umicollapse' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../bam_dedup_stats_samtools_umitools' +include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../bam_dedup_stats_samtools_umicollapse' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../bam_dedup_stats_samtools_umitools' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools' + +include { UMITOOLS_PREPAREFORRSEM } from '../../../modules/nf-core/umitools/prepareforrsem' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' workflow BAM_DEDUP_UMI { take: @@ -119,13 +120,13 @@ workflow BAM_DEDUP_UMI { .mix(UMITOOLS_PREPAREFORRSEM.out.versions) emit: - bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ] - bai = bam_csi_index ? UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ] - dedup_log = ch_dedup_log // channel: [ val(meta), path(log) ] - stats = UMI_DEDUP_GENOME.out.stats.mix(UMI_DEDUP_TRANSCRIPTOME.out.stats) - flagstat = UMI_DEDUP_GENOME.out.flagstat.mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat) - idxstats = UMI_DEDUP_GENOME.out.idxstats.mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats) - multiqc_files = ch_multiqc_files - transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ] - versions = ch_versions // channel: [ path(versions.yml) ] + bam = UMI_DEDUP_GENOME.out.bam // channel: [ val(meta), path(bam) ] + bai = bam_csi_index ? 
UMI_DEDUP_GENOME.out.csi : UMI_DEDUP_GENOME.out.bai // channel: [ val(meta), path(bai) ] + dedup_log = ch_dedup_log // channel: [ val(meta), path(log) ] + stats = UMI_DEDUP_GENOME.out.stats.mix(UMI_DEDUP_TRANSCRIPTOME.out.stats) // channel: [ val(meta), path(stats)] + flagstat = UMI_DEDUP_GENOME.out.flagstat.mix(UMI_DEDUP_TRANSCRIPTOME.out.flagstat) // channel: [ val(meta), path(flagstat)] + idxstats = UMI_DEDUP_GENOME.out.idxstats.mix(UMI_DEDUP_TRANSCRIPTOME.out.idxstats) // channel: [ val(meta), path(idxstats)] + multiqc_files = ch_multiqc_files // channel: file + transcriptome_bam = ch_dedup_transcriptome_bam // channel: [ val(meta), path(bam) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/nf-core/bam_dedup_umi/meta.yml b/subworkflows/nf-core/bam_dedup_umi/meta.yml new file mode 100644 index 000000000..f88fa2f1c --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_umi/meta.yml @@ -0,0 +1,181 @@ +name: "bam_dedup_umi" +description: BAM deduplication with UMI processing for both genome and transcriptome alignments +keywords: + - deduplication + - UMI + - BAM + - genome + - transcriptome + - umicollapse + - umitools + +components: + - umitools/prepareforrsem + - samtools/sort + - bam_dedup_stats_samtools_umicollapse + - bam_dedup_stats_samtools_umitools + - bam_sort_stats_samtools + +input: + - ch_genome_bam: + description: Channel with genome BAM files + structure: + - meta: + type: map + description: Metadata map + - bam: + type: file + description: BAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - ch_fasta: + description: Channel with genome FASTA file + structure: + - meta: + type: map + description: Metadata map + - fasta: + type: file + description: Genome FASTA file + pattern: "*.{fa,fasta}" + - umi_dedup_tool: + description: UMI deduplication tool to use + structure: + - value: + type: string + description: Either 'umicollapse' or 'umitools' + - 
umitools_dedup_stats: + description: Whether to generate UMI-tools deduplication stats + structure: + - value: + type: boolean + description: True or False + - bam_csi_index: + description: Whether to generate CSI index + structure: + - value: + type: boolean + description: True or False + - ch_transcriptome_bam: + description: Channel with transcriptome BAM files + structure: + - meta: + type: map + description: Metadata map + - bam: + type: file + description: BAM file + pattern: "*.bam" + - ch_transcript_fasta: + description: Channel with transcript FASTA file + structure: + - meta: + type: map + description: Metadata map + - fasta: + type: file + description: Transcript FASTA file + pattern: "*.{fa,fasta}" + +output: + - bam: + description: Channel containing deduplicated genome BAM files + structure: + - meta: + type: map + description: Metadata map + - bam: + type: file + description: Deduplicated BAM file + pattern: "*.bam" + - bai: + description: Channel containing indexed BAM (BAI) files + structure: + - meta: + type: map + description: Metadata map + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - csi: + description: Channel containing CSI files (if bam_csi_index is true) + structure: + - meta: + type: map + description: Metadata map + - csi: + type: file + description: CSI index file + pattern: "*.csi" + - dedup_log: + description: Channel containing deduplication log files + structure: + - meta: + type: map + description: Metadata map + - log: + type: file + description: Deduplication log file + pattern: "*.log" + - stats: + description: Channel containing BAM statistics files + structure: + - meta: + type: map + description: Metadata map + - stats: + type: file + description: BAM statistics file + pattern: "*.stats" + - flagstat: + description: Channel containing flagstat files + structure: + - meta: + type: map + description: Metadata map + - flagstat: + type: file + description: Flagstat file + pattern: "*.flagstat" + - 
idxstats: + description: Channel containing idxstats files + structure: + - meta: + type: map + description: Metadata map + - idxstats: + type: file + description: Idxstats file + pattern: "*.idxstats" + - multiqc_files: + description: Channel containing files for MultiQC + structure: + - file: + type: file + description: File for MultiQC + - transcriptome_bam: + description: Channel containing deduplicated transcriptome BAM files + structure: + - meta: + type: map + description: Metadata map + - bam: + type: file + description: Deduplicated transcriptome BAM file + pattern: "*.bam" + - versions: + description: Channel containing software versions file + structure: + - versions: + type: file + description: File containing versions of the software used + pattern: "versions.yml" + +authors: + - "@pinin4fjords" +maintainers: + - "@pinin4fjords" diff --git a/subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test b/subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test new file mode 100644 index 000000000..b8e9da9ab --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test @@ -0,0 +1,99 @@ +nextflow_workflow { + + name "Test Workflow BAM_DEDUP_UMI" + script "../main.nf" + workflow "BAM_DEDUP_UMI" + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_dedup_umi" + tag "bam_dedup_stats_samtools_umicollapse" + tag "bam_dedup_stats_samtools_umitools" + tag "bam_sort_stats_samtools" + tag "umitools/prepareforrsem" + tag "samtools/sort" + + test("sarscov2_bam_bai - umitools - with transcriptome bams") { + + when { + workflow { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 
'/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = 'umitools' + input[3] = false + input[4] = false + input[5] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true) ] + input[6] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/transcriptome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + bam(workflow.out.bam[0][1]).getReadsMD5(), + bam(workflow.out.transcriptome_bam[0][1]).getReadsMD5(), + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match() }, + { assert path(workflow.out.bai.get(0).get(1)).exists() } + ) + } + + } + + test("sarscov2_bam_bai - umicollapse - no transcriptome bams") { + + when { + workflow { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = 'umicollapse' + input[3] = false + input[4] = false + input[5] = Channel.empty() + input[6] = Channel.empty() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + bam(workflow.out.bam[0][1]).getReadsMD5(), + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match() }, + { assert path(workflow.out.bai.get(0).get(1)).exists() } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test.snap new file mode 
100644 index 000000000..94151f04b --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_umi/tests/main.nf.test.snap @@ -0,0 +1,109 @@ +{ + "sarscov2_bam_bai - umicollapse - no transcriptome bams": { + "content": [ + "c1917631c47d16320d002b867e226a2e", + [ + [ + { + "id": "test" + }, + "test.umi_dedup.genome.sorted.bam.stats:md5,a03e635b85a846a2650c7b747926c7ed" + ] + ], + [ + [ + { + "id": "test" + }, + "test.umi_dedup.genome.sorted.bam.flagstat:md5,18d602435a02a4d721b78d1812622159" + ] + ], + [ + [ + { + "id": "test" + }, + "test.umi_dedup.genome.sorted.bam.idxstats:md5,85d20a901eef23ca50c323638a2eb602" + ] + ], + [ + "versions.yml:md5,3fe6727d1a90130a932400464233ab5d", + "versions.yml:md5,5d91176bd409dc54816e7e3a7773b5ef", + "versions.yml:md5,7eaf0c81f627dde8e2710a57e19b4d87", + "versions.yml:md5,d6bba04463b79564f053e7e5033025dd", + "versions.yml:md5,f90574f645fd8876ba52b89b2272afc1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-12T13:04:25.48875" + }, + "sarscov2_bam_bai - umitools - with transcriptome bams": { + "content": [ + "c1917631c47d16320d002b867e226a2e", + "fe38ce657d4208a850702bdbfaa062ca", + [ + [ + { + "id": "test" + }, + "test.umi_dedup.genome.sorted.bam.stats:md5,a03e635b85a846a2650c7b747926c7ed" + ], + [ + { + "id": "test" + }, + "test.umi_dedup.transcriptome.sorted.bam.stats:md5,720f03daf649606a50c2cccbf135edc5" + ] + ], + [ + [ + { + "id": "test" + }, + "test.umi_dedup.genome.sorted.bam.flagstat:md5,18d602435a02a4d721b78d1812622159" + ], + [ + { + "id": "test" + }, + "test.umi_dedup.transcriptome.sorted.bam.flagstat:md5,18d602435a02a4d721b78d1812622159" + ] + ], + [ + [ + { + "id": "test" + }, + "test.umi_dedup.genome.sorted.bam.idxstats:md5,85d20a901eef23ca50c323638a2eb602" + ], + [ + { + "id": "test" + }, + "test.umi_dedup.transcriptome.sorted.bam.idxstats:md5,85d20a901eef23ca50c323638a2eb602" + ] + ], + [ + "versions.yml:md5,017dce06e6fadba312ce46808e2b19ff", + 
"versions.yml:md5,0ca2bd583dd1951c792b526f768db343", + "versions.yml:md5,1e761d924c3e0f35284799aa566e7c1a", + "versions.yml:md5,43e34c4518b8d546b632b372bc282ac4", + "versions.yml:md5,9d7724974d6dc288b485b5509c89297b", + "versions.yml:md5,a4562f4cc5d04041a79ba57761e231d9", + "versions.yml:md5,acfef82b716e50563fb22f47f33de00a", + "versions.yml:md5,afd68b9511aecba9cc5d6f7dd4c34bbd", + "versions.yml:md5,c2fda58ea2b120a510502b9db24d8a16", + "versions.yml:md5,d531cd7e744b2f95659bdb544f2cd8a5", + "versions.yml:md5,e22174a7d3003395e8e8c34ebfba1719" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-12T13:04:12.182087" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_umi/tests/nextflow.config b/subworkflows/nf-core/bam_dedup_umi/tests/nextflow.config new file mode 100644 index 000000000..cd10b5c78 --- /dev/null +++ b/subworkflows/nf-core/bam_dedup_umi/tests/nextflow.config @@ -0,0 +1,38 @@ +process { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:UMI(COLLAPSE|TOOLS_DEDUP)' { + ext.prefix = { "${meta.id}_umi_dedup.genome" } + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:UMI(COLLAPSE|TOOLS_DEDUP)' { + ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:UMI(COLLAPSE|TOOLS_DEDUP)' { + ext.prefix = { "${meta.id}.umi_dedup.genome.sorted" } + } + + withName: 'UMITOOLS_PREPAREFORRSEM' { + ext.prefix = { "${meta.id}_preparedforrsem" } + } + + withName: 'BAM_DEDUP_UMI:SAMTOOLS_SORT' { + ext.args = '-n' + ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } + } + + withName: 'BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.transcriptome.sorted" } + } + + withName: 'BAM_DEDUP_UMI:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.transcriptome.sorted.bam" } + } + + withName: 
'BAM_DEDUP_UMI:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted.bam" } + } + + withName: 'BAM_DEDUP_UMI:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.umi_dedup.genome.sorted.bam" } + } +} diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 63c3678de..508aeb5fb 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -17,8 +17,8 @@ include { MULTIQC_CUSTOM_BIOTYPE } from '../../modules/local/multiqc // include { ALIGN_STAR } from '../../subworkflows/local/align_star' include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem' -include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_STAR } from '../../subworkflows/local/bam_dedup_umi' -include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_HISAT2 } from '../../subworkflows/local/bam_dedup_umi' +include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_STAR } from '../../subworkflows/nf-core/bam_dedup_umi' +include { BAM_DEDUP_UMI as BAM_DEDUP_UMI_HISAT2 } from '../../subworkflows/nf-core/bam_dedup_umi' include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { multiqcTsvFromList } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness' @@ -57,10 +57,6 @@ include { softwareVersionsToYAML } from '../../subworkflows/nf-core/ut include { FASTQ_ALIGN_HISAT2 } from '../../subworkflows/nf-core/fastq_align_hisat2' include { BAM_MARKDUPLICATES_PICARD } from '../../subworkflows/nf-core/bam_markduplicates_picard' include { BAM_RSEQC } from '../../subworkflows/nf-core/bam_rseqc' -include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' -include { BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE as BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME } from 
'../../subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools' include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_FORWARD } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig' include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig' include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON } from '../../subworkflows/nf-core/quantify_pseudo_alignment'