diff --git a/modules/local/amber/main.nf b/modules/local/amber/main.nf index a360e0da..368a461e 100644 --- a/modules/local/amber/main.nf +++ b/modules/local/amber/main.nf @@ -5,7 +5,7 @@ process AMBER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-amber:4.0--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-amber:4.0--hdfd78af_0' }" + 'biocontainers/hmftools-amber:4.0--hdfd78af_0' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) @@ -52,6 +52,7 @@ process AMBER { """ mkdir -p amber/ touch amber/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/bamtools/main.nf b/modules/local/bamtools/main.nf index 64fa90c9..5b397403 100644 --- a/modules/local/bamtools/main.nf +++ b/modules/local/bamtools/main.nf @@ -5,7 +5,7 @@ process BAMTOOLS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-bam-tools:1.2.1--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-bam-tools:1.2.1--hdfd78af_0' }" + 'biocontainers/hmftools-bam-tools:1.2.1--hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai) @@ -26,6 +26,7 @@ process BAMTOOLS { bamtools \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ com.hartwig.hmftools.bamtools.metrics.BamMetrics \\ + ${args} \\ -sample ${meta.sample_id} \\ -bam_file ${bam} \\ -ref_genome ${genome_fasta} \\ @@ -44,6 +45,7 @@ process BAMTOOLS { stub: """ touch ${meta.sample_id}.wgsmetrics + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/bwa-mem2/mem/main.nf b/modules/local/bwa-mem2/mem/main.nf index bbd4a6bc..9c44d086 100644 --- a/modules/local/bwa-mem2/mem/main.nf +++ b/modules/local/bwa-mem2/mem/main.nf @@ -5,7 +5,7 @@ process BWAMEM2_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' : - 'quay.io/biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' }" + 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' }" input: tuple val(meta), path(reads_fwd), path(reads_rev) diff --git a/modules/local/chord/main.nf b/modules/local/chord/main.nf index 2fabc9a4..bd835166 100644 --- a/modules/local/chord/main.nf +++ b/modules/local/chord/main.nf @@ -5,7 +5,7 @@ process CHORD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/r-chord:2.03--r43hdfd78af_0' : - 'quay.io/biocontainers/r-chord:2.03--r43hdfd78af_0' }" + 'biocontainers/r-chord:2.03--r43hdfd78af_0' }" input: tuple val(meta), path(smlv_vcf), path(sv_vcf) @@ -19,8 +19,6 @@ process CHORD { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - """ #!/usr/bin/env Rscript library('CHORD') @@ -82,6 +80,7 @@ process CHORD { mkdir -p chord/ touch chord/${meta.sample_id}_chord_signatures.txt touch chord/${meta.sample_id}_chord_prediction.txt + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/cobalt/main.nf b/modules/local/cobalt/main.nf index 52a40eba..db549a26 100644 --- a/modules/local/cobalt/main.nf +++ b/modules/local/cobalt/main.nf @@ -5,7 +5,7 @@ process COBALT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:1.16--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-cobalt:1.16--hdfd78af_0' }" + 'biocontainers/hmftools-cobalt:1.16--hdfd78af_0' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) diff --git a/modules/local/cuppa/main.nf b/modules/local/cuppa/main.nf index 213f9819..f7ba8e08 100644 --- a/modules/local/cuppa/main.nf +++ b/modules/local/cuppa/main.nf @@ -5,7 +5,7 @@ process CUPPA { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-cuppa:1.8.1--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-cuppa:1.8.1--hdfd78af_0' }" + 'biocontainers/hmftools-cuppa:1.8.1--hdfd78af_0' }" input: tuple val(meta), path(isofox_dir), path(purple_dir), path(linx_dir), path(virusinterpreter_dir) @@ -46,6 +46,7 @@ process CUPPA { cuppa \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ -sample ${meta.sample_id} \\ -sample_data_dir sample_data/ \\ -categories ${classifier} \\ @@ -76,6 +77,7 @@ process CUPPA { touch cuppa/${meta.sample_id}.cup.report.summary.png touch cuppa/${meta.sample_id}.cup.report.features.png touch cuppa/${meta.sample_id}.cuppa.chart.png + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/custom/lilac_extract_and_index_contig/main.nf b/modules/local/custom/lilac_extract_and_index_contig/main.nf index 4dd4ae64..16aaa86c 100644 --- a/modules/local/custom/lilac_extract_and_index_contig/main.nf +++ b/modules/local/custom/lilac_extract_and_index_contig/main.nf @@ -5,7 +5,7 @@ process CUSTOM_EXTRACTCONTIG { conda "bwa-mem2=2.2.1 samtools=1.19.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' : - 'quay.io/biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' }" + 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' }" input: val contig_name diff --git a/modules/local/custom/lilac_realign_reads_lilac/main.nf b/modules/local/custom/lilac_realign_reads_lilac/main.nf index c9e02f0f..8337ab0e 100644 --- a/modules/local/custom/lilac_realign_reads_lilac/main.nf +++ b/modules/local/custom/lilac_realign_reads_lilac/main.nf @@ -5,7 +5,7 @@ process CUSTOM_REALIGNREADS { conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.19.2 bioconda::sambamba=1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' : - 'quay.io/biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' }" + 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:544519c4a0ff7e9616a3b44afde1f143c52f10c3-0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/local/custom/lilac_slice/main.nf b/modules/local/custom/lilac_slice/main.nf index b8013974..9a6e093a 100644 --- a/modules/local/custom/lilac_slice/main.nf +++ b/modules/local/custom/lilac_slice/main.nf @@ -5,7 +5,7 @@ process CUSTOM_SLICE { conda "samtools=1.19.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'quay.io/biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/local/fastp/environment.yml b/modules/local/fastp/environment.yml new file mode 100644 index 00000000..70389e66 --- /dev/null +++ b/modules/local/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf index 8bd47159..e12cf1c2 100644 --- a/modules/local/fastp/main.nf +++ b/modules/local/fastp/main.nf @@ -1,13 +1,15 @@ process FASTP { tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastp:0.23.4--hadf994f_2' : - 'quay.io/biocontainers/fastp:0.23.4--hadf994f_2' }" + 'biocontainers/fastp:0.23.4--hadf994f_2' }" input: tuple val(meta), path(reads_fwd), path(reads_rev) - val(max_fastq_records) + val max_fastq_records output: tuple val(meta), path('*_R1.fastp.fastq.gz'), path('*_R2.fastp.fastq.gz'), emit: fastq @@ -17,12 +19,11 @@ process FASTP { task.ext.when == null || task.ext.when script: - """ - # * do not apply trimming/clipping, already done in BCL convert - # * turn off all filtering - # * do not process umis, already done for us + def args = task.ext.args ?: '' + """ fastp \\ + ${args} \\ --in1 ${reads_fwd} \\ --in2 ${reads_rev} \\ --disable_quality_filtering \\ diff --git a/modules/local/gridss/index/main.nf b/modules/local/gridss/index/main.nf index 340f4b4b..4fd7c346 100644 --- a/modules/local/gridss/index/main.nf +++ b/modules/local/gridss/index/main.nf @@ -6,7 +6,7 @@ process GRIDSS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3' : - 'quay.io/biocontainers/gridss:2.13.2--h50ea8bc_3' }" + 'biocontainers/gridss:2.13.2--h50ea8bc_3' }" input: path genome_fasta diff --git a/modules/local/gripss/germline/main.nf b/modules/local/gripss/germline/main.nf index ef2e5242..8f28b7a5 100644 --- a/modules/local/gripss/germline/main.nf +++ b/modules/local/gripss/germline/main.nf @@ -5,7 +5,7 @@ process GRIPSS_GERMLINE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-gripss:2.4--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-gripss:2.4--hdfd78af_0' }" + 'biocontainers/hmftools-gripss:2.4--hdfd78af_0' }" input: tuple val(meta), path(gridss_vcf) @@ -57,6 +57,7 @@ process GRIPSS_GERMLINE { touch ${meta.normal_id}.gripss.filtered.germline.vcf.gz.tbi touch ${meta.normal_id}.gripss.germline.vcf.gz touch ${meta.normal_id}.gripss.germline.vcf.gz.tbi + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/gripss/somatic/main.nf b/modules/local/gripss/somatic/main.nf index 14f08944..24815d4d 100644 --- a/modules/local/gripss/somatic/main.nf +++ b/modules/local/gripss/somatic/main.nf @@ -5,7 +5,7 @@ process GRIPSS_SOMATIC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-gripss:2.4--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-gripss:2.4--hdfd78af_0' }" + 'biocontainers/hmftools-gripss:2.4--hdfd78af_0' }" input: tuple val(meta), path(gridss_vcf) @@ -62,6 +62,7 @@ process GRIPSS_SOMATIC { touch ${meta.tumor_id}.gripss.filtered.somatic.vcf.gz.tbi touch ${meta.tumor_id}.gripss.somatic.vcf.gz touch ${meta.tumor_id}.gripss.somatic.vcf.gz.tbi + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/isofox/main.nf b/modules/local/isofox/main.nf index cd215d70..c987a988 100644 --- a/modules/local/isofox/main.nf +++ b/modules/local/isofox/main.nf @@ -5,7 +5,7 @@ process ISOFOX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0': - 'quay.io/biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }" + 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai) @@ -68,6 +68,7 @@ process ISOFOX { """ mkdir -p isofox/ touch isofox/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/lilac/main.nf b/modules/local/lilac/main.nf index 91936f07..33a9c1c6 100644 --- a/modules/local/lilac/main.nf +++ b/modules/local/lilac/main.nf @@ -5,7 +5,7 @@ process LILAC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-lilac:1.6--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-lilac:1.6--hdfd78af_0' }" + 'biocontainers/hmftools-lilac:1.6--hdfd78af_0' }" input: tuple val(meta), path(normal_dna_bam), path(normal_dna_bai), path(tumor_dna_bam), path(tumor_dna_bai), path(tumor_rna_bam), path(tumor_rna_bai), path(purple_dir) @@ -55,6 +55,7 @@ process LILAC { """ mkdir -p lilac/ touch lilac/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/linx/germline/main.nf b/modules/local/linx/germline/main.nf index 14efc84c..515217ee 100644 --- a/modules/local/linx/germline/main.nf +++ b/modules/local/linx/germline/main.nf @@ -5,7 +5,7 @@ process LINX_GERMLINE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-linx:1.25--hdfd78af_0': - 'quay.io/biocontainers/hmftools-linx:1.25--hdfd78af_0' }" + 'biocontainers/hmftools-linx:1.25--hdfd78af_0' }" input: tuple val(meta), path(sv_vcf) @@ -45,6 +45,7 @@ process LINX_GERMLINE { """ mkdir linx_germline/ touch linx_germline/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf index a826e94d..9a33033d 100644 --- a/modules/local/linx/somatic/main.nf +++ b/modules/local/linx/somatic/main.nf @@ -5,7 +5,7 @@ process LINX_SOMATIC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-linx:1.25--hdfd78af_0': - 'quay.io/biocontainers/hmftools-linx:1.25--hdfd78af_0' }" + 'biocontainers/hmftools-linx:1.25--hdfd78af_0' }" input: tuple val(meta), path(purple_dir) @@ -48,6 +48,7 @@ process LINX_SOMATIC { """ mkdir linx_somatic/ touch linx_somatic/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/linx/visualiser/main.nf b/modules/local/linx/visualiser/main.nf index 5ff176c5..71b7628d 100644 --- a/modules/local/linx/visualiser/main.nf +++ b/modules/local/linx/visualiser/main.nf @@ -5,7 +5,7 @@ process LINX_VISUALISER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/hmftools-linx:1.25--hdfd78af_0': - 'quay.io/biocontainers/hmftools-linx:1.25--hdfd78af_0' }" + 'biocontainers/hmftools-linx:1.25--hdfd78af_0' }" input: tuple val(meta), path(linx_annotation_dir) @@ -93,6 +93,7 @@ process LINX_VISUALISER { """ mkdir -p plots/{all,reportable}/ touch plots/{all,reportable}/placeholder + echo -e '${task.process}:\n stub: noversions\n' > versions.yml """ } diff --git a/modules/local/linxreport/main.nf b/modules/local/linxreport/main.nf index da526c1d..f3d3a55e 100644 --- a/modules/local/linxreport/main.nf +++ b/modules/local/linxreport/main.nf @@ -5,7 +5,7 @@ process LINXREPORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-linxreport:1.0.0--r43hdfd78af_0' : - 'quay.io/biocontainers/r-linxreport:1.0.0--r43hdfd78af_0' }" + 'biocontainers/r-linxreport:1.0.0--r43hdfd78af_0' }" input: tuple val(meta), path(linx_annotation_dir), path(linx_visualiser_dir) @@ -45,6 +45,7 @@ process LINXREPORT { stub: """ touch ${meta.sample_id}_linx.html + echo -e '${task.process}:\n stub: noversions\n' > versions.yml """ } diff --git a/modules/local/markdups/environment.yml b/modules/local/markdups/environment.yml new file mode 100644 index 00000000..6557fc27 --- /dev/null +++ b/modules/local/markdups/environment.yml @@ -0,0 +1,7 @@ +name: markdups +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-mark-dups=1.1.5 diff --git a/modules/local/markdups/main.nf b/modules/local/markdups/main.nf index d4d0f443..c5dacc50 100644 --- a/modules/local/markdups/main.nf +++ b/modules/local/markdups/main.nf @@ -4,7 +4,7 @@ process MARKDUPS { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-mark-dups:1.1.5--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-mark-dups:1.1.5--hdfd78af_0' }" + 'biocontainers/hmftools-mark-dups:1.1.5--hdfd78af_0' }" input: tuple val(meta), path(bams), path(bais) @@ -24,10 +24,12 @@ process MARKDUPS { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' def umi_flags = has_umis ? '-umi_enabled -umi_duplex -umi_duplex_delim +' : '' """ markdups \\ + ${args} \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ \\ -samtools \$(which samtools) \\ diff --git a/modules/local/orange/main.nf b/modules/local/orange/main.nf index ed66236f..bff0d884 100644 --- a/modules/local/orange/main.nf +++ b/modules/local/orange/main.nf @@ -5,7 +5,7 @@ process ORANGE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/hmftools-orange:2.7.1--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-orange:2.7.1--hdfd78af_0' }" + 'biocontainers/hmftools-orange:2.7.1--hdfd78af_0' }" input: tuple val(meta), path(bam_metrics_somatic), path(bam_metrics_germline), path(flagstat_somatic), path(flagstat_germline), path(sage_somatic_dir), path(sage_germline_dir), path(smlv_somatic_vcf), path(smlv_germline_vcf), path(purple_dir), path(linx_somatic_anno_dir), path(linx_somatic_plot_dir), path(linx_germline_anno_dir), path(virusinterpreter_dir), path(chord_dir), path(sigs_dir), path(lilac_dir), path(cuppa_dir), path(isofox_dir) @@ -105,6 +105,7 @@ process ORANGE { --add-opens java.base/java.time=ALL-UNNAMED \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ -jar \${orange_jar} \\ + ${args} \\ \\ -experiment_date \$(date +%y%m%d) \\ -add_disclaimer \\ @@ -156,6 +157,7 @@ process ORANGE { mkdir -p output/ touch output/${meta.tumor_id}.orange.json touch output/${meta.tumor_id}.orange.pdf + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/pave/germline/main.nf b/modules/local/pave/germline/main.nf index 1d85ada9..adc54589 100644 --- a/modules/local/pave/germline/main.nf +++ b/modules/local/pave/germline/main.nf @@ -9,7 +9,7 @@ process PAVE_GERMLINE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.6--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-pave:1.6--hdfd78af_0' }" + 'biocontainers/hmftools-pave:1.6--hdfd78af_0' }" input: tuple val(meta), path(sage_vcf), path(sage_tbi) @@ -73,6 +73,7 @@ process PAVE_GERMLINE { stub: """ touch ${meta.sample_id}.sage.pave_germline.vcf.gz{,.tbi} + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/pave/somatic/main.nf b/modules/local/pave/somatic/main.nf index a2e782b9..ccdeea62 100644 --- a/modules/local/pave/somatic/main.nf +++ b/modules/local/pave/somatic/main.nf @@ -5,7 +5,7 @@ process PAVE_SOMATIC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.6--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-pave:1.6--hdfd78af_0' }" + 'biocontainers/hmftools-pave:1.6--hdfd78af_0' }" input: tuple val(meta), path(sage_vcf), path(sage_tbi) @@ -32,7 +32,6 @@ process PAVE_SOMATIC { script: def args = task.ext.args ?: '' - def pon_filters def gnomad_args if (genome_ver.toString() == '37') { @@ -54,6 +53,7 @@ process PAVE_SOMATIC { """ pave \\ + ${args} \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ -sample ${meta.sample_id} \\ -vcf_file ${sage_vcf} \\ @@ -82,6 +82,7 @@ process PAVE_SOMATIC { stub: """ touch ${meta.sample_id}.sage.pave_somatic.vcf.gz{,.tbi} + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/purple/main.nf b/modules/local/purple/main.nf index 73fcdeba..b571bd07 100644 --- a/modules/local/purple/main.nf +++ b/modules/local/purple/main.nf @@ -5,7 +5,7 @@ process PURPLE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/hmftools-purple:4.0.2--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-purple:4.0.2--hdfd78af_0' }" + 'biocontainers/hmftools-purple:4.0.2--hdfd78af_0' }" input: tuple val(meta), path(amber), path(cobalt), path(sv_tumor_vcf), path(sv_tumor_tbi), path(sv_tumor_unfiltered_vcf), path(sv_tumor_unfiltered_tbi), path(sv_normal_vcf), path(sv_normal_tbi), path(smlv_tumor_vcf), path(smlv_normal_vcf) @@ -98,6 +98,7 @@ process PURPLE { touch purple/${meta.tumor_id}.purple.somatic.vcf.gz touch purple/${meta.tumor_id}.purple.sv.germline.vcf.gz touch purple/${meta.tumor_id}.purple.sv.vcf.gz + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/sage/append/main.nf b/modules/local/sage/append/main.nf index 2540a13f..e0fadee0 100644 --- a/modules/local/sage/append/main.nf +++ b/modules/local/sage/append/main.nf @@ -5,7 +5,7 @@ process SAGE_APPEND { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.4--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sage:3.4--hdfd78af_1' }" + 'biocontainers/hmftools-sage:3.4--hdfd78af_1' }" input: tuple val(meta), path(vcf), path(bam), path(bai) @@ -46,6 +46,7 @@ process SAGE_APPEND { stub: """ touch "${meta.dna_id}.sage.append.vcf.gz" + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/sage/germline/main.nf b/modules/local/sage/germline/main.nf index 7bb46f31..847719d6 100644 --- a/modules/local/sage/germline/main.nf +++ b/modules/local/sage/germline/main.nf @@ -5,7 +5,7 @@ process SAGE_GERMLINE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.4--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sage:3.4--hdfd78af_1' }" + 'biocontainers/hmftools-sage:3.4--hdfd78af_1' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) @@ -76,6 +76,7 @@ process SAGE_GERMLINE { touch germline/${meta.normal_id}.sage.bqr.png touch germline/${meta.normal_id}.sage.bqr.tsv touch germline/${meta.normal_id}.gene.coverage.tsv + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/sage/somatic/main.nf b/modules/local/sage/somatic/main.nf index 3b11a984..12028be6 100644 --- a/modules/local/sage/somatic/main.nf +++ b/modules/local/sage/somatic/main.nf @@ -7,7 +7,7 @@ process SAGE_SOMATIC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sage:3.4--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sage:3.4--hdfd78af_1' }" + 'biocontainers/hmftools-sage:3.4--hdfd78af_1' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) @@ -31,7 +31,6 @@ process SAGE_SOMATIC { script: def args = task.ext.args ?: '' - def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' def reference_bam_arg = normal_bam ? 
"-reference_bam ${normal_bam}" : '' @@ -73,6 +72,7 @@ process SAGE_SOMATIC { touch somatic/${meta.tumor_id}.sage.bqr.tsv touch somatic/${meta.normal_id}.sage.bqr.png touch somatic/${meta.normal_id}.sage.bqr.tsv + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/sambamba/merge/environment.yml b/modules/local/sambamba/merge/environment.yml new file mode 100644 index 00000000..e98f53f9 --- /dev/null +++ b/modules/local/sambamba/merge/environment.yml @@ -0,0 +1,7 @@ +name: sambamba_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sambamba=1.0 diff --git a/modules/local/sambamba/merge/main.nf b/modules/local/sambamba/merge/main.nf index 1bbb9646..08e8c2af 100644 --- a/modules/local/sambamba/merge/main.nf +++ b/modules/local/sambamba/merge/main.nf @@ -1,9 +1,10 @@ process SAMBAMBA_MERGE { tag "${meta.id}" + label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/sambamba:1.0--h98b6b92_0' : - 'quay.io/biocontainers/sambamba:1.0--h98b6b92_0' }" + 'biocontainers/sambamba:1.0--h98b6b92_0' }" input: tuple val(meta), path(bams) @@ -12,9 +13,15 @@ process SAMBAMBA_MERGE { tuple val(meta), path('*bam'), emit: bam path 'versions.yml' , emit: versions + when: + task.ext.when == null || task.ext.when + script: + def args = task.ext.args ?: '' + """ sambamba merge \\ + ${args} \\ --nthreads ${task.cpus} \\ ${meta.sample_id}.bam \\ ${bams} @@ -28,6 +35,7 @@ process SAMBAMBA_MERGE { stub: """ touch ${meta.sample_id}.bam + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/sigs/main.nf b/modules/local/sigs/main.nf index 56a011e9..929bfda3 100644 --- a/modules/local/sigs/main.nf +++ b/modules/local/sigs/main.nf @@ -5,7 +5,7 @@ process SIGS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sigs:1.2.1--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-sigs:1.2.1--hdfd78af_0' }" + 'biocontainers/hmftools-sigs:1.2.1--hdfd78af_0' }" input: tuple val(meta), path(smlv_vcf) @@ -26,6 +26,7 @@ process SIGS { sigs \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ -sample ${meta.sample_id} \\ -somatic_vcf_file ${smlv_vcf} \\ -signatures_file ${signatures} \\ @@ -41,6 +42,7 @@ process SIGS { """ mkdir -p sigs/ touch sigs/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/star/align/environment.yml b/modules/local/star/align/environment.yml new file mode 100644 index 00000000..e694b27f --- /dev/null +++ b/modules/local/star/align/environment.yml @@ -0,0 +1,7 @@ +name: star_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.3a diff --git a/modules/local/star/main.nf b/modules/local/star/align/main.nf similarity index 93% rename from modules/local/star/main.nf rename to modules/local/star/align/main.nf index 7aa4503f..6694a23e 100644 --- a/modules/local/star/main.nf +++ b/modules/local/star/align/main.nf @@ -1,10 +1,10 @@ -process STAR { +process STAR_ALIGN { tag "${meta.id}" label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/star:2.7.3a--0' : - 'quay.io/biocontainers/star:2.7.3a--0' }" + 'biocontainers/star:2.7.3a--0' }" input: tuple val(meta), path(reads_fwd), path(reads_rev) @@ -18,8 +18,11 @@ process STAR { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' + """ STAR \\ + ${args} \\ --readFilesIn ${reads_fwd} ${reads_rev} \\ --genomeDir ${genome_star_index} \\ --runThreadN ${task.cpus} \\ @@ -57,6 +60,7 @@ process STAR { stub: """ touch Aligned.out.bam + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/star/meta.yml b/modules/local/star/align/meta.yml similarity index 98% rename from modules/local/star/meta.yml rename to modules/local/star/align/meta.yml index 0bbc3329..19bb83c5 100644 --- a/modules/local/star/meta.yml +++ b/modules/local/star/align/meta.yml @@ -1,4 +1,4 @@ -name: star +name: star_align description: An ultrafast universal RNA-seq aligner keywords: - rna-seq diff --git a/modules/local/svprep/assemble/main.nf b/modules/local/svprep/assemble/main.nf index 532909d2..075d82f5 100644 --- a/modules/local/svprep/assemble/main.nf +++ b/modules/local/svprep/assemble/main.nf @@ -5,7 +5,7 @@ process GRIDSS_ASSEMBLE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.3--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" + 'biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" input: tuple val(meta), path(bams), path(bams_filtered), path(preprocess_dirs), val(labels) @@ -97,6 +97,7 @@ process GRIDSS_ASSEMBLE { """ mkdir -p gridss_assemble/ touch gridss_assemble/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/svprep/call/main.nf b/modules/local/svprep/call/main.nf index 56dd5833..2ed37f75 100644 --- a/modules/local/svprep/call/main.nf +++ b/modules/local/svprep/call/main.nf @@ -5,7 +5,7 @@ process GRIDSS_CALL { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.3--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" + 'biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" input: tuple val(meta), path(bams), path(bams_filtered), path(assemble_dir), val(labels) @@ -101,6 +101,7 @@ process GRIDSS_CALL { #CHROM POS ID REF ALT QUAL FILTER INFO . . . . . . . EOF + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/svprep/depth_annotator/main.nf b/modules/local/svprep/depth_annotator/main.nf index 02858a86..5f4d43c1 100644 --- a/modules/local/svprep/depth_annotator/main.nf +++ b/modules/local/svprep/depth_annotator/main.nf @@ -5,7 +5,7 @@ process SVPREP_DEPTH_ANNOTATOR { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.3--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" + 'biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" input: tuple val(meta), path(bams), path(bais), path(vcf), val(labels) @@ -51,6 +51,7 @@ process SVPREP_DEPTH_ANNOTATOR { """ touch ${meta.tumor_id}.gridss.vcf.gz touch ${meta.tumor_id}.gridss.vcf.gz.tbi + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/svprep/preprocess/main.nf b/modules/local/svprep/preprocess/main.nf index 71a8ddef..fde547ba 100644 --- a/modules/local/svprep/preprocess/main.nf +++ b/modules/local/svprep/preprocess/main.nf @@ -5,7 +5,7 @@ process GRIDSS_PREPROCESS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.3--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" + 'biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" input: tuple val(meta), path(bam), path(bam_filtered) @@ -54,6 +54,7 @@ process GRIDSS_PREPROCESS { """ mkdir -p gridss_preprocess/${meta.sample_id}.sv_prep.sorted.bam.gridss.working/ touch gridss_preprocess/${meta.sample_id}.sv_prep.sorted.bam.gridss.working/placeholder + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/svprep/svprep/main.nf b/modules/local/svprep/svprep/main.nf index b7a15ce8..c0fabc23 100644 --- a/modules/local/svprep/svprep/main.nf +++ b/modules/local/svprep/svprep/main.nf @@ -5,7 +5,7 @@ process SVPREP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmftools-sv-prep:1.2.3--hdfd78af_1' : - 'quay.io/biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" + 'biocontainers/hmftools-sv-prep:1.2.3--hdfd78af_1' }" input: tuple val(meta), path(bam), path(bai), path(junctions) @@ -59,6 +59,7 @@ process SVPREP { """ touch "${meta.sample_id}.sv_prep.sorted.bam" touch "${meta.sample_id}.sv_prep.junctions.tsv" + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf index de689215..d7056d6f 100644 --- a/modules/local/virusbreakend/main.nf +++ b/modules/local/virusbreakend/main.nf @@ -32,6 +32,7 @@ process VIRUSBREAKEND { ln -s \$(find -L ${genome_gridss_index} -type f) ./ virusbreakend \\ + ${args} \\ --gridssargs "--jvmheap ${Math.round(task.memory.bytes * 0.95)}" \\ --threads ${task.cpus} \\ --db ${virusbreakenddb.toString().replaceAll("/\$", "")}/ \\ @@ -48,6 +49,7 @@ process VIRUSBREAKEND { stub: """ touch ${meta.sample_id}.virusbreakend.vcf ${meta.sample_id}.summary.tsv + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/virusinterpreter/main.nf b/modules/local/virusinterpreter/main.nf index 0d788545..fac520e8 100644 --- a/modules/local/virusinterpreter/main.nf +++ b/modules/local/virusinterpreter/main.nf @@ -5,7 +5,7 @@ process VIRUSINTERPRETER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/hmftools-virus-interpreter:1.3--hdfd78af_0' : - 'quay.io/biocontainers/hmftools-virus-interpreter:1.3--hdfd78af_0' }" + 'biocontainers/hmftools-virus-interpreter:1.3--hdfd78af_0' }" input: tuple val(meta), path(virus_tsv), path(purple_dir), path(wgs_metrics) @@ -27,6 +27,7 @@ process VIRUSINTERPRETER { virusinterpreter \\ -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ -sample ${meta.sample_id} \\ -purple_dir ${purple_dir} \\ -tumor_sample_wgs_metrics_file ${wgs_metrics} \\ @@ -45,6 +46,7 @@ process VIRUSINTERPRETER { """ mkdir -p virusinterpreter/ touch virusinterpreter/${meta.sample_id}.virus.annotated.tsv + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf index 356cac0f..7e0cffa7 100644 --- a/modules/nf-core/gatk4/markduplicates/main.nf +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -5,7 +5,7 @@ process GATK4_MARKDUPLICATES { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..ca39fb67 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/samtools/dict/main.nf b/modules/nf-core/samtools/dict/main.nf index 4a1522e9..fb019510 100644 --- a/modules/nf-core/samtools/dict/main.nf +++ b/modules/nf-core/samtools/dict/main.nf @@ -5,7 +5,7 @@ process SAMTOOLS_DICT { conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'biocontainers/samtools:1.16.1--h6899075_1' }" input: path fasta diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 20c0e67e..75bfdb96 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -5,7 +5,7 @@ process SAMTOOLS_FAIDX { conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'biocontainers/samtools:1.16.1--h6899075_1' }" input: path fasta diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index 74fbaa5d..610178dd 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -5,7 +5,7 @@ process SAMTOOLS_FLAGSTAT { conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'biocontainers/samtools:1.16.1--h6899075_1' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 8aaf9a5b..18a3e1dc 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -5,7 +5,7 @@ process SAMTOOLS_SORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'quay.io/biocontainers/samtools:1.18--h50ea8bc_1' }" + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(bam) diff --git a/subworkflows/local/amber_profiling/main.nf b/subworkflows/local/amber_profiling/main.nf index a4cb59f0..179888bf 100644 --- a/subworkflows/local/amber_profiling/main.nf +++ b/subworkflows/local/amber_profiling/main.nf @@ -9,82 +9,82 @@ include { AMBER } from '../../../modules/local/amber/main' workflow AMBER_PROFILING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] - ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] - // Reference data - genome_version // channel: [mandatory] genome version - heterozygous_sites // channel: [optional] /path/to/heterozygous_sites - target_region_bed // channel: [optional] /path/to/target_region_bed + // Reference data + genome_version // channel: [mandatory] genome version + heterozygous_sites // channel: [optional] /path/to/heterozygous_sites + target_region_bed // channel: [optional] /path/to/target_region_bed main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() - // Select input sources and sort - // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai] - // channel: skip: [ meta ] - ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( - ch_tumor_bam, - ch_normal_bam, - ) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - return [ - meta, - Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), - tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), - Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), - normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), - ] - } - .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.AMBER_DIR) - runnable: tumor_bam && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_amber, tumor_bam, normal_bam, tumor_bai, normal_bai ] - ch_amber_inputs = ch_inputs_sorted.runnable - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + // Select input sources and sort + // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + 
meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.AMBER_DIR) + runnable: tumor_bam && !has_existing + skip: true + return meta + } - def meta_amber = [ - key: meta.group_id, - id: meta.group_id, - tumor_id: Utils.getTumorDnaSampleName(meta), - ] + // Create process input channel + // channel: [ meta_amber, tumor_bam, normal_bam, tumor_bai, normal_bai ] + ch_amber_inputs = ch_inputs_sorted.runnable + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - if (normal_bam) { - meta_amber.normal_id = Utils.getNormalDnaSampleName(meta) - } + def meta_amber = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] - [meta_amber, tumor_bam, normal_bam, tumor_bai, normal_bai] + if (normal_bam) { + meta_amber.normal_id = Utils.getNormalDnaSampleName(meta) } - // Run process - AMBER( - ch_amber_inputs, - genome_version, - heterozygous_sites, - target_region_bed, - ) + [meta_amber, tumor_bam, normal_bam, tumor_bai, normal_bai] + } + + // Run process + AMBER( + ch_amber_inputs, + genome_version, + heterozygous_sites, + target_region_bed, + ) - ch_versions = ch_versions.mix(AMBER.out.versions) + ch_versions = ch_versions.mix(AMBER.out.versions) - // Set outputs, restoring original meta - // channel: [ meta, amber_dir ] - ch_outputs = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(AMBER.out.amber_dir, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) + // Set outputs, restoring original meta + // channel: [ meta, amber_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(AMBER.out.amber_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) emit: - amber_dir = ch_outputs // channel: [ meta, amber_dir ] + amber_dir = ch_outputs // channel: [ meta, amber_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/bamtools_metrics/main.nf b/subworkflows/local/bamtools_metrics/main.nf index e71ca2bb..6e16c412 100644 --- a/subworkflows/local/bamtools_metrics/main.nf +++ b/subworkflows/local/bamtools_metrics/main.nf @@ -9,109 +9,109 @@ include { BAMTOOLS } from '../../../modules/local/bamtools/main' workflow BAMTOOLS_METRICS { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] - ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Sort inputs, separate by tumor and normal - // channel: runnable: [ meta, bam, bai ] - // channel: skip: [ meta ] - 
ch_inputs_tumor_sorted = ch_tumor_bam - .map { meta, bam, bai -> - return [ - meta, - Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), - bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), - ] - } - .branch { meta, bam, bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAMTOOLS_TUMOR) - runnable: bam && !has_existing - skip: true - return meta - } - - // channel: runnable: [ meta, bam, bai ] - // channel: skip: [ meta ] - ch_inputs_normal_sorted = ch_normal_bam - .map { meta, bam, bai -> - return [ - meta, - Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), - bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), - ] - } - .branch { meta, bam, bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAMTOOLS_NORMAL) - runnable: bam && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_bamtools, bam, bai ] - ch_bamtools_inputs = Channel.empty() - .mix( - ch_inputs_tumor_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bam, bai] }, - ch_inputs_normal_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getNormalDnaSample(meta), 'normal', bam, bai] }, - ) - .map { meta, meta_sample, sample_type, bam, bai -> - - def meta_bamtools = [ - key: meta.group_id, - id: "${meta.group_id}_${meta_sample.sample_id}", - sample_id: meta_sample.sample_id, - sample_type: sample_type, - ] - - return [meta_bamtools, bam, bai] - } - - // Run process - BAMTOOLS( - ch_bamtools_inputs, - genome_fasta, - genome_version, + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs, separate by tumor and normal + // channel: runnable: [ meta, bam, bai ] + // channel: skip: [ meta ] + ch_inputs_tumor_sorted = ch_tumor_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAMTOOLS_TUMOR) + runnable: bam && !has_existing + skip: true + return meta + } + + // channel: runnable: [ meta, bam, bai ] + // channel: skip: [ meta ] + ch_inputs_normal_sorted = ch_normal_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAMTOOLS_NORMAL) + runnable: bam && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_bamtools, bam, bai ] + ch_bamtools_inputs = Channel.empty() + .mix( + ch_inputs_tumor_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bam, bai] }, + ch_inputs_normal_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getNormalDnaSample(meta), 'normal', bam, bai] }, + ) + .map { meta, meta_sample, sample_type, bam, bai -> + + def meta_bamtools = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + sample_type: sample_type, + ] + + return [meta_bamtools, bam, bai] + } + + // Run process + BAMTOOLS( + ch_bamtools_inputs, + genome_fasta, + genome_version, + ) + + ch_versions = ch_versions.mix(BAMTOOLS.out.versions) + + // Sort into a 
tumor and normal channel + ch_bamtools_out = BAMTOOLS.out.metrics + .branch { meta_bamtools, metrics -> + assert ['tumor', 'normal'].contains(meta_bamtools.sample_type) + tumor: meta_bamtools.sample_type == 'tumor' + normal: meta_bamtools.sample_type == 'normal' + placeholder: true + } + + // Set outputs, restoring original meta + // channel: [ meta, metrics ] + ch_somatic_metrics = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_bamtools_out.tumor, ch_inputs), + ch_inputs_tumor_sorted.skip.map { meta -> [meta, []] }, ) - ch_versions = ch_versions.mix(BAMTOOLS.out.versions) - - // Sort into a tumor and normal channel - ch_bamtools_out = BAMTOOLS.out.metrics - .branch { meta_bamtools, metrics -> - assert ['tumor', 'normal'].contains(meta_bamtools.sample_type) - tumor: meta_bamtools.sample_type == 'tumor' - normal: meta_bamtools.sample_type == 'normal' - placeholder: true - } - - // Set outputs, restoring original meta - // channel: [ meta, metrics ] - ch_somatic_metrics = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ch_bamtools_out.tumor, ch_inputs), - ch_inputs_tumor_sorted.skip.map { meta -> [meta, []] }, - ) - - ch_germline_metrics = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ch_bamtools_out.normal, ch_inputs), - ch_inputs_normal_sorted.skip.map { meta -> [meta, []] }, - ) + ch_germline_metrics = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_bamtools_out.normal, ch_inputs), + ch_inputs_normal_sorted.skip.map { meta -> [meta, []] }, + ) emit: - somatic = ch_somatic_metrics // channel: [ meta, metrics ] - germline = ch_germline_metrics // channel: [ meta, metrics ] + somatic = ch_somatic_metrics // channel: [ meta, metrics ] + germline = ch_germline_metrics // channel: [ meta, metrics ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/chord_prediction/main.nf b/subworkflows/local/chord_prediction/main.nf index e482e5d2..a26d068f 100644 --- a/subworkflows/local/chord_prediction/main.nf +++ b/subworkflows/local/chord_prediction/main.nf @@ -9,87 +9,87 @@ include { CHORD } from '../../../modules/local/chord/main' workflow CHORD_PREDICTION { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_purple // channel: [mandatory] [ meta, purple_dir ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] - // Reference data - genome_version // channel: [mandatory] genome version + // Reference data + genome_version // channel: [mandatory] genome version main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources - // channel: [ meta, purple_dir ] - ch_inputs_selected = ch_purple - .map { meta, purple_dir -> - return [meta, Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR)] + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources + // channel: [ meta, purple_dir ] + ch_inputs_selected = ch_purple + .map { meta, purple_dir -> + return [meta, Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR)] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { meta, purple_dir -> + + def has_dna = Utils.hasTumorDna(meta) + + def tumor_id + def has_smlv_vcf + def has_sv_vcf + + if (has_dna) { + tumor_id 
= Utils.getTumorDnaSampleName(meta) + has_smlv_vcf = purple_dir ? file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") : [] + has_sv_vcf = purple_dir ? file(purple_dir).resolve("${tumor_id}.purple.sv.vcf.gz") : [] } - // Sort inputs - // channel: runnable: [ meta, purple_dir ] - // channel: skip: [ meta ] - ch_inputs_sorted = ch_inputs_selected - .branch { meta, purple_dir -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.CHORD_DIR) - def has_dna = Utils.hasTumorDna(meta) + runnable: has_dna && purple_dir && has_smlv_vcf && has_sv_vcf && !has_existing + skip: true + return meta + } - def tumor_id - def has_smlv_vcf - def has_sv_vcf + // Create process input channel + // channel: [ meta_chord, smlv_vcf, sv_vcf ] + ch_chord_inputs = ch_inputs_sorted.runnable + .map { meta, purple_dir -> - if (has_dna) { - tumor_id = Utils.getTumorDnaSampleName(meta) - has_smlv_vcf = purple_dir ? file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") : [] - has_sv_vcf = purple_dir ? file(purple_dir).resolve("${tumor_id}.purple.sv.vcf.gz") : [] - } + def tumor_id = Utils.getTumorDnaSampleName(meta) - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.CHORD_DIR) + def meta_chord = [ + key: meta.group_id, + id: meta.group_id, + sample_id: tumor_id, + ] - runnable: has_dna && purple_dir && has_smlv_vcf && has_sv_vcf && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_chord, smlv_vcf, sv_vcf ] - ch_chord_inputs = ch_inputs_sorted.runnable - .map { meta, purple_dir -> - - def tumor_id = Utils.getTumorDnaSampleName(meta) + def smlv_vcf = file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") + def sv_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.vcf.gz") - def meta_chord = [ - key: meta.group_id, - id: meta.group_id, - sample_id: tumor_id, - ] + return [meta_chord, smlv_vcf, sv_vcf] + } - def smlv_vcf = file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") - def sv_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.vcf.gz") + // Run process + CHORD( + ch_chord_inputs, + genome_version, + ) - return [meta_chord, smlv_vcf, sv_vcf] - } + ch_versions = ch_versions.mix(CHORD.out.versions) - // Run process - CHORD( - ch_chord_inputs, - genome_version, + // Set outputs, restoring original meta + // channel: [ meta, chord_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(CHORD.out.chord_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, ) - ch_versions = ch_versions.mix(CHORD.out.versions) - - // Set outputs, restoring original meta - // channel: [ meta, chord_dir ] - ch_outputs = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(CHORD.out.chord_dir, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) - emit: - chord_dir = ch_outputs // channel: [ meta, chord_dir ] + chord_dir = ch_outputs // channel: [ meta, chord_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/cobalt_profiling/main.nf b/subworkflows/local/cobalt_profiling/main.nf index 13102dee..10b06485 100644 --- a/subworkflows/local/cobalt_profiling/main.nf +++ b/subworkflows/local/cobalt_profiling/main.nf @@ -9,95 +9,95 @@ include { COBALT } from '../../../modules/local/cobalt/main' workflow COBALT_PROFILING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] - ch_normal_bam // channel: [mandatory] 
[ meta, bam, bai ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] - // Reference data - gc_profile // channel: [mandatory] /path/to/gc_profile - diploid_bed // channel: [optional] /path/to/diploid_bed - target_region_normalisation // channel: [optional] /path/to/target_region_normalisation + // Reference data + gc_profile // channel: [mandatory] /path/to/gc_profile + diploid_bed // channel: [optional] /path/to/diploid_bed + target_region_normalisation // channel: [optional] /path/to/target_region_normalisation main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources and sort - // NOTE(SW): germline mode is not currently supported - // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai] - // channel: skip: [ meta ] - ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( - ch_tumor_bam, - ch_normal_bam, + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // NOTE(SW): germline mode is not currently supported + // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.COBALT_DIR) + runnable_tn: tumor_bam && normal_bam && !has_existing + runnable_to: tumor_bam && !has_existing + skip: true + return meta + } + + // First set diploid BED input for tumor/normal and tumor only samples + // NOTE(SW): since the diploid BED is provided as a channel, I seem to be only able to include via channel ops + // channel: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai, diploid_bed ] + ch_inputs_runnable = Channel.empty() + .mix( + ch_inputs_sorted.runnable_tn.map { [*it, []] }, + ch_inputs_sorted.runnable_to.combine(diploid_bed), ) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - return [ - meta, - Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), - tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), - Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), - normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), - ] - } - .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.COBALT_DIR) - runnable_tn: tumor_bam && normal_bam && !has_existing - runnable_to: tumor_bam && !has_existing - skip: true - return meta - } - // First set diploid BED input for tumor/normal and tumor only samples - // NOTE(SW): since the diploid BED is provided as a channel, I seem to be only able to include via channel ops - // channel: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai, diploid_bed ] - ch_inputs_runnable = Channel.empty() - .mix( - 
ch_inputs_sorted.runnable_tn.map { [*it, []] }, - ch_inputs_sorted.runnable_to.combine(diploid_bed), - ) - - // Create process input channel - // channel: sample_data: [ meta_cobalt, tumor_bam, normal_bam, tumor_bai, normal_bai ] - // channel: diploid_bed: [ diploid_bed ] - ch_cobalt_inputs = ch_inputs_runnable - .multiMap { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, diploid_bed -> - - def meta_cobalt = [ - key: meta.group_id, - id: meta.group_id, - tumor_id: Utils.getTumorDnaSampleName(meta), - ] - - if (normal_bam) { - meta_cobalt.normal_id = Utils.getNormalDnaSampleName(meta) - } - - sample_data: [meta_cobalt, tumor_bam, normal_bam, tumor_bai, normal_bai] - diploid_bed: diploid_bed - } + // Create process input channel + // channel: sample_data: [ meta_cobalt, tumor_bam, normal_bam, tumor_bai, normal_bai ] + // channel: diploid_bed: [ diploid_bed ] + ch_cobalt_inputs = ch_inputs_runnable + .multiMap { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, diploid_bed -> - // Run process - COBALT( - ch_cobalt_inputs.sample_data, - gc_profile, - ch_cobalt_inputs.diploid_bed, - target_region_normalisation, - ) + def meta_cobalt = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] - ch_versions = ch_versions.mix(COBALT.out.versions) + if (normal_bam) { + meta_cobalt.normal_id = Utils.getNormalDnaSampleName(meta) + } - // Set outputs, restoring original meta - // channel: [ meta, cobalt_dir ] - ch_outputs = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(COBALT.out.cobalt_dir, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) + sample_data: [meta_cobalt, tumor_bam, normal_bam, tumor_bai, normal_bai] + diploid_bed: diploid_bed + } + + // Run process + COBALT( + ch_cobalt_inputs.sample_data, + gc_profile, + ch_cobalt_inputs.diploid_bed, + target_region_normalisation, + ) + + ch_versions = ch_versions.mix(COBALT.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, cobalt_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(COBALT.out.cobalt_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) emit: - cobalt_dir = ch_outputs // channel: [ meta, cobalt_dir ] + cobalt_dir = ch_outputs // channel: [ meta, cobalt_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/cuppa_prediction/main.nf b/subworkflows/local/cuppa_prediction/main.nf index 2b74a441..5c839073 100644 --- a/subworkflows/local/cuppa_prediction/main.nf +++ b/subworkflows/local/cuppa_prediction/main.nf @@ -9,141 +9,141 @@ include { CUPPA } from '../../../modules/local/cuppa/main' workflow CUPPA_PREDICTION { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_isofox // channel: [mandatory] [ meta, isofox_dir ] - ch_purple // channel: [mandatory] [ meta, purple_dir ] - ch_linx // channel: [mandatory] [ meta, linx_annotation_dir ] - ch_virusinterpreter // channel: [mandatory] [ meta, virusinterpreter_dir ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_linx // channel: [mandatory] [ meta, linx_annotation_dir ] + ch_virusinterpreter // channel: [mandatory] [ meta, virusinterpreter_dir ] - // Reference data - genome_version // channel: [mandatory] genome version - cuppa_resources // channel: [mandatory] /path/to/cuppa_resources/ + // Reference 
data + genome_version // channel: [mandatory] genome version + cuppa_resources // channel: [mandatory] /path/to/cuppa_resources/ main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources - // channel: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] - ch_inputs_selected = WorkflowOncoanalyser.groupByMeta( - ch_isofox, - ch_purple, - ch_linx, - ch_virusinterpreter, - ) - .map { meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() - def inputs = [ - Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), - Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), - Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), - Utils.selectCurrentOrExisting(virusinterpreter_dir, meta, Constants.INPUT.VIRUSINTERPRETER_DIR), - ] + // Select input sources + // channel: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] + ch_inputs_selected = WorkflowOncoanalyser.groupByMeta( + ch_isofox, + ch_purple, + ch_linx, + ch_virusinterpreter, + ) + .map { meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> - return [meta, *inputs] - } + def inputs = [ + Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + Utils.selectCurrentOrExisting(virusinterpreter_dir, meta, Constants.INPUT.VIRUSINTERPRETER_DIR), + ] - // Sort inputs - // channel: runnable: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] - // channel: skip: [ meta ] - ch_inputs_sorted = ch_inputs_selected - .branch { meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> - - // Run the following: - // - tumor DNA and normal DNA - // - tumor DNA and normal DNA, and tumor RNA - // - tumor RNA only - // - // Do not run the following: - // - tumor DNA only - // - panel mode (controlled by excluded from targeted subworkflow) - // - // (run exclusions currently done basis for presence of normal DNA) - - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.CUPPA_DIR) - def has_normal_dna = Utils.hasNormalDna(meta) - - def has_runnable_inputs = isofox_dir || (purple_dir && linx_annotation_dir && has_normal_dna) - - runnable: has_runnable_inputs && !has_existing - skip: true - return meta - } + return [meta, *inputs] + } - // Create process input channel - // channel: sample_data: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] - // channel: classifer: [ classifier ] - ch_cuppa_inputs = ch_inputs_sorted.runnable - .multiMap{ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> + // Sort inputs + // channel: runnable: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> - def meta_cuppa = [ - key: meta.group_id, - id: meta.group_id, - ] + // Run the following: + // - tumor DNA and normal DNA + // - tumor DNA and normal DNA, and tumor RNA + // - tumor RNA only + // + // Do not run the following: + // - tumor DNA only + // - panel mode 
(controlled by excluded from targeted subworkflow) + // + // (run exclusions currently done basis for presence of normal DNA) - def has_tumor_dna = Utils.hasTumorDna(meta) - def has_normal_dna = Utils.hasNormalDna(meta) - def has_tumor_rna = Utils.hasTumorRna(meta) + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.CUPPA_DIR) + def has_normal_dna = Utils.hasNormalDna(meta) - def has_dna_inputs = (purple_dir && linx_annotation_dir) - def has_rna_inputs = isofox_dir + def has_runnable_inputs = isofox_dir || (purple_dir && linx_annotation_dir && has_normal_dna) - def run_dna = has_dna_inputs && has_tumor_dna && has_normal_dna - def run_rna = has_rna_inputs && has_tumor_rna + runnable: has_runnable_inputs && !has_existing + skip: true + return meta + } - def classifier + // Create process input channel + // channel: sample_data: [ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir ] + // channel: classifer: [ classifier ] + ch_cuppa_inputs = ch_inputs_sorted.runnable + .multiMap{ meta, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir -> - if (run_dna && run_rna) { + def meta_cuppa = [ + key: meta.group_id, + id: meta.group_id, + ] - classifier = 'ALL' + def has_tumor_dna = Utils.hasTumorDna(meta) + def has_normal_dna = Utils.hasNormalDna(meta) + def has_tumor_rna = Utils.hasTumorRna(meta) - meta_cuppa.sample_id = Utils.getTumorDnaSampleName(meta) - meta_cuppa.sample_rna_id = Utils.getTumorRnaSampleName(meta) + def has_dna_inputs = (purple_dir && linx_annotation_dir) + def has_rna_inputs = isofox_dir - } else if (run_dna) { + def run_dna = has_dna_inputs && has_tumor_dna && has_normal_dna + def run_rna = has_rna_inputs && has_tumor_rna - classifier = 'DNA' + def classifier - meta_cuppa.sample_id = Utils.getTumorDnaSampleName(meta) + if (run_dna && run_rna) { - } else if (run_rna) { + classifier = 'ALL' - classifier = 'RNA' + meta_cuppa.sample_id = Utils.getTumorDnaSampleName(meta) + meta_cuppa.sample_rna_id = Utils.getTumorRnaSampleName(meta) - meta_cuppa.sample_id = Utils.getTumorRnaSampleName(meta) + } else if (run_dna) { - } else { + classifier = 'DNA' - assert false + meta_cuppa.sample_id = Utils.getTumorDnaSampleName(meta) - } + } else if (run_rna) { - sample_data: [meta_cuppa, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir] - classifier: classifier - } + classifier = 'RNA' - // Run process - CUPPA( - ch_cuppa_inputs.sample_data, - genome_version, - cuppa_resources, - ch_cuppa_inputs.classifier, - ) + meta_cuppa.sample_id = Utils.getTumorRnaSampleName(meta) - ch_versions = ch_versions.mix(CUPPA.out.versions) + } else { - // Set outputs, restoring original meta - // channel: [ meta, cuppa_dir ] - ch_outputs = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(CUPPA.out.cuppa_dir, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) + assert false + + } + + sample_data: [meta_cuppa, isofox_dir, purple_dir, linx_annotation_dir, virusinterpreter_dir] + classifier: classifier + } + + // Run process + CUPPA( + ch_cuppa_inputs.sample_data, + genome_version, + cuppa_resources, + ch_cuppa_inputs.classifier, + ) + + ch_versions = ch_versions.mix(CUPPA.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, cuppa_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(CUPPA.out.cuppa_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) emit: - cuppa_dir = ch_outputs // channel: [ meta, cuppa_dir ] + cuppa_dir = ch_outputs // channel: [ 
meta, cuppa_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/flagstat_metrics/main.nf b/subworkflows/local/flagstat_metrics/main.nf index 9f0a5f78..2ad4798a 100644 --- a/subworkflows/local/flagstat_metrics/main.nf +++ b/subworkflows/local/flagstat_metrics/main.nf @@ -9,103 +9,103 @@ include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/m workflow FLAGSTAT_METRICS { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] - ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Sort inputs, separate by tumor and normal - // channel: runnable: [ meta, bam, bai ] - // channel: skip: [ meta ] - ch_inputs_tumor_sorted = ch_tumor_bam - .map { meta, bam, bai -> - return [ - meta, - Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), - bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), - ] - } - .branch { meta, bam, bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.FLAGSTAT_TUMOR) - runnable: bam && !has_existing - skip: true - return meta - } - - // channel: runnable: [ meta, bam, bai ] - // channel: skip: [ meta ] - ch_inputs_normal_sorted = ch_normal_bam - .map { meta, bam, bai -> - return [ - meta, - Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), - bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), - ] - } - .branch { meta, bam, bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.FLAGSTAT_NORMAL) - runnable: bam && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_flagstat, bam, bai ] - ch_flagstat_inputs = Channel.empty() - .mix( - ch_inputs_tumor_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bam, bai] }, - ch_inputs_normal_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getNormalDnaSample(meta), 'normal', bam, bai] }, - ) - .map { meta, meta_sample, sample_type, bam, bai -> - - def meta_flagstat = [ - key: meta.group_id, - id: "${meta.group_id}_${meta_sample.sample_id}", - sample_id: meta_sample.sample_id, - sample_type: sample_type, - ] - - return [meta_flagstat, bam, bai] - } - - // Run process - SAMTOOLS_FLAGSTAT( - ch_flagstat_inputs, + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Sort inputs, separate by tumor and normal + // channel: runnable: [ meta, bam, bai ] + // channel: skip: [ meta ] + ch_inputs_tumor_sorted = ch_tumor_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.FLAGSTAT_TUMOR) + runnable: bam && !has_existing + skip: true + return meta + } + + // channel: runnable: [ meta, bam, bai ] + // channel: skip: [ meta ] + ch_inputs_normal_sorted = ch_normal_bam + .map { meta, bam, bai -> + return [ + meta, + Utils.selectCurrentOrExisting(bam, meta, 
Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, bam, bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.FLAGSTAT_NORMAL) + runnable: bam && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_flagstat, bam, bai ] + ch_flagstat_inputs = Channel.empty() + .mix( + ch_inputs_tumor_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bam, bai] }, + ch_inputs_normal_sorted.runnable.map { meta, bam, bai -> [meta, Utils.getNormalDnaSample(meta), 'normal', bam, bai] }, + ) + .map { meta, meta_sample, sample_type, bam, bai -> + + def meta_flagstat = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + sample_type: sample_type, + ] + + return [meta_flagstat, bam, bai] + } + + // Run process + SAMTOOLS_FLAGSTAT( + ch_flagstat_inputs, + ) + + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + // Sort into a tumor and normal channel + ch_flagstat_out = SAMTOOLS_FLAGSTAT.out.flagstat + .branch { meta_flagstat, flagstat -> + assert ['tumor', 'normal'].contains(meta_flagstat.sample_type) + tumor: meta_flagstat.sample_type == 'tumor' + normal: meta_flagstat.sample_type == 'normal' + placeholder: true + } + + // Set outputs, restoring original meta + // channel: [ meta, flagstat ] + ch_somatic_flagstat = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_flagstat_out.tumor, ch_inputs), + ch_inputs_tumor_sorted.skip.map { meta -> [meta, []] }, ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) - - // Sort into a tumor and normal channel - ch_flagstat_out = SAMTOOLS_FLAGSTAT.out.flagstat - .branch { meta_flagstat, flagstat -> - assert ['tumor', 'normal'].contains(meta_flagstat.sample_type) - tumor: meta_flagstat.sample_type == 'tumor' - normal: meta_flagstat.sample_type == 'normal' - placeholder: true - } - - // Set outputs, restoring original meta - // channel: [ meta, flagstat ] - ch_somatic_flagstat = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ch_flagstat_out.tumor, ch_inputs), - ch_inputs_tumor_sorted.skip.map { meta -> [meta, []] }, - ) - - ch_germline_flagstat = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ch_flagstat_out.normal, ch_inputs), - ch_inputs_normal_sorted.skip.map { meta -> [meta, []] }, - ) + ch_germline_flagstat = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_flagstat_out.normal, ch_inputs), + ch_inputs_normal_sorted.skip.map { meta -> [meta, []] }, + ) emit: - somatic = ch_somatic_flagstat // channel: [ meta, flagstat ] - germline = ch_germline_flagstat // channel: [ meta, flagstat ] + somatic = ch_somatic_flagstat // channel: [ meta, flagstat ] + germline = ch_germline_flagstat // channel: [ meta, flagstat ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/gridss_svprep_calling/main.nf b/subworkflows/local/gridss_svprep_calling/main.nf index 77bdb27a..59ec337e 100644 --- a/subworkflows/local/gridss_svprep_calling/main.nf +++ b/subworkflows/local/gridss_svprep_calling/main.nf @@ -15,369 +15,369 @@ include { SVPREP as SVPREP_TUMOR } from '../../../modules/loc workflow GRIDSS_SVPREP_CALLING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] - ch_normal_bam // channel: [mandatory] [ meta, bam, 
bai ] - - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version - genome_fai // channel: [mandatory] /path/to/genome_fai - genome_dict // channel: [mandatory] /path/to/genome_dict - genome_gridss_index // channel: [mandatory] /path/to/genome_gridss_index - gridss_blocklist // channel: [mandatory] /path/to/gridss_blocklist - sv_prep_blocklist // channel: [mandatory] /path/to/sv_prep_blocklist - known_fusions // channel: [mandatory] /path/to/known_fusions - - // Params - gridss_config // channel: [optional] /path/to/gridss_config + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + genome_gridss_index // channel: [mandatory] /path/to/genome_gridss_index + gridss_blocklist // channel: [mandatory] /path/to/gridss_blocklist + sv_prep_blocklist // channel: [mandatory] /path/to/sv_prep_blocklist + known_fusions // channel: [mandatory] /path/to/known_fusions + + // Params + gridss_config // channel: [optional] /path/to/gridss_config main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources and sort - // channel: runnable_tn: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] - // channel: runnable_to: [ meta, tumor_bam, tumor_bai ] - // channel: skip: [ meta ] - ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( - ch_tumor_bam, - ch_normal_bam, - ) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - return [ - meta, - Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), - tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), - Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), - normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), - ] - } - .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIDSS_VCF) - - runnable_tn: tumor_bam && normal_bam && !has_existing - runnable_to: tumor_bam && !has_existing - return [meta, tumor_bam, tumor_bai] - skip: true - return meta - } - - // - // MODULE: SV Prep (tumor) - // - // Create process input channel - // channel: [ meta_svprep, bam_tumor, bai_tumor, [] ] - ch_svprep_tumor_inputs = Channel.empty() - .mix( - ch_inputs_sorted.runnable_to.map { [*it, [], []] }, - ch_inputs_sorted.runnable_tn, - ) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - - def meta_svprep = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - sample_type: 'tumor', - // NOTE(SW): slightly redundant since we have this information then lose it with .mix above - group_size: normal_bam ? 
2 : 1 - ] - - return [meta_svprep, tumor_bam, tumor_bai, []] - - } - - // Run process - SVPREP_TUMOR( - ch_svprep_tumor_inputs, - genome_fasta, - genome_version, - sv_prep_blocklist, - known_fusions, - 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST', // -write_types argument - ) - - ch_versions = ch_versions.mix(SVPREP_TUMOR.out.versions) - - // channel: [ meta_gridss, bam_tumor, bam_tumor_filtered ] - ch_preprocess_inputs_tumor = WorkflowOncoanalyser.groupByMeta( - SVPREP_TUMOR.out.bam, - ch_svprep_tumor_inputs, - ) - .map { meta_svprep, bam_filtered, bam, bai, jnc_optional -> - return [meta_svprep, bam, bam_filtered] - } - - // - // MODULE: SV Prep (normal) - // - // Create process input channel - // channel: [ meta_svprep, bam_normal, bai_normal, junctions_tumor ] - ch_svprep_normal_inputs = WorkflowOncoanalyser.groupByMeta( + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable_tn: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] + // channel: runnable_to: [ meta, tumor_bam, tumor_bai ] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + } + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIDSS_VCF) + + runnable_tn: tumor_bam && normal_bam && !has_existing + runnable_to: tumor_bam && !has_existing + return [meta, tumor_bam, tumor_bai] + skip: true + return meta + } + + // + // MODULE: SV Prep (tumor) + // + // Create process input channel + // channel: [ meta_svprep, bam_tumor, bai_tumor, [] ] + ch_svprep_tumor_inputs = Channel.empty() + .mix( + ch_inputs_sorted.runnable_to.map { [*it, [], []] }, ch_inputs_sorted.runnable_tn, - // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_tn - WorkflowOncoanalyser.restoreMeta(SVPREP_TUMOR.out.junctions, ch_inputs_sorted.runnable_tn.map { it[0] }) ) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, junctions_tumor -> - - def meta_svprep = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getNormalDnaSampleName(meta), - sample_type: 'normal', - group_size: 2, // Assumption holds since germline only is not supported and we source from runnable_tn - ] - - return [meta_svprep, normal_bam, normal_bai, junctions_tumor] - - } - - // Run process - SVPREP_NORMAL( - ch_svprep_normal_inputs, - genome_fasta, - genome_version, - sv_prep_blocklist, - known_fusions, - 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST', // -write_types argument + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def meta_svprep = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + sample_type: 'tumor', + // NOTE(SW): slightly redundant since we have this information then lose it with .mix above + group_size: normal_bam ? 
2 : 1 + ] + + return [meta_svprep, tumor_bam, tumor_bai, []] + + } + + // Run process + SVPREP_TUMOR( + ch_svprep_tumor_inputs, + genome_fasta, + genome_version, + sv_prep_blocklist, + known_fusions, + 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST', // -write_types argument + ) + + ch_versions = ch_versions.mix(SVPREP_TUMOR.out.versions) + + // channel: [ meta_gridss, bam_tumor, bam_tumor_filtered ] + ch_preprocess_inputs_tumor = WorkflowOncoanalyser.groupByMeta( + SVPREP_TUMOR.out.bam, + ch_svprep_tumor_inputs, + ) + .map { meta_svprep, bam_filtered, bam, bai, jnc_optional -> + return [meta_svprep, bam, bam_filtered] + } + + // + // MODULE: SV Prep (normal) + // + // Create process input channel + // channel: [ meta_svprep, bam_normal, bai_normal, junctions_tumor ] + ch_svprep_normal_inputs = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable_tn, + // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_tn + WorkflowOncoanalyser.restoreMeta(SVPREP_TUMOR.out.junctions, ch_inputs_sorted.runnable_tn.map { it[0] }) + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, junctions_tumor -> + + def meta_svprep = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getNormalDnaSampleName(meta), + sample_type: 'normal', + group_size: 2, // Assumption holds since germline only is not supported and we source from runnable_tn + ] + + return [meta_svprep, normal_bam, normal_bai, junctions_tumor] + + } + + // Run process + SVPREP_NORMAL( + ch_svprep_normal_inputs, + genome_fasta, + genome_version, + sv_prep_blocklist, + known_fusions, + 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST', // -write_types argument + ) + + ch_versions = ch_versions.mix(SVPREP_NORMAL.out.versions) + + // channel: [ meta_gridss, bam_normal, bam_normal_filtered ] + ch_preprocess_inputs_normal = WorkflowOncoanalyser.groupByMeta( + SVPREP_NORMAL.out.bam, + ch_svprep_normal_inputs, + ) + // Switching meta name here from meta_svprep + .map { meta_gridss, bam_filtered, bam, bai, junctions -> + return [meta_gridss, bam, bam_filtered] + } + + // + // MODULE: GRIDSS preprocess + // + // Create process input channel + // channel: [ meta_gridss, bam, bam_filtered ] + ch_preprocess_inputs = Channel.empty() + .mix( + ch_preprocess_inputs_tumor, + ch_preprocess_inputs_normal, ) - - ch_versions = ch_versions.mix(SVPREP_NORMAL.out.versions) - - // channel: [ meta_gridss, bam_normal, bam_normal_filtered ] - ch_preprocess_inputs_normal = WorkflowOncoanalyser.groupByMeta( - SVPREP_NORMAL.out.bam, - ch_svprep_normal_inputs, - ) - // Switching meta name here from meta_svprep - .map { meta_gridss, bam_filtered, bam, bai, junctions -> - return [meta_gridss, bam, bam_filtered] - } - - // - // MODULE: GRIDSS preprocess - // - // Create process input channel - // channel: [ meta_gridss, bam, bam_filtered ] - ch_preprocess_inputs = Channel.empty() - .mix( - ch_preprocess_inputs_tumor, - ch_preprocess_inputs_normal, - ) - .map { meta_svprep, bam, bam_filtered -> - - def meta_gridss = [ - key: meta_svprep.key, - id: "${meta_svprep.id}__${meta_svprep.sample_id}", - sample_id: meta_svprep.sample_id, - sample_type: meta_svprep.sample_type, - group_size: meta_svprep.group_size, + .map { meta_svprep, bam, bam_filtered -> + + def meta_gridss = [ + key: meta_svprep.key, + id: "${meta_svprep.id}__${meta_svprep.sample_id}", + sample_id: meta_svprep.sample_id, + sample_type: meta_svprep.sample_type, + group_size: meta_svprep.group_size, + ] + + return [meta_gridss, bam, bam_filtered] + } + + // Run process + PREPROCESS( + 
ch_preprocess_inputs, + gridss_config, + genome_fasta, + genome_fai, + genome_dict, + genome_gridss_index, + ) + + ch_versions = ch_versions.mix(PREPROCESS.out.versions) + + // Gather BAMs and outputs from preprocessing for each tumor/normal and tumor only set + // channel: [key, [[meta_gridss, bam, bam_filtered, preprocess_dir], ...] ] + ch_bams_and_preprocess = WorkflowOncoanalyser.groupByMeta( + ch_preprocess_inputs, + PREPROCESS.out.preprocess_dir, + ) + .map { + def meta_gridss = it[0] + def other = it[1..-1] + [groupKey(meta_gridss.key, meta_gridss.group_size), [meta_gridss, *other]] + } + .groupTuple() + + // + // MODULE: GRIDSS assemble + // + // Create process input channel + // channel: tumor/normal: [ meta_gridss, [bams], [bams_filtered], [preprocess_dirs], [labels] ] + // channel: tumor only: [ meta_gridss, bam, bam_filtered, preprocess_dir, label ] + ch_assemble_inputs = ch_bams_and_preprocess + .map { key, entries -> + + assert entries.size() == 1 || entries.size() == 2 + + def tumor_entry = entries.find { e -> e[0].sample_type == 'tumor' } + def normal_entry = entries.find { e -> e[0].sample_type == 'normal' } + + assert tumor_entry !== null + + def (tmeta, tbam, tbam_filtered, tpreprocess) = tumor_entry + def meta_gridss = [ + // Effectively meta.group_id, and both are required. Reminder: + // * key: channel element grouping + // * id: task tag + key: tmeta.key, + id: tmeta.key, + ] + + def data = [] + + if (normal_entry === null) { + + data = [ + meta_gridss, + tbam, + tbam_filtered, + tpreprocess, + tmeta.sample_id, ] - return [meta_gridss, bam, bam_filtered] - } - - // Run process - PREPROCESS( - ch_preprocess_inputs, - gridss_config, - genome_fasta, - genome_fai, - genome_dict, - genome_gridss_index, - ) - - ch_versions = ch_versions.mix(PREPROCESS.out.versions) + } else { - // Gather BAMs and outputs from preprocessing for each tumor/normal and tumor only set - // channel: [key, [[meta_gridss, bam, bam_filtered, preprocess_dir], ...] ] - ch_bams_and_preprocess = WorkflowOncoanalyser.groupByMeta( - ch_preprocess_inputs, - PREPROCESS.out.preprocess_dir, - ) - .map { - def meta_gridss = it[0] - def other = it[1..-1] - [groupKey(meta_gridss.key, meta_gridss.group_size), [meta_gridss, *other]] - } - .groupTuple() - - // - // MODULE: GRIDSS assemble - // - // Create process input channel - // channel: tumor/normal: [ meta_gridss, [bams], [bams_filtered], [preprocess_dirs], [labels] ] - // channel: tumor only: [ meta_gridss, bam, bam_filtered, preprocess_dir, label ] - ch_assemble_inputs = ch_bams_and_preprocess - .map { key, entries -> - - assert entries.size() == 1 || entries.size() == 2 - - def tumor_entry = entries.find { e -> e[0].sample_type == 'tumor' } - def normal_entry = entries.find { e -> e[0].sample_type == 'normal' } - - assert tumor_entry !== null - - def (tmeta, tbam, tbam_filtered, tpreprocess) = tumor_entry - def meta_gridss = [ - // Effectively meta.group_id, and both are required. 
Reminder: - // * key: channel element grouping - // * id: task tag - key: tmeta.key, - id: tmeta.key, + def (nmeta, nbam, nbam_filtered, npreprocess) = normal_entry + data = [ + meta_gridss, + [nbam, tbam], + [nbam_filtered, tbam_filtered], + [npreprocess, tpreprocess], + [nmeta.sample_id, tmeta.sample_id], ] - def data = [] - - if (normal_entry === null) { - - data = [ - meta_gridss, - tbam, - tbam_filtered, - tpreprocess, - tmeta.sample_id, - ] - - } else { - - def (nmeta, nbam, nbam_filtered, npreprocess) = normal_entry - data = [ - meta_gridss, - [nbam, tbam], - [nbam_filtered, tbam_filtered], - [npreprocess, tpreprocess], - [nmeta.sample_id, tmeta.sample_id], - ] - - } - - return data } - // Run process - ASSEMBLE( - ch_assemble_inputs, - gridss_config, - genome_fasta, - genome_fai, - genome_dict, - genome_gridss_index, - gridss_blocklist, - ) - - ch_versions = ch_versions.mix(ASSEMBLE.out.versions) - - // - // MODULE: GRIDSS call - // - // Create process input channel - // channel: [ meta_gridss, [bams], [bams_filtered], assemble_dir, [labels] ] - ch_call_inputs = WorkflowOncoanalyser.groupByMeta( - ch_assemble_inputs, - ASSEMBLE.out.assemble_dir, - flatten: false, + return data + } + + // Run process + ASSEMBLE( + ch_assemble_inputs, + gridss_config, + genome_fasta, + genome_fai, + genome_dict, + genome_gridss_index, + gridss_blocklist, + ) + + ch_versions = ch_versions.mix(ASSEMBLE.out.versions) + + // + // MODULE: GRIDSS call + // + // Create process input channel + // channel: [ meta_gridss, [bams], [bams_filtered], assemble_dir, [labels] ] + ch_call_inputs = WorkflowOncoanalyser.groupByMeta( + ch_assemble_inputs, + ASSEMBLE.out.assemble_dir, + flatten: false, + ) + .map { data -> + def meta_gridss = data[0] + def (bams, bams_filtered, preprocess_dirs, labels) = data[1] + def (assemble_dir) = data[2] + return [meta_gridss, bams, bams_filtered, assemble_dir, labels] + } + + // Run process + CALL( + ch_call_inputs, + gridss_config, + genome_fasta, + genome_fai, + genome_dict, + genome_gridss_index, + gridss_blocklist, + ) + + ch_versions = ch_versions.mix(CALL.out.versions) + + // + // MODULE: SV Prep depth annotation + // + // Restore original meta, create process input channel + // channel: [ meta, [bams], [bais], vcf, [labels] ] + ch_depth_inputs_tn = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable_tn, + // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_tn + WorkflowOncoanalyser.restoreMeta(CALL.out.vcf, ch_inputs_sorted.runnable_tn.map { it[0] }) + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf -> + return [ + meta, + [normal_bam, tumor_bam], + [normal_bai, tumor_bai], + vcf, + [Utils.getNormalDnaSampleName(meta), Utils.getTumorDnaSampleName(meta)], + ] + } + + // channel: [ meta, bam, bai, vcf, label ] + ch_depth_inputs_to = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable_to, + // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_to + WorkflowOncoanalyser.restoreMeta(CALL.out.vcf, ch_inputs_sorted.runnable_to.map { it[0] }) + ) + .map { meta, tumor_bam, tumor_bai, vcf -> + return [ + meta, + tumor_bam, + tumor_bai, + vcf, + Utils.getTumorDnaSampleName(meta), + ] + } + + // channel: runnable_tn: [ meta_svprep, [bams], [bais], vcf, [labels] ] + // channel: runnable_to: [ meta_svprep, bam, bai, vcf, label ] + ch_depth_inputs = Channel.empty() + .mix( + ch_depth_inputs_tn, + ch_depth_inputs_to, ) - .map { data -> - def meta_gridss = data[0] - def (bams, 
bams_filtered, preprocess_dirs, labels) = data[1] - def (assemble_dir) = data[2] - return [meta_gridss, bams, bams_filtered, assemble_dir, labels] - } - - // Run process - CALL( - ch_call_inputs, - gridss_config, - genome_fasta, - genome_fai, - genome_dict, - genome_gridss_index, - gridss_blocklist, + .map { d -> + + def meta = d[0] + def fps = d[1..-1] + + def meta_svprep = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta) + ] + + return [meta_svprep, *fps] + } + + // Add depth annotations to calls + DEPTH_ANNOTATOR( + ch_depth_inputs, + genome_fasta, + genome_version, + ) + + ch_versions = ch_versions.mix(DEPTH_ANNOTATOR.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, gridss_vcf ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(DEPTH_ANNOTATOR.out.vcf, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, ) - ch_versions = ch_versions.mix(CALL.out.versions) - - // - // MODULE: SV Prep depth annotation - // - // Restore original meta, create process input channel - // channel: [ meta, [bams], [bais], vcf, [labels] ] - ch_depth_inputs_tn = WorkflowOncoanalyser.groupByMeta( - ch_inputs_sorted.runnable_tn, - // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_tn - WorkflowOncoanalyser.restoreMeta(CALL.out.vcf, ch_inputs_sorted.runnable_tn.map { it[0] }) - ) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf -> - return [ - meta, - [normal_bam, tumor_bam], - [normal_bai, tumor_bai], - vcf, - [Utils.getNormalDnaSampleName(meta), Utils.getTumorDnaSampleName(meta)], - ] - } - - // channel: [ meta, bam, bai, vcf, label ] - ch_depth_inputs_to = WorkflowOncoanalyser.groupByMeta( - ch_inputs_sorted.runnable_to, - // NOTE(SW): this implicitly selects only entries present in ch_inputs_sorted.runnable_to - WorkflowOncoanalyser.restoreMeta(CALL.out.vcf, ch_inputs_sorted.runnable_to.map { it[0] }) - ) - .map { meta, tumor_bam, tumor_bai, vcf -> - return [ - meta, - tumor_bam, - tumor_bai, - vcf, - Utils.getTumorDnaSampleName(meta), - ] - } - - // channel: runnable_tn: [ meta_svprep, [bams], [bais], vcf, [labels] ] - // channel: runnable_to: [ meta_svprep, bam, bai, vcf, label ] - ch_depth_inputs = Channel.empty() - .mix( - ch_depth_inputs_tn, - ch_depth_inputs_to, - ) - .map { d -> - - def meta = d[0] - def fps = d[1..-1] - - def meta_svprep = [ - key: meta.group_id, - id: meta.group_id, - tumor_id: Utils.getTumorDnaSampleName(meta) - ] - - return [meta_svprep, *fps] - } - - // Add depth annotations to calls - DEPTH_ANNOTATOR( - ch_depth_inputs, - genome_fasta, - genome_version, - ) - - ch_versions = ch_versions.mix(DEPTH_ANNOTATOR.out.versions) - - // Set outputs, restoring original meta - // channel: [ meta, gridss_vcf ] - ch_outputs = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(DEPTH_ANNOTATOR.out.vcf, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) - emit: - vcf = ch_outputs // channel: [ meta, vcf ] + vcf = ch_outputs // channel: [ meta, vcf ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/gripss_filtering/main.nf b/subworkflows/local/gripss_filtering/main.nf index 4d9bfcb2..324e6761 100644 --- a/subworkflows/local/gripss_filtering/main.nf +++ b/subworkflows/local/gripss_filtering/main.nf @@ -10,170 +10,170 @@ include { GRIPSS_SOMATIC as SOMATIC } from '../../../modules/local/gripss/soma workflow GRIPSS_FILTERING { 
take: - // Sample inputs - ch_inputs // channel: [mandatory] [ meta ] - ch_gridss // channel: [mandatory] [ meta, gridss_vcf ] - - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version - genome_fai // channel: [mandatory] /path/to/genome_fai - breakend_pon // channel: [mandatory] /path/to/breakend_pon - breakpoint_pon // channel: [mandatory] /path/to/breakpoint_pon - known_fusions // channel: [mandatory] /path/to/known_fusions - repeatmasker_annotations // channel: [mandatory] /path/to/repeatmasker_annotations - target_region_bed // channel: [optional] /path/to/target_region_bed + // Sample inputs + ch_inputs // channel: [mandatory] [ meta ] + ch_gridss // channel: [mandatory] [ meta, gridss_vcf ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + breakend_pon // channel: [mandatory] /path/to/breakend_pon + breakpoint_pon // channel: [mandatory] /path/to/breakpoint_pon + known_fusions // channel: [mandatory] /path/to/known_fusions + repeatmasker_annotations // channel: [mandatory] /path/to/repeatmasker_annotations + target_region_bed // channel: [optional] /path/to/target_region_bed main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources and sort - // channel: runnable: [ meta, gridss_vcf ] - // channel: skip: [ meta ] - ch_inputs_sorted = ch_gridss - .map { meta, gridss_vcf -> - return [ - meta, - Utils.selectCurrentOrExisting(gridss_vcf, meta, Constants.INPUT.GRIDSS_VCF), - ] + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, gridss_vcf ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_gridss + .map { meta, gridss_vcf -> + return [ + meta, + Utils.selectCurrentOrExisting(gridss_vcf, meta, Constants.INPUT.GRIDSS_VCF), + ] + } + .branch { meta, gridss_vcf -> + runnable: gridss_vcf + skip: true + return meta + } + + // + // MODULE: GRIPSS germline + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, gridss_vcf ] + // channel: skip: [ meta ] + ch_inputs_germline_sorted = ch_inputs_sorted.runnable + .branch { meta, gridss_vcf -> + def has_tumor_normal = Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIPSS_VCF_NORMAL) + + runnable: has_tumor_normal && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_gripss, gridss_vcf ] + ch_gripss_germline_inputs = ch_inputs_germline_sorted.runnable + .map { meta, gridss_vcf -> + + def meta_gripss = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + normal_id: Utils.getNormalDnaSampleName(meta), + ] + + return [meta_gripss, gridss_vcf] + } + + // Run process + GERMLINE( + ch_gripss_germline_inputs, + genome_fasta, + genome_version, + genome_fai, + breakend_pon, + breakpoint_pon, + known_fusions, + repeatmasker_annotations, + ) + + ch_versions = ch_versions.mix(GERMLINE.out.versions) + + // + // MODULE: GRIPSS somatic + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, gridss_vcf ] + // channel: skip: [ meta ] + ch_inputs_somatic_sorted = ch_inputs_sorted.runnable + .branch { meta, gridss_vcf -> + def has_tumor = 
Utils.hasTumorDna(meta) + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIPSS_VCF_TUMOR) + + runnable: has_tumor && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_gripss, gridss_vcf ] + ch_gripss_somatic_inputs = ch_inputs_somatic_sorted.runnable + .map { meta, gridss_vcf -> + + def meta_gripss = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] + + if (Utils.hasNormalDna(meta)) { + meta_gripss.normal_id = Utils.getNormalDnaSampleName(meta) } - .branch { meta, gridss_vcf -> - runnable: gridss_vcf - skip: true - return meta - } - - // - // MODULE: GRIPSS germline - // - // Select inputs that are eligible to run - // channel: runnable: [ meta, gridss_vcf ] - // channel: skip: [ meta ] - ch_inputs_germline_sorted = ch_inputs_sorted.runnable - .branch { meta, gridss_vcf -> - def has_tumor_normal = Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIPSS_VCF_NORMAL) - - runnable: has_tumor_normal && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_gripss, gridss_vcf ] - ch_gripss_germline_inputs = ch_inputs_germline_sorted.runnable - .map { meta, gridss_vcf -> - def meta_gripss = [ - key: meta.group_id, - id: meta.group_id, - tumor_id: Utils.getTumorDnaSampleName(meta), - normal_id: Utils.getNormalDnaSampleName(meta), - ] - - return [meta_gripss, gridss_vcf] - } - - // Run process - GERMLINE( - ch_gripss_germline_inputs, - genome_fasta, - genome_version, - genome_fai, - breakend_pon, - breakpoint_pon, - known_fusions, - repeatmasker_annotations, + return [meta_gripss, gridss_vcf] + } + + // Run process + SOMATIC( + ch_gripss_somatic_inputs, + genome_fasta, + genome_version, + genome_fai, + breakend_pon, + breakpoint_pon, + known_fusions, + repeatmasker_annotations, + target_region_bed, + ) + + ch_versions = ch_versions.mix(SOMATIC.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, gripss_vcf, gripss_tbi ] + ch_somatic_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, ) - ch_versions = ch_versions.mix(GERMLINE.out.versions) - - // - // MODULE: GRIPSS somatic - // - // Select inputs that are eligible to run - // channel: runnable: [ meta, gridss_vcf ] - // channel: skip: [ meta ] - ch_inputs_somatic_sorted = ch_inputs_sorted.runnable - .branch { meta, gridss_vcf -> - def has_tumor = Utils.hasTumorDna(meta) - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.GRIPSS_VCF_TUMOR) - - runnable: has_tumor && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_gripss, gridss_vcf ] - ch_gripss_somatic_inputs = ch_inputs_somatic_sorted.runnable - .map { meta, gridss_vcf -> - - def meta_gripss = [ - key: meta.group_id, - id: meta.group_id, - tumor_id: Utils.getTumorDnaSampleName(meta), - ] - - if (Utils.hasNormalDna(meta)) { - meta_gripss.normal_id = Utils.getNormalDnaSampleName(meta) - } - - return [meta_gripss, gridss_vcf] - } + ch_somatic_unfiltered_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf_unfiltered, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) - // Run process - SOMATIC( - 
ch_gripss_somatic_inputs, - genome_fasta, - genome_version, - genome_fai, - breakend_pon, - breakpoint_pon, - known_fusions, - repeatmasker_annotations, - target_region_bed, + ch_germline_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, ) - ch_versions = ch_versions.mix(SOMATIC.out.versions) - - // Set outputs, restoring original meta - // channel: [ meta, gripss_vcf, gripss_tbi ] - ch_somatic_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs), - ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] }, - ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, - ) - - ch_somatic_unfiltered_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf_unfiltered, ch_inputs), - ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] }, - ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, - ) - - ch_germline_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs), - ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] }, - ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, - ) - - ch_germline_unfiltered_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf_unfiltered, ch_inputs), - ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] }, - ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, - ) + ch_germline_unfiltered_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf_unfiltered, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] }, + ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + ) emit: - somatic = ch_somatic_out // channel: [ meta, gripss_vcf, gripss_tbi ] - germline = ch_germline_out // channel: [ meta, gripss_vcf, gripss_tbi ] - somatic_unfiltered = ch_somatic_unfiltered_out // channel: [ meta, gripss_vcf, gripss_tbi ] - germline_unfiltered = ch_germline_unfiltered_out // channel: [ meta, gripss_vcf, gripss_tbi ] + somatic = ch_somatic_out // channel: [ meta, gripss_vcf, gripss_tbi ] + germline = ch_germline_out // channel: [ meta, gripss_vcf, gripss_tbi ] + somatic_unfiltered = ch_somatic_unfiltered_out // channel: [ meta, gripss_vcf, gripss_tbi ] + germline_unfiltered = ch_germline_unfiltered_out // channel: [ meta, gripss_vcf, gripss_tbi ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/isofox_quantification/main.nf b/subworkflows/local/isofox_quantification/main.nf index 8de357f9..5cb79e4b 100644 --- a/subworkflows/local/isofox_quantification/main.nf +++ b/subworkflows/local/isofox_quantification/main.nf @@ -9,88 +9,88 @@ include { ISOFOX } from '../../../modules/local/isofox/main' workflow ISOFOX_QUANTIFICATION { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version - genome_fai // channel: [mandatory] /path/to/genome_fai - ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ - isofox_counts // channel: [mandatory] /path/to/isofox_counts - isofox_gc_ratios 
// channel: [mandatory] /path/to/isofox_gc_ratios - isofox_gene_ids // channel: [optional] /path/to/gene_ids - isofox_tpm_norm // channel: [optional] /path/to/tpm_norm + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + isofox_counts // channel: [mandatory] /path/to/isofox_counts + isofox_gc_ratios // channel: [mandatory] /path/to/isofox_gc_ratios + isofox_gene_ids // channel: [optional] /path/to/gene_ids + isofox_tpm_norm // channel: [optional] /path/to/tpm_norm - // Params - isofox_functions // string: [optional] Isofox functions - isofox_read_length // string: [mandatory] Isofox read length + // Params + isofox_functions // string: [optional] Isofox functions + isofox_read_length // string: [mandatory] Isofox read length main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() - // Select input sources and sort - // channel: runnable: [ meta, tumor_bam, tumor_bai ] - // channel: skip: [ meta ] - ch_inputs_sorted = ch_tumor_rna_bam - .map { meta, tumor_bam, tumor_bai -> - return [ - meta, - Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), - Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), - ] - } - .branch { meta, tumor_bam, tumor_bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.ISOFOX_DIR) - runnable: tumor_bam && !has_existing - skip: true - return meta - } + // Select input sources and sort + // channel: runnable: [ meta, tumor_bam, tumor_bai ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_tumor_rna_bam + .map { meta, tumor_bam, tumor_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), + ] + } + .branch { meta, tumor_bam, tumor_bai -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.ISOFOX_DIR) + runnable: tumor_bam && !has_existing + skip: true + return meta + } - // Create process input channel - // channel: [ meta_isofox, tumor_bam, tumor_bai ] - ch_isofox_inputs = ch_inputs_sorted.runnable - .map { meta, tumor_bam, tumor_bai -> + // Create process input channel + // channel: [ meta_isofox, tumor_bam, tumor_bai ] + ch_isofox_inputs = ch_inputs_sorted.runnable + .map { meta, tumor_bam, tumor_bai -> - def meta_isofox = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorRnaSampleName(meta), - ] + def meta_isofox = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorRnaSampleName(meta), + ] - return [meta_isofox, tumor_bam, tumor_bai] - } + return [meta_isofox, tumor_bam, tumor_bai] + } - // Run process - ISOFOX( - ch_isofox_inputs, - isofox_functions, - isofox_read_length, - genome_fasta, - genome_version, - genome_fai, - ensembl_data_resources, - isofox_counts, - isofox_gc_ratios, - isofox_gene_ids, - isofox_tpm_norm, - ) + // Run process + ISOFOX( + ch_isofox_inputs, + isofox_functions, + isofox_read_length, + genome_fasta, + genome_version, + genome_fai, + ensembl_data_resources, + isofox_counts, + isofox_gc_ratios, + isofox_gene_ids, + isofox_tpm_norm, + ) - ch_versions = ch_versions.mix(ISOFOX.out.versions) + ch_versions = 
ch_versions.mix(ISOFOX.out.versions) - // Set outputs, restoring original meta - // channel: [ meta, isofox_dir ] - ch_outputs = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ISOFOX.out.isofox_dir, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) + // Set outputs, restoring original meta + // channel: [ meta, isofox_dir ] + ch_outputs = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ISOFOX.out.isofox_dir, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) emit: - isofox_dir = ch_outputs // channel: [ meta, isofox_dir ] + isofox_dir = ch_outputs // channel: [ meta, isofox_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/lilac_calling/main.nf b/subworkflows/local/lilac_calling/main.nf index e73f77c3..f267902e 100644 --- a/subworkflows/local/lilac_calling/main.nf +++ b/subworkflows/local/lilac_calling/main.nf @@ -12,213 +12,213 @@ include { LILAC } from '../../../modules/local/l workflow LILAC_CALLING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] - ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] - ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] - ch_purple // channel: [mandatory] [ meta, purple_dir ] - - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version - genome_fai // channel: [mandatory] /path/to/genome_fai - lilac_resource_dir // channel: [mandatory] /path/to/lilac_resource_dir/ - hla_slice_bed // channel: [mandatory] /path/to/hla_slice_bed + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + lilac_resource_dir // channel: [mandatory] /path/to/lilac_resource_dir/ + hla_slice_bed // channel: [mandatory] /path/to/hla_slice_bed main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources and sort for DNA BAMs - // channel: runnable: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ] - // channel: skip: [ meta ] - ch_dna_inputs_sorted = WorkflowOncoanalyser.groupByMeta( - ch_tumor_bam, - ch_normal_bam, - ) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort for DNA BAMs + // channel: runnable: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ] + // channel: skip: [ meta ] + ch_dna_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_bam, + ch_normal_bam, + ) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [ + meta, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), + tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), + normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + ] + 
} + .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.LILAC_DIR) + + runnable: (tumor_bam || normal_bam) && !has_existing + skip: true + return meta + } + + // Realign reads mapping to HLA regions and homologus regions if using reference genome with ALT contigs + // NOTE(SW): the aim of this process is to take reads mapping to ALT contigs and align them to the three + // relevant HLA genes on chr6. All reads including those previously mapped to chr6 are realigned for + // consistency. + if (params.genome_type == 'alt') { + + // Flatten into BAM/BAI pairs, select inputs that are eligible to run + // channel: runnable: [ meta_extra, bam, bai ] + // channel: skip: [ meta_extra ] + ch_realign_inputs_sorted = ch_dna_inputs_sorted.runnable + .flatMap { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + + def tumor_sample_id = Utils.hasTumorDna(meta) ? Utils.getTumorDnaSampleName(meta) : [] + def normal_sample_id = Utils.hasNormalDna(meta) ? Utils.getNormalDnaSampleName(meta) : [] + return [ - meta, - Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR), - tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), - Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL), - normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL), + [[key: meta.group_id, *:meta, sample_id: tumor_sample_id, sample_type: 'tumor'], tumor_bam, tumor_bai], + [[key: meta.group_id, *:meta, sample_id: normal_sample_id, sample_type: 'normal'], normal_bam, normal_bai], ] } - .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.LILAC_DIR) - - runnable: (tumor_bam || normal_bam) && !has_existing + .branch { meta_extra, bam, bai -> + runnable: bam && bai skip: true - return meta + return meta_extra } - // Realign reads mapping to HLA regions and homologus regions if using reference genome with ALT contigs - // NOTE(SW): the aim of this process is to take reads mapping to ALT contigs and align them to the three - // relevant HLA genes on chr6. All reads including those previously mapped to chr6 are realigned for - // consistency. - if (params.genome_type == 'alt') { - - // Flatten into BAM/BAI pairs, select inputs that are eligible to run - // channel: runnable: [ meta_extra, bam, bai ] - // channel: skip: [ meta_extra ] - ch_realign_inputs_sorted = ch_dna_inputs_sorted.runnable - .flatMap { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - - def tumor_sample_id = Utils.hasTumorDna(meta) ? Utils.getTumorDnaSampleName(meta) : [] - def normal_sample_id = Utils.hasNormalDna(meta) ? 
Utils.getNormalDnaSampleName(meta) : [] - - return [ - [[key: meta.group_id, *:meta, sample_id: tumor_sample_id, sample_type: 'tumor'], tumor_bam, tumor_bai], - [[key: meta.group_id, *:meta, sample_id: normal_sample_id, sample_type: 'normal'], normal_bam, normal_bai], - ] - } - .branch { meta_extra, bam, bai -> - runnable: bam && bai - skip: true - return meta_extra - } - - // - // MODULE: Custom BAM slice (LILAC) - // - // Create process input channel - // channel: [ meta_realign, bam, bai ] - ch_slice_inputs = ch_realign_inputs_sorted.runnable - .map { meta_extra, bam, bai -> - - def meta_realign = [ - key: meta_extra.group_id, - id: "${meta_extra.group_id}__${meta_extra.sample_id}", - sample_id: meta_extra.sample_id, - sample_type: meta_extra.sample_type, - ] - - return [meta_realign, bam, bai] - } - - // Run process - SLICEBAM( - ch_slice_inputs, - hla_slice_bed, - ) - - ch_versions = ch_versions.mix(SLICEBAM.out.versions) - - // - // MODULE: Custom extract contig (LILAC) - // - // Only run if we have runnable inputs, no blocking since operating only on input metas - ch_extract_contig_run = ch_realign_inputs_sorted.runnable - .toList() - .map { !it.isEmpty() } - - EXTRACTCONTIG( - 'chr6', - genome_fasta, - genome_fai, - ch_extract_contig_run, - ) + // + // MODULE: Custom BAM slice (LILAC) + // + // Create process input channel + // channel: [ meta_realign, bam, bai ] + ch_slice_inputs = ch_realign_inputs_sorted.runnable + .map { meta_extra, bam, bai -> + + def meta_realign = [ + key: meta_extra.group_id, + id: "${meta_extra.group_id}__${meta_extra.sample_id}", + sample_id: meta_extra.sample_id, + sample_type: meta_extra.sample_type, + ] - ch_versions = ch_versions.mix(EXTRACTCONTIG.out.versions) + return [meta_realign, bam, bai] + } - // - // MODULE: Custom realign reads (LILAC) - // - REALIGNREADS( - SLICEBAM.out.bam, - EXTRACTCONTIG.out.contig, - EXTRACTCONTIG.out.bwa_index, - ) + // Run process + SLICEBAM( + ch_slice_inputs, + hla_slice_bed, + ) - ch_versions = ch_versions.mix(REALIGNREADS.out.versions) - - // Separate all BAMs by sample type so they can be merged with desired order - // channel: [ < meta_extra OR meta_realign >, bam, bai ] - ch_slice_reunited_bams = Channel.empty() - .mix( - ch_realign_inputs_sorted.skip.map { meta_extra -> [meta_extra, [], []] }, - REALIGNREADS.out.bam, - ) - .branch { meta_ambiguous, bam, bai -> - tumor: meta_ambiguous.sample_type == 'tumor' - normal: meta_ambiguous.sample_type == 'normal' - } - - // Restore meta, pair tumor and normal BAMs - // channel: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ] - ch_dna_inputs_ready = WorkflowOncoanalyser.groupByMeta( - WorkflowOncoanalyser.restoreMeta(ch_slice_reunited_bams.tumor, ch_inputs), - WorkflowOncoanalyser.restoreMeta(ch_slice_reunited_bams.normal, ch_inputs), - ) + ch_versions = ch_versions.mix(SLICEBAM.out.versions) - } else { + // + // MODULE: Custom extract contig (LILAC) + // + // Only run if we have runnable inputs, no blocking since operating only on input metas + ch_extract_contig_run = ch_realign_inputs_sorted.runnable + .toList() + .map { !it.isEmpty() } - // channel: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ] - ch_dna_inputs_ready = ch_dna_inputs_sorted.runnable + EXTRACTCONTIG( + 'chr6', + genome_fasta, + genome_fai, + ch_extract_contig_run, + ) - } + ch_versions = ch_versions.mix(EXTRACTCONTIG.out.versions) // - // MODULE: LILAC + // MODULE: Custom realign reads (LILAC) // - // Create process input channel - // channel: [ meta_lilac, 
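// NOTE(editor): sketch of the process-gating idiom used for EXTRACTCONTIG here.
// toList() collapses the queue channel into a single list item, so the map yields
// exactly one Boolean; passing that to the process lets it run (or not) without
// blocking on per-sample data. Hypothetical stand-in channel:
ch_runnable_toy = Channel.of('sampleA', 'sampleB')
ch_run_flag = ch_runnable_toy
    .toList()               // one item: [ 'sampleA', 'sampleB' ]
    .map { !it.isEmpty() }  // single true/false value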
normal_dna_bam, normal_dna_bai, tumor_dna_bam, tumor_dna_bai, tumor_rna_bam, tumor_rna_bai, purple_dir ]
- ch_lilac_inputs = WorkflowOncoanalyser.groupByMeta(
- ch_dna_inputs_ready,
- ch_tumor_rna_bam,
- ch_purple,
+ REALIGNREADS(
+ SLICEBAM.out.bam,
+ EXTRACTCONTIG.out.contig,
+ EXTRACTCONTIG.out.bwa_index,
)
- .map { meta, tbam_dna, tbai_dna, nbam_dna, nbai_dna, tbam_rna, tbai_rna, purple_dir ->
- def meta_lilac = [
- key: meta.group_id,
- id: meta.group_id,
- ]
+ ch_versions = ch_versions.mix(REALIGNREADS.out.versions)
+
+ // Separate all BAMs by sample type so they can be merged with desired order
+ // channel: [ < meta_extra OR meta_realign >, bam, bai ]
+ ch_slice_reunited_bams = Channel.empty()
+ .mix(
+ ch_realign_inputs_sorted.skip.map { meta_extra -> [meta_extra, [], []] },
+ REALIGNREADS.out.bam,
+ )
+ .branch { meta_ambiguous, bam, bai ->
+ tumor: meta_ambiguous.sample_type == 'tumor'
+ normal: meta_ambiguous.sample_type == 'normal'
+ }
- if (Utils.hasTumorDna(meta)) {
- meta_lilac.tumor_id = Utils.getTumorDnaSampleName(meta)
- }
+ // Restore meta, pair tumor and normal BAMs
+ // channel: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ]
+ ch_dna_inputs_ready = WorkflowOncoanalyser.groupByMeta(
+ WorkflowOncoanalyser.restoreMeta(ch_slice_reunited_bams.tumor, ch_inputs),
+ WorkflowOncoanalyser.restoreMeta(ch_slice_reunited_bams.normal, ch_inputs),
+ )
- if (Utils.hasNormalDna(meta)) {
- meta_lilac.normal_id = Utils.getNormalDnaSampleName(meta)
- }
+ } else {
- return [
- meta_lilac,
- nbam_dna,
- nbai_dna,
- tbam_dna,
- tbai_dna,
- tbam_rna,
- tbai_rna,
- Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
- ]
+ // channel: [ meta, tumor_dna_bam, tumor_dna_bai, normal_dna_bam, normal_dna_bai ]
+ ch_dna_inputs_ready = ch_dna_inputs_sorted.runnable
+
+ }
+
+ //
+ // MODULE: LILAC
+ //
+ // Create process input channel
+ // channel: [ meta_lilac, normal_dna_bam, normal_dna_bai, tumor_dna_bam, tumor_dna_bai, tumor_rna_bam, tumor_rna_bai, purple_dir ]
+ ch_lilac_inputs = WorkflowOncoanalyser.groupByMeta(
+ ch_dna_inputs_ready,
+ ch_tumor_rna_bam,
+ ch_purple,
+ )
+ .map { meta, tbam_dna, tbai_dna, nbam_dna, nbai_dna, tbam_rna, tbai_rna, purple_dir ->
+
+ def meta_lilac = [
+ key: meta.group_id,
+ id: meta.group_id,
+ ]
+
+ if (Utils.hasTumorDna(meta)) {
+ meta_lilac.tumor_id = Utils.getTumorDnaSampleName(meta)
}
- // Run process
- LILAC(
- ch_lilac_inputs,
- genome_fasta,
- genome_version,
- lilac_resource_dir,
- )
+ if (Utils.hasNormalDna(meta)) {
+ meta_lilac.normal_id = Utils.getNormalDnaSampleName(meta)
+ }
- ch_versions = ch_versions.mix(LILAC.out.versions)
+ return [
+ meta_lilac,
+ nbam_dna,
+ nbai_dna,
+ tbam_dna,
+ tbai_dna,
+ tbam_rna,
+ tbai_rna,
+ Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+ ]
+ }
- // Set outputs, restoring original meta
- // channel: [ meta, amber_dir ]
- ch_outputs = Channel.empty()
- .mix(
- WorkflowOncoanalyser.restoreMeta(LILAC.out.lilac_dir, ch_inputs),
- ch_dna_inputs_sorted.skip.map { meta -> [meta, []] },
- )
+ // Run process
+ LILAC(
+ ch_lilac_inputs,
+ genome_fasta,
+ genome_version,
+ lilac_resource_dir,
+ )
+
+ ch_versions = ch_versions.mix(LILAC.out.versions)
+
+ // Set outputs, restoring original meta
+ // channel: [ meta, lilac_dir ]
+ ch_outputs = Channel.empty()
+ .mix(
+ WorkflowOncoanalyser.restoreMeta(LILAC.out.lilac_dir, ch_inputs),
+ ch_dna_inputs_sorted.skip.map { meta -> [meta, []] },
+ )
emit:
- lilac_dir = ch_outputs // channel: [ meta, lilac_dir ]
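// NOTE(editor): sketch of the per-task meta built for the slice/realign processes
// above, with hypothetical values. Because one input group fans out into a tumor task
// and a normal task, id combines group and sample so task names and work directories
// stay unique, while key retains the group id for restoring the original meta later.
def meta_extra = [group_id: 'P1', sample_id: 'P1_T', sample_type: 'tumor']
def meta_realign = [
    key: meta_extra.group_id,                               // join key back to ch_inputs
    id: "${meta_extra.group_id}__${meta_extra.sample_id}",  // unique task identity
    sample_id: meta_extra.sample_id,
    sample_type: meta_extra.sample_type,
]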
+ lilac_dir = ch_outputs // channel: [ meta, lilac_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/linx_annotation/main.nf b/subworkflows/local/linx_annotation/main.nf index 6cf17747..0381a05c 100644 --- a/subworkflows/local/linx_annotation/main.nf +++ b/subworkflows/local/linx_annotation/main.nf @@ -10,147 +10,147 @@ include { LINX_SOMATIC as SOMATIC } from '../../../modules/local/linx/somatic/ workflow LINX_ANNOTATION { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_purple // channel: [mandatory] [ meta, purple_dir ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] - // Reference data - genome_version // channel: [mandatory] genome version - ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ - known_fusion_data // channel: [mandatory] /path/to/known_fusion_data - driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + // Reference data + genome_version // channel: [mandatory] genome version + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + known_fusion_data // channel: [mandatory] /path/to/known_fusion_data + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel main: - // Channel for versions.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources and sort - // channel: runnable: [ meta, purple_dir ] - // channel: skip: [ meta ] - ch_inputs_sorted = ch_purple - .map { meta, purple_dir -> - return [ - meta, - Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), - ] - } - .branch { meta, purple_dir -> - runnable: purple_dir - skip: true - return meta - } - - // - // MODULE: LINX germline annotation - // - // Select inputs that are eligible to run - // channel: runnable: [ meta, purple_dir ] - // channel: skip: [ meta ] - ch_inputs_germline_sorted = ch_inputs_sorted.runnable - .branch { meta, purple_dir -> - - def tumor_id = Utils.getTumorDnaSampleName(meta) - - def has_tumor_normal = Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) - def has_sv_germline_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.germline.vcf.gz") - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR) - - runnable: has_tumor_normal && has_sv_germline_vcf && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta, sv_vcf ] - ch_linx_germline_inputs = ch_inputs_germline_sorted.runnable - .map { meta, purple_dir -> - - def tumor_id = Utils.getTumorDnaSampleName(meta) - - def meta_linx = [ - key: meta.group_id, - id: meta.group_id, - sample_id: tumor_id, - ] - - def sv_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.germline.vcf.gz") - - return [meta_linx, sv_vcf] - } - - // Run process - GERMLINE( - ch_linx_germline_inputs, - genome_version, - ensembl_data_resources, - driver_gene_panel, + // Channel for versions.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_purple + .map { meta, purple_dir -> + return [ + meta, + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + ] + } + .branch { meta, purple_dir -> + runnable: purple_dir + skip: true + return meta + } + + // + // MODULE: LINX germline annotation + 
// + // Select inputs that are eligible to run + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_germline_sorted = ch_inputs_sorted.runnable + .branch { meta, purple_dir -> + + def tumor_id = Utils.getTumorDnaSampleName(meta) + + def has_tumor_normal = Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) + def has_sv_germline_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.germline.vcf.gz") + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR) + + runnable: has_tumor_normal && has_sv_germline_vcf && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta, sv_vcf ] + ch_linx_germline_inputs = ch_inputs_germline_sorted.runnable + .map { meta, purple_dir -> + + def tumor_id = Utils.getTumorDnaSampleName(meta) + + def meta_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: tumor_id, + ] + + def sv_vcf = file(purple_dir).resolve("${tumor_id}.purple.sv.germline.vcf.gz") + + return [meta_linx, sv_vcf] + } + + // Run process + GERMLINE( + ch_linx_germline_inputs, + genome_version, + ensembl_data_resources, + driver_gene_panel, + ) + + ch_versions = ch_versions.mix(GERMLINE.out.versions) + + // + // MODULE: LINX somatic annotation + // + // Select inputs that are eligible to run + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_somatic_sorted = ch_inputs_sorted.runnable + .branch { meta, purple_dir -> + + def has_tumor = Utils.hasTumorDna(meta) + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR) + + runnable: has_tumor && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta, purple_dir ] + ch_linx_somatic_inputs = ch_inputs_somatic_sorted.runnable + .map { meta, purple_dir -> + + def meta_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_linx, purple_dir] + } + + // Run process + SOMATIC( + ch_linx_somatic_inputs, + genome_version, + ensembl_data_resources, + known_fusion_data, + driver_gene_panel, + ) + + ch_versions = ch_versions.mix(SOMATIC.out.versions) + + + // Set outputs, restoring original meta + // channel: [ meta, linx_annotation_dir ] + ch_somatic_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(SOMATIC.out.annotation_dir, ch_inputs), + ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, ) - ch_versions = ch_versions.mix(GERMLINE.out.versions) - - // - // MODULE: LINX somatic annotation - // - // Select inputs that are eligible to run - // channel: runnable: [ meta, purple_dir ] - // channel: skip: [ meta ] - ch_inputs_somatic_sorted = ch_inputs_sorted.runnable - .branch { meta, purple_dir -> - - def has_tumor = Utils.hasTumorDna(meta) - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR) - - runnable: has_tumor && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta, purple_dir ] - ch_linx_somatic_inputs = ch_inputs_somatic_sorted.runnable - .map { meta, purple_dir -> - - def meta_linx = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - ] - - return [meta_linx, purple_dir] - } - - // Run process - SOMATIC( - ch_linx_somatic_inputs, - genome_version, - ensembl_data_resources, - known_fusion_data, - driver_gene_panel, + ch_germline_out = Channel.empty() + .mix( + 
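// NOTE(editor): a hedged observation on the eligibility check above: Path.resolve()
// returns a Path whether or not the file exists on disk, and any non-null Path is
// truthy in Groovy, so has_sv_germline_vcf effectively tests path construction rather
// than file presence. If an on-disk check were intended, it would need to be explicit,
// e.g. (sketch only, reusing the surrounding closure's variables):
def sv_vcf_path = file(purple_dir).resolve("${tumor_id}.purple.sv.germline.vcf.gz")
def sv_vcf_exists = java.nio.file.Files.exists(sv_vcf_path)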
WorkflowOncoanalyser.restoreMeta(GERMLINE.out.annotation_dir, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, ) - ch_versions = ch_versions.mix(SOMATIC.out.versions) - - - // Set outputs, restoring original meta - // channel: [ meta, linx_annotation_dir ] - ch_somatic_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(SOMATIC.out.annotation_dir, ch_inputs), - ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] }, - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) - - ch_germline_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(GERMLINE.out.annotation_dir, ch_inputs), - ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) - emit: - somatic = ch_somatic_out // channel: [ meta, linx_annotation_dir ] - germline = ch_germline_out // channel: [ meta, linx_annotation_dir ] + somatic = ch_somatic_out // channel: [ meta, linx_annotation_dir ] + germline = ch_germline_out // channel: [ meta, linx_annotation_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/linx_plotting/main.nf b/subworkflows/local/linx_plotting/main.nf index de4665a4..4b52da0f 100644 --- a/subworkflows/local/linx_plotting/main.nf +++ b/subworkflows/local/linx_plotting/main.nf @@ -10,101 +10,101 @@ include { LINX_VISUALISER as VISUALISER } from '../../../modules/local/linx/visu workflow LINX_PLOTTING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_annotations // channel: [mandatory] [ meta, annotation_dir ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_annotations // channel: [mandatory] [ meta, annotation_dir ] - // Reference data - genome_version // channel: [mandatory] genome version - ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + // Reference data + genome_version // channel: [mandatory] genome version + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ main: - // Channel for versions.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources and sort - // channel: runnable: [ meta, annotation_dir ] - // channel: skip: [ meta ] - ch_inputs_sorted = ch_annotations - .map { meta, annotation_dir -> - return [ - meta, - Utils.selectCurrentOrExisting(annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), - ] - } - .branch { meta, annotation_dir -> - - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.LINX_PLOT_DIR_TUMOR) - - runnable: annotation_dir && !has_existing - skip: true - return meta - } - - // - // MODULE: LINX visualiser - // - // Create process input channel - // channel: [ meta_linx, annotation_dir ] - ch_linx_visualiser_inputs = ch_inputs_sorted.runnable - .map { meta, annotation_dir -> - - def meta_linx = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - ] - - return [meta_linx, annotation_dir] - } - - // Run process - VISUALISER( - ch_linx_visualiser_inputs, - genome_version, - ensembl_data_resources, - ) - - ch_versions = ch_versions.mix(VISUALISER.out.versions) - - // - // MODULE: gpgr LINX report - // - // Create process input channel - // channel: [ meta_gpgr, annotation_dir, visualiser_dir ] - ch_gpgr_linx_inputs = WorkflowOncoanalyser.groupByMeta( - ch_inputs_sorted.runnable, + // Channel for versions.yml files + // channel: [ 
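// NOTE(editor): sketch of why restoreMeta precedes groupByMeta in the gpgr report
// input assembly nearby. groupByMeta joins tuples on a shared meta, but VISUALISER
// emits its process-scoped meta_linx rather than the original meta, so the plots
// channel is first mapped back onto the ch_inputs meta. A minimal stand-in for that
// behaviour, assuming restoreMeta keys on meta_linx.key == meta.group_id:
ch_plots_restored = VISUALISER.out.plots
    .map { meta_linx, plots_dir -> [meta_linx.key, plots_dir] }
    .join(ch_inputs.map { meta -> [meta.group_id, meta] })
    .map { key, plots_dir, meta -> [meta, plots_dir] }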
versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, annotation_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_annotations + .map { meta, annotation_dir -> + return [ + meta, + Utils.selectCurrentOrExisting(annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + ] + } + .branch { meta, annotation_dir -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.LINX_PLOT_DIR_TUMOR) + + runnable: annotation_dir && !has_existing + skip: true + return meta + } + + // + // MODULE: LINX visualiser + // + // Create process input channel + // channel: [ meta_linx, annotation_dir ] + ch_linx_visualiser_inputs = ch_inputs_sorted.runnable + .map { meta, annotation_dir -> + + def meta_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_linx, annotation_dir] + } + + // Run process + VISUALISER( + ch_linx_visualiser_inputs, + genome_version, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(VISUALISER.out.versions) + + // + // MODULE: gpgr LINX report + // + // Create process input channel + // channel: [ meta_gpgr, annotation_dir, visualiser_dir ] + ch_gpgr_linx_inputs = WorkflowOncoanalyser.groupByMeta( + ch_inputs_sorted.runnable, + WorkflowOncoanalyser.restoreMeta(VISUALISER.out.plots, ch_inputs), + ) + .map { meta, annotation_dir, visualiser_dir -> + + def meta_gpgr_linx = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_gpgr_linx, annotation_dir, visualiser_dir] + } + + // Run process + REPORT( + ch_gpgr_linx_inputs, + ) + + ch_versions = ch_versions.mix(REPORT.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, visualiser_dir ] + ch_visualiser_dir_out = Channel.empty() + .mix( WorkflowOncoanalyser.restoreMeta(VISUALISER.out.plots, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, []] }, ) - .map { meta, annotation_dir, visualiser_dir -> - - def meta_gpgr_linx = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - ] - - return [meta_gpgr_linx, annotation_dir, visualiser_dir] - } - - // Run process - REPORT( - ch_gpgr_linx_inputs, - ) - - ch_versions = ch_versions.mix(REPORT.out.versions) - - // Set outputs, restoring original meta - // channel: [ meta, visualiser_dir ] - ch_visualiser_dir_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(VISUALISER.out.plots, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) emit: - visualiser_dir = ch_visualiser_dir_out // channel: [ meta, visualiser_dir ] + visualiser_dir = ch_visualiser_dir_out // channel: [ meta, visualiser_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/orange_reporting/main.nf b/subworkflows/local/orange_reporting/main.nf index fcaada4c..451fa9d0 100644 --- a/subworkflows/local/orange_reporting/main.nf +++ b/subworkflows/local/orange_reporting/main.nf @@ -9,234 +9,234 @@ include { ORANGE } from '../../../modules/local/orange/main' workflow ORANGE_REPORTING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_bamtools_somatic // channel: [mandatory] [ meta, metrics ] - ch_bamtools_germline // channel: [mandatory] [ meta, metrics ] - ch_flagstat_somatic // channel: [mandatory] [ meta, metrics ] - ch_flagstat_germline // channel: [mandatory] [ meta, metrics ] - ch_sage_somatic // 
channel: [mandatory] [ meta, sage_dir ] - ch_sage_germline // channel: [mandatory] [ meta, sage_dir ] - ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] - ch_sage_germline_append // channel: [mandatory] [ meta, sage_append_vcf ] - ch_purple // channel: [mandatory] [ meta, purple_dir ] - ch_linx_somatic_annotation // channel: [mandatory] [ meta, linx_annotation_dir ] - ch_linx_somatic_plot // channel: [mandatory] [ meta, linx_visualiser_dir ] - ch_linx_germline_annotation // channel: [mandatory] [ meta, linx_annotation_dir ] - ch_virusinterpreter // channel: [mandatory] [ meta, virusinterpreter_dir ] - ch_chord // channel: [mandatory] [ meta, chord_dir ] - ch_sigs // channel: [mandatory] [ meta, sigs_dir ] - ch_lilac // channel: [mandatory] [ meta, lilac_dir ] - ch_cuppa // channel: [mandatory] [ meta, cuppa_dir ] - ch_isofox // channel: [mandatory] [ meta, isofox_dir ] - - // Reference data - genome_version // channel: [mandatory] genome version - disease_ontology // channel: [mandatory] /path/to/disease_ontology - cohort_mapping // channel: [mandatory] /path/to/cohort_mapping - cohort_percentiles // channel: [mandatory] /path/to/cohort_percentiles - known_fusion_data // channel: [mandatory] /path/to/known_fusion_data - driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel - ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ - isofox_alt_sj // channel: [optional] /path/to/isofox_alt_sj - isofox_gene_distribution // channel: [optional] /path/to/isofox_gene_distribution + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_bamtools_somatic // channel: [mandatory] [ meta, metrics ] + ch_bamtools_germline // channel: [mandatory] [ meta, metrics ] + ch_flagstat_somatic // channel: [mandatory] [ meta, metrics ] + ch_flagstat_germline // channel: [mandatory] [ meta, metrics ] + ch_sage_somatic // channel: [mandatory] [ meta, sage_dir ] + ch_sage_germline // channel: [mandatory] [ meta, sage_dir ] + ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_sage_germline_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_linx_somatic_annotation // channel: [mandatory] [ meta, linx_annotation_dir ] + ch_linx_somatic_plot // channel: [mandatory] [ meta, linx_visualiser_dir ] + ch_linx_germline_annotation // channel: [mandatory] [ meta, linx_annotation_dir ] + ch_virusinterpreter // channel: [mandatory] [ meta, virusinterpreter_dir ] + ch_chord // channel: [mandatory] [ meta, chord_dir ] + ch_sigs // channel: [mandatory] [ meta, sigs_dir ] + ch_lilac // channel: [mandatory] [ meta, lilac_dir ] + ch_cuppa // channel: [mandatory] [ meta, cuppa_dir ] + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + disease_ontology // channel: [mandatory] /path/to/disease_ontology + cohort_mapping // channel: [mandatory] /path/to/cohort_mapping + cohort_percentiles // channel: [mandatory] /path/to/cohort_percentiles + known_fusion_data // channel: [mandatory] /path/to/known_fusion_data + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + isofox_alt_sj // channel: [optional] /path/to/isofox_alt_sj + isofox_gene_distribution // channel: [optional] /path/to/isofox_gene_distribution main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = 
Channel.empty() - - // Set expected input ordering and size - input_expected_size = 18 - - dna_tumor_input_indexes = [ - 0, // bamtools_somatic - 2, // flagstat_somatic - 4, // sage_somatic - 8, // purple_dir - 9, // linx_somatic_annotation - 10, // linx_somatic_plot_dir - 15, // lilac_dir - ] - - dna_normal_input_indexes = [ - 1, // bamtools_germline - 3, // flagstat_germline - 5, // sage_germline - 11, // linx_germline_annotation - ] - - rna_tumor_input_indexes = [ - 6, // sage_somatic_append - 17, // isofox_dir - ] - - rna_sage_germline_append_index = 7 // sage_germline_append - - // Select input sources - // channel: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] - ch_inputs_selected = WorkflowOncoanalyser.groupByMeta( - ch_bamtools_somatic, - ch_bamtools_germline, - ch_flagstat_somatic, - ch_flagstat_germline, - ch_sage_somatic, - ch_sage_germline, - ch_sage_somatic_append, - ch_sage_germline_append, - ch_purple, - ch_linx_somatic_annotation, - ch_linx_somatic_plot, - ch_linx_germline_annotation, - ch_virusinterpreter, - ch_chord, - ch_sigs, - ch_lilac, - ch_cuppa, - ch_isofox, + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Set expected input ordering and size + input_expected_size = 18 + + dna_tumor_input_indexes = [ + 0, // bamtools_somatic + 2, // flagstat_somatic + 4, // sage_somatic + 8, // purple_dir + 9, // linx_somatic_annotation + 10, // linx_somatic_plot_dir + 15, // lilac_dir + ] + + dna_normal_input_indexes = [ + 1, // bamtools_germline + 3, // flagstat_germline + 5, // sage_germline + 11, // linx_germline_annotation + ] + + rna_tumor_input_indexes = [ + 6, // sage_somatic_append + 17, // isofox_dir + ] + + rna_sage_germline_append_index = 7 // sage_germline_append + + // Select input sources + // channel: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] + ch_inputs_selected = WorkflowOncoanalyser.groupByMeta( + ch_bamtools_somatic, + ch_bamtools_germline, + ch_flagstat_somatic, + ch_flagstat_germline, + ch_sage_somatic, + ch_sage_germline, + ch_sage_somatic_append, + ch_sage_germline_append, + ch_purple, + ch_linx_somatic_annotation, + ch_linx_somatic_plot, + ch_linx_germline_annotation, + ch_virusinterpreter, + ch_chord, + ch_sigs, + ch_lilac, + ch_cuppa, + ch_isofox, + ) + .map { d -> + + def meta = d[0] + def inputs = d[1..-1] + + assert inputs.size() == input_expected_size + + // NOTE(SW): avoiding further complexity with loops etc + + def inputs_selected = [ + Utils.selectCurrentOrExisting(inputs[0], meta, Constants.INPUT.BAMTOOLS_TUMOR), + Utils.selectCurrentOrExisting(inputs[1], meta, Constants.INPUT.BAMTOOLS_NORMAL), + Utils.selectCurrentOrExisting(inputs[2], meta, Constants.INPUT.FLAGSTAT_TUMOR), + Utils.selectCurrentOrExisting(inputs[3], meta, Constants.INPUT.FLAGSTAT_NORMAL), + Utils.selectCurrentOrExisting(inputs[4], meta, Constants.INPUT.SAGE_DIR_TUMOR), + Utils.selectCurrentOrExisting(inputs[5], meta, Constants.INPUT.SAGE_DIR_NORMAL), + Utils.selectCurrentOrExisting(inputs[6], meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), + Utils.selectCurrentOrExisting(inputs[7], meta, 
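// NOTE(editor): sketch of the presence checks used with the index lists above, with
// toy values. Missing inputs are carried as empty lists, which are falsy in Groovy,
// so collect { inputs[i] }.every() is true only when every indexed slot holds a real
// value; a single [] placeholder fails the check.
def toy_inputs = ['t.metrics', [], 't.sage_dir']   // hypothetical: slot 1 missing
assert ['t.metrics', 't.sage_dir'].every()         // all elements truthy
assert !toy_inputs.every()                         // [] placeholder is falsy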
Constants.INPUT.SAGE_APPEND_VCF_NORMAL), + Utils.selectCurrentOrExisting(inputs[8], meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(inputs[9], meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + Utils.selectCurrentOrExisting(inputs[10], meta, Constants.INPUT.LINX_PLOT_DIR_TUMOR), + Utils.selectCurrentOrExisting(inputs[11], meta, Constants.INPUT.LINX_ANNO_DIR_NORMAL), + Utils.selectCurrentOrExisting(inputs[12], meta, Constants.INPUT.VIRUSINTERPRETER_DIR), + Utils.selectCurrentOrExisting(inputs[13], meta, Constants.INPUT.CHORD_DIR), + Utils.selectCurrentOrExisting(inputs[14], meta, Constants.INPUT.SIGS_DIR), + Utils.selectCurrentOrExisting(inputs[15], meta, Constants.INPUT.LILAC_DIR), + Utils.selectCurrentOrExisting(inputs[16], meta, Constants.INPUT.CUPPA_DIR), + Utils.selectCurrentOrExisting(inputs[17], meta, Constants.INPUT.ISOFOX_DIR), + ] + + return [meta, *inputs_selected] + } + + // Sort inputs + // channel: runnable: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { d -> + + def meta = d[0] + def inputs = d[1..-1] + + def has_dna_tumor = dna_tumor_input_indexes + .collect { i -> inputs[i] } + .every() + + def has_rna_tumor = rna_tumor_input_indexes + .collect { i -> inputs[i] } + .every() + + runnable_dna_and_rna: has_dna_tumor && has_rna_tumor + runnable_dna: has_dna_tumor + skip: true + return meta + } + + // First set RNA reference files + // NOTE(SW): since the RNA reference files are provided as channels, I seem to be only able to include via channel ops + // channel: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir, isofox_alt_sj, isofox_gene_distribution ] + ch_inputs_runnable = Channel.empty() + .mix( + ch_inputs_sorted.runnable_dna.map { d -> [*d, [], []] }, + ch_inputs_sorted.runnable_dna_and_rna + .combine(isofox_alt_sj) + .combine(isofox_gene_distribution), ) - .map { d -> - - def meta = d[0] - def inputs = d[1..-1] - - assert inputs.size() == input_expected_size - - // NOTE(SW): avoiding further complexity with loops etc - - def inputs_selected = [ - Utils.selectCurrentOrExisting(inputs[0], meta, Constants.INPUT.BAMTOOLS_TUMOR), - Utils.selectCurrentOrExisting(inputs[1], meta, Constants.INPUT.BAMTOOLS_NORMAL), - Utils.selectCurrentOrExisting(inputs[2], meta, Constants.INPUT.FLAGSTAT_TUMOR), - Utils.selectCurrentOrExisting(inputs[3], meta, Constants.INPUT.FLAGSTAT_NORMAL), - Utils.selectCurrentOrExisting(inputs[4], meta, Constants.INPUT.SAGE_DIR_TUMOR), - Utils.selectCurrentOrExisting(inputs[5], meta, Constants.INPUT.SAGE_DIR_NORMAL), - Utils.selectCurrentOrExisting(inputs[6], meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), - Utils.selectCurrentOrExisting(inputs[7], meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL), - Utils.selectCurrentOrExisting(inputs[8], meta, Constants.INPUT.PURPLE_DIR), - Utils.selectCurrentOrExisting(inputs[9], meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), - Utils.selectCurrentOrExisting(inputs[10], meta, Constants.INPUT.LINX_PLOT_DIR_TUMOR), - Utils.selectCurrentOrExisting(inputs[11], meta, Constants.INPUT.LINX_ANNO_DIR_NORMAL), - Utils.selectCurrentOrExisting(inputs[12], 
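// NOTE(editor): toy sketch of the branch semantics relied on above: each item is
// assigned to the FIRST label whose predicate holds, so a sample with both DNA and
// RNA results lands in runnable_dna_and_rna and never reaches the weaker runnable_dna
// predicate, while skip: true acts as the catch-all.
Channel.of([dna: true, rna: true], [dna: true, rna: false], [dna: false, rna: false])
    .branch { d ->
        runnable_dna_and_rna: d.dna && d.rna
        runnable_dna: d.dna
        skip: true
    }
    .set { ch_toy_sorted }  // one item per label in this toy example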
meta, Constants.INPUT.VIRUSINTERPRETER_DIR), - Utils.selectCurrentOrExisting(inputs[13], meta, Constants.INPUT.CHORD_DIR), - Utils.selectCurrentOrExisting(inputs[14], meta, Constants.INPUT.SIGS_DIR), - Utils.selectCurrentOrExisting(inputs[15], meta, Constants.INPUT.LILAC_DIR), - Utils.selectCurrentOrExisting(inputs[16], meta, Constants.INPUT.CUPPA_DIR), - Utils.selectCurrentOrExisting(inputs[17], meta, Constants.INPUT.ISOFOX_DIR), - ] - - return [meta, *inputs_selected] - } - // Sort inputs - // channel: runnable: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] - // channel: skip: [ meta ] - ch_inputs_sorted = ch_inputs_selected - .branch { d -> + // Create process input channel + // channel: sample_data: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsmlv_vcf, nsmlv_vcf, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] + // channel: isofox_alt_sj: [ isofox_alt_sj ] + // channel: isofox_gene_distribution: [ isofox_gene_distribution ] + ch_orange_inputs = ch_inputs_runnable + .multiMap { d -> + + def meta = d[0] + def inputs = d[1..-3] + + def isofox_alt_sj = d[-2] + def isofox_gene_distribution = d[-1] - def meta = d[0] - def inputs = d[1..-1] + def meta_orange = [ + key: meta.group_id, + id: meta.group_id, + tumor_id: Utils.getTumorDnaSampleName(meta), + ] - def has_dna_tumor = dna_tumor_input_indexes - .collect { i -> inputs[i] } - .every() + def inputs_selected = inputs.clone() - def has_rna_tumor = rna_tumor_input_indexes - .collect { i -> inputs[i] } - .every() + // Require all normal DNA inputs to be present else clear them + def has_dna_normal = dna_normal_input_indexes + .collect { i -> inputs[i] } + .every() - runnable_dna_and_rna: has_dna_tumor && has_rna_tumor - runnable_dna: has_dna_tumor - skip: true - return meta + if (has_dna_normal) { + meta_orange.normal_dna_id = Utils.getNormalDnaSampleName(meta) + } else { + dna_normal_input_indexes.each { i -> inputs_selected[i] = [] } } - // First set RNA reference files - // NOTE(SW): since the RNA reference files are provided as channels, I seem to be only able to include via channel ops - // channel: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir, isofox_alt_sj, isofox_gene_distribution ] - ch_inputs_runnable = Channel.empty() - .mix( - ch_inputs_sorted.runnable_dna.map { d -> [*d, [], []] }, - ch_inputs_sorted.runnable_dna_and_rna - .combine(isofox_alt_sj) - .combine(isofox_gene_distribution), - ) - - // Create process input channel - // channel: sample_data: [ meta, tbt_metrics, nbt_metrics, tfs_metrics, nfs_metrics, tsage_dir, nsage_dir, tsmlv_vcf, nsmlv_vcf, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, isofox_dir ] - // channel: isofox_alt_sj: [ isofox_alt_sj ] - // channel: isofox_gene_distribution: [ isofox_gene_distribution ] - ch_orange_inputs = ch_inputs_runnable - .multiMap { d -> - - def meta = d[0] - def inputs = d[1..-3] - - def isofox_alt_sj = d[-2] - def isofox_gene_distribution = d[-1] - - def meta_orange = [ - key: 
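// NOTE(editor): minimal multiMap sketch with hypothetical tuples. multiMap emits to
// every labelled output once per source item, so sample_data and the two Isofox
// reference channels used here stay in lockstep and arrive at ORANGE as parallel
// queue channels of equal length.
Channel.of(['m1', 'data1', 'alt_sj1', 'gene_dist1'], ['m2', 'data2', 'alt_sj2', 'gene_dist2'])
    .multiMap { d ->
        sample_data: [d[0], d[1]]
        isofox_alt_sj: d[2]
        isofox_gene_distribution: d[3]
    }
    .set { ch_toy_orange }
// ch_toy_orange.sample_data, ch_toy_orange.isofox_alt_sj, ... emit in matching order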
meta.group_id, - id: meta.group_id, - tumor_id: Utils.getTumorDnaSampleName(meta), - ] - - def inputs_selected = inputs.clone() - - // Require all normal DNA inputs to be present else clear them - def has_dna_normal = dna_normal_input_indexes - .collect { i -> inputs[i] } - .every() - - if (has_dna_normal) { - meta_orange.normal_dna_id = Utils.getNormalDnaSampleName(meta) - } else { - dna_normal_input_indexes.each { i -> inputs_selected[i] = [] } - } - - // Require all tumor RNA inputs to be present else clear them - // SAGE append germline is only required when normal DNA is present - def rna_tumor_input_indexes_ready - if (has_dna_normal) { - rna_tumor_input_indexes_ready = [*rna_tumor_input_indexes, rna_sage_germline_append_index] - } else { - rna_tumor_input_indexes_ready = rna_tumor_input_indexes.clone() - } - - def has_rna_tumor = rna_tumor_input_indexes_ready - .collect { i -> inputs[i] } - .every() - - if (has_rna_tumor) { - meta_orange.tumor_rna_id = Utils.getTumorRnaSampleName(meta) - } else { - rna_tumor_input_indexes.each { i -> inputs_selected[i] = [] } - } - - assert inputs_selected.size() == input_expected_size - - sample_data: [meta_orange, *inputs_selected] - isofox_alt_sj: isofox_alt_sj - isofox_gene_distribution: isofox_gene_distribution + // Require all tumor RNA inputs to be present else clear them + // SAGE append germline is only required when normal DNA is present + def rna_tumor_input_indexes_ready + if (has_dna_normal) { + rna_tumor_input_indexes_ready = [*rna_tumor_input_indexes, rna_sage_germline_append_index] + } else { + rna_tumor_input_indexes_ready = rna_tumor_input_indexes.clone() } - // Run process - ORANGE( - ch_orange_inputs.sample_data, - genome_version, - disease_ontology, - cohort_mapping, - cohort_percentiles, - known_fusion_data, - driver_gene_panel, - ensembl_data_resources, - ch_orange_inputs.isofox_alt_sj, - ch_orange_inputs.isofox_gene_distribution, - "5.34 [oncoanalyser]", - ) + def has_rna_tumor = rna_tumor_input_indexes_ready + .collect { i -> inputs[i] } + .every() + + if (has_rna_tumor) { + meta_orange.tumor_rna_id = Utils.getTumorRnaSampleName(meta) + } else { + rna_tumor_input_indexes.each { i -> inputs_selected[i] = [] } + } - ch_versions = ch_versions.mix(ORANGE.out.versions) + assert inputs_selected.size() == input_expected_size + + sample_data: [meta_orange, *inputs_selected] + isofox_alt_sj: isofox_alt_sj + isofox_gene_distribution: isofox_gene_distribution + } + + // Run process + ORANGE( + ch_orange_inputs.sample_data, + genome_version, + disease_ontology, + cohort_mapping, + cohort_percentiles, + known_fusion_data, + driver_gene_panel, + ensembl_data_resources, + ch_orange_inputs.isofox_alt_sj, + ch_orange_inputs.isofox_gene_distribution, + "5.34 [oncoanalyser]", + ) + + ch_versions = ch_versions.mix(ORANGE.out.versions) emit: - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/pave_annotation/main.nf b/subworkflows/local/pave_annotation/main.nf index 1517f92e..2092581e 100644 --- a/subworkflows/local/pave_annotation/main.nf +++ b/subworkflows/local/pave_annotation/main.nf @@ -10,170 +10,170 @@ include { PAVE_SOMATIC as SOMATIC } from '../../../modules/local/pave/somatic/ workflow PAVE_ANNOTATION { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_sage_germline_vcf // channel: [mandatory] [ meta, sage_germline_vcf, sage_somatic_tbi ] - ch_sage_somatic_vcf // channel: [mandatory] [ meta, sage_somatic_vcf, 
sage_somatic_tbi ] - - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version - genome_fai // channel: [mandatory] /path/to/genome_fai - sage_pon // channel: [mandatory] /path/to/sage_pon - pon_artefacts // channel: [optional] /path/to/pon_artefacts - sage_blocklist_regions // channel: [mandatory] /path/to/sage_blocklist_regions - sage_blocklist_sites // channel: [mandatory] /path/to/sage_blocklist_sites - clinvar_annotations // channel: [mandatory] /path/to/clinvar_annotations - segment_mappability // channel: [mandatory] /path/to/segment_mappability - driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel - ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ - gnomad_resource // channel: [mandatory] /path/to/gnomad_resource + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_sage_germline_vcf // channel: [mandatory] [ meta, sage_germline_vcf, sage_somatic_tbi ] + ch_sage_somatic_vcf // channel: [mandatory] [ meta, sage_somatic_vcf, sage_somatic_tbi ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + sage_pon // channel: [mandatory] /path/to/sage_pon + pon_artefacts // channel: [optional] /path/to/pon_artefacts + sage_blocklist_regions // channel: [mandatory] /path/to/sage_blocklist_regions + sage_blocklist_sites // channel: [mandatory] /path/to/sage_blocklist_sites + clinvar_annotations // channel: [mandatory] /path/to/clinvar_annotations + segment_mappability // channel: [mandatory] /path/to/segment_mappability + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + gnomad_resource // channel: [mandatory] /path/to/gnomad_resource main: - // Channel for version.yml files - ch_versions = Channel.empty() - - // - // MODULE: PAVE germline - // - // Select input sources and sort - // channel: runnable: [ meta, sage_vcf, sage_tbi ] - // channel: skip: [ meta ] - ch_sage_germline_inputs_sorted = ch_sage_germline_vcf - .map { meta, sage_vcf, sage_tbi -> - return [ - meta, - Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_NORMAL), - Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_NORMAL), - ] - } - .branch { meta, sage_vcf, sage_tbi -> - - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PAVE_VCF_NORMAL) - - runnable: Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) && sage_vcf && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_pave, sage_vcf, sage_tbi ] - ch_pave_germline_inputs = ch_sage_germline_inputs_sorted.runnable - .map { meta, sage_vcf, sage_tbi -> - - def meta_pave = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - ] - - return [meta_pave, sage_vcf, sage_tbi] - } - - // Run process - GERMLINE( - ch_pave_germline_inputs, - genome_fasta, - genome_version, - genome_fai, - sage_blocklist_regions, - sage_blocklist_sites, - clinvar_annotations, - segment_mappability, - driver_gene_panel, - ensembl_data_resources, - gnomad_resource, - ) + // Channel for version.yml files + ch_versions = Channel.empty() + + // + // MODULE: PAVE germline + // + // Select input sources and sort + // channel: runnable: [ meta, sage_vcf, sage_tbi ] + // channel: skip: [ 
meta ] + ch_sage_germline_inputs_sorted = ch_sage_germline_vcf + .map { meta, sage_vcf, sage_tbi -> + return [ + meta, + Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_NORMAL), + Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_NORMAL), + ] + } + .branch { meta, sage_vcf, sage_tbi -> + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PAVE_VCF_NORMAL) - ch_versions = ch_versions.mix(GERMLINE.out.versions) - - // - // MODULE: PAVE somatic - // - // Select input sources and sort - // channel: runnable: [ meta, sage_vcf, sage_tbi ] - // channel: skip: [ meta ] - ch_sage_somatic_inputs_sorted = ch_sage_somatic_vcf - .map { meta, sage_vcf, sage_tbi -> - return [ - meta, - Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_TUMOR), - Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_TUMOR), - ] - } - .branch { meta, sage_vcf, sage_tbi -> - - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PAVE_VCF_TUMOR) - - runnable: Utils.hasTumorDna(meta) && sage_vcf && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_pave, sage_vcf, sage_tbi ] - ch_pave_somatic_inputs = ch_sage_somatic_inputs_sorted.runnable - .map { meta, sage_vcf, sage_tbi -> - - def meta_pave = [ - key: meta.group_id, - id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), - ] - - return [meta_pave, sage_vcf, sage_tbi] - } - - // Set resource files according to run mode - // NOTE(SW): required since certain files can be used in germline and somatic depending on mode - // but want to avoid duplicating as multiple inputs - // NOTE(SW): this pattern should be used only sparingly; implicit config from workflows is prefered - sage_blocklist_regions_somatic = sage_blocklist_regions - sage_blocklist_sites_somatic = sage_blocklist_sites - clinvar_annotations_somatic = clinvar_annotations - run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) - if (run_mode === Constants.RunMode.WGTS) { - sage_blocklist_regions_somatic = [] - sage_blocklist_sites_somatic = [] - clinvar_annotations_somatic = [] + runnable: Utils.hasTumorDna(meta) && Utils.hasNormalDna(meta) && sage_vcf && !has_existing + skip: true + return meta } - // Run process - SOMATIC( - ch_pave_somatic_inputs, - genome_fasta, - genome_version, - genome_fai, - sage_pon, - pon_artefacts, - sage_blocklist_regions_somatic, - sage_blocklist_sites_somatic, - clinvar_annotations_somatic, - segment_mappability, - driver_gene_panel, - ensembl_data_resources, - gnomad_resource, - ) + // Create process input channel + // channel: [ meta_pave, sage_vcf, sage_tbi ] + ch_pave_germline_inputs = ch_sage_germline_inputs_sorted.runnable + .map { meta, sage_vcf, sage_tbi -> - ch_versions = ch_versions.mix(SOMATIC.out.versions) + def meta_pave = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] - // Set outputs, restoring original meta - // channel: [ meta, pave_vcf ] - ch_somatic_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs), - ch_sage_somatic_inputs_sorted.skip.map { meta -> [meta, []] }, - ) + return [meta_pave, sage_vcf, sage_tbi] + } - ch_germline_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs), - ch_sage_germline_inputs_sorted.skip.map { meta -> [meta, []] }, - ) + // Run process + GERMLINE( + ch_pave_germline_inputs, + genome_fasta, + genome_version, + genome_fai, 
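// NOTE(editor): sketch of the run-mode switch that appears in the somatic PAVE setup
// nearby, hedged as a reading of the NOTE(SW) comments there. In WGTS mode the
// germline PAVE process already declares the blocklist and ClinVar files as inputs,
// so the somatic process receives [] for those slots rather than staging the same
// reference files twice; in targeted mode the somatic process keeps them.
def run_mode_toy = Utils.getEnumFromString(params.mode, Constants.RunMode)
def clinvar_somatic_toy = (run_mode_toy === Constants.RunMode.WGTS) ? [] : clinvar_annotations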
+ sage_blocklist_regions,
+ sage_blocklist_sites,
+ clinvar_annotations,
+ segment_mappability,
+ driver_gene_panel,
+ ensembl_data_resources,
+ gnomad_resource,
+ )
+
+ ch_versions = ch_versions.mix(GERMLINE.out.versions)
+
+ //
+ // MODULE: PAVE somatic
+ //
+ // Select input sources and sort
+ // channel: runnable: [ meta, sage_vcf, sage_tbi ]
+ // channel: skip: [ meta ]
+ ch_sage_somatic_inputs_sorted = ch_sage_somatic_vcf
+ .map { meta, sage_vcf, sage_tbi ->
+ return [
+ meta,
+ Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_TUMOR),
+ Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_TUMOR),
+ ]
+ }
+ .branch { meta, sage_vcf, sage_tbi ->
+
+ def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PAVE_VCF_TUMOR)
+
+ runnable: Utils.hasTumorDna(meta) && sage_vcf && !has_existing
+ skip: true
+ return meta
+ }
+
+ // Create process input channel
+ // channel: [ meta_pave, sage_vcf, sage_tbi ]
+ ch_pave_somatic_inputs = ch_sage_somatic_inputs_sorted.runnable
+ .map { meta, sage_vcf, sage_tbi ->
+
+ def meta_pave = [
+ key: meta.group_id,
+ id: meta.group_id,
+ sample_id: Utils.getTumorDnaSampleName(meta),
+ ]
+
+ return [meta_pave, sage_vcf, sage_tbi]
+ }
+
+ // Set resource files according to run mode
+ // NOTE(SW): required since certain files can be used in germline and somatic depending on mode
+ // but want to avoid duplicating as multiple inputs
+ // NOTE(SW): this pattern should be used only sparingly; implicit config from workflows is preferred
+ sage_blocklist_regions_somatic = sage_blocklist_regions
+ sage_blocklist_sites_somatic = sage_blocklist_sites
+ clinvar_annotations_somatic = clinvar_annotations
+ run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode)
+ if (run_mode === Constants.RunMode.WGTS) {
+ sage_blocklist_regions_somatic = []
+ sage_blocklist_sites_somatic = []
+ clinvar_annotations_somatic = []
+ }
+
+ // Run process
+ SOMATIC(
+ ch_pave_somatic_inputs,
+ genome_fasta,
+ genome_version,
+ genome_fai,
+ sage_pon,
+ pon_artefacts,
+ sage_blocklist_regions_somatic,
+ sage_blocklist_sites_somatic,
+ clinvar_annotations_somatic,
+ segment_mappability,
+ driver_gene_panel,
+ ensembl_data_resources,
+ gnomad_resource,
+ )
+
+ ch_versions = ch_versions.mix(SOMATIC.out.versions)
+
+ // Set outputs, restoring original meta
+ // channel: [ meta, pave_vcf ]
+ ch_somatic_out = Channel.empty()
+ .mix(
+ WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs),
+ ch_sage_somatic_inputs_sorted.skip.map { meta -> [meta, []] },
+ )
+
+ ch_germline_out = Channel.empty()
+ .mix(
+ WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs),
+ ch_sage_germline_inputs_sorted.skip.map { meta -> [meta, []] },
+ )
emit:
- germline = ch_germline_out // channel: [ meta, pave_vcf ]
- somatic = ch_somatic_out // channel: [ meta, pave_vcf ]
+ germline = ch_germline_out // channel: [ meta, pave_vcf ]
+ somatic = ch_somatic_out // channel: [ meta, pave_vcf ]
- versions = ch_versions // channel: [ versions.yml ]
+ versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/prepare_inputs/main.nf b/subworkflows/local/prepare_inputs/main.nf
index 18abe8eb..8a10e735 100644
--- a/subworkflows/local/prepare_inputs/main.nf
+++ b/subworkflows/local/prepare_inputs/main.nf
@@ -12,14 +12,13 @@ import Utils
workflow PREPARE_INPUTS {
take:
- input_fp_str
+ input_fp_str
main:
-
- ch_inputs = Channel.fromList(
- Utils.parseInput(input_fp_str, workflow.stubRun, log)
- )
+ ch_inputs = Channel.fromList(
+
Utils.parseInput(input_fp_str, workflow.stubRun, log) + ) emit: - inputs = ch_inputs // channel: [ meta ] + inputs = ch_inputs // channel: [ meta ] } diff --git a/subworkflows/local/prepare_reference/main.nf b/subworkflows/local/prepare_reference/main.nf index 3d0ab042..1b1bf8d5 100644 --- a/subworkflows/local/prepare_reference/main.nf +++ b/subworkflows/local/prepare_reference/main.nf @@ -21,253 +21,253 @@ include { WRITE_REFERENCE_DATA } from '../../../modu workflow PREPARE_REFERENCE { take: - run_config // channel: [mandatory] run configuration + run_config // channel: [mandatory] run configuration main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // - // Set some variables for brevity - // - ch_genome_fasta = Channel.fromPath(params.ref_data_genome_fasta) - ch_genome_version = Channel.value(params.genome_version) - run_virusinterpreter = run_config.mode !== Constants.RunMode.TARGETED && run_config.stages.virusinterpreter - - // - // Set .fai and .dict indexes, create if required - // - ch_genome_fai = getRefFileChannel('ref_data_genome_fai') - if (!params.ref_data_genome_fai) { - SAMTOOLS_FAIDX(ch_genome_fasta) - ch_genome_fai = SAMTOOLS_FAIDX.out.fai - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - } - - ch_genome_dict = getRefFileChannel('ref_data_genome_dict') - if (!params.ref_data_genome_dict) { - SAMTOOLS_DICT(ch_genome_fasta) - ch_genome_dict = SAMTOOLS_DICT.out.dict - ch_versions = ch_versions.mix(SAMTOOLS_DICT.out.versions) - } + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // + // Set some variables for brevity + // + ch_genome_fasta = Channel.fromPath(params.ref_data_genome_fasta) + ch_genome_version = Channel.value(params.genome_version) + run_virusinterpreter = run_config.mode !== Constants.RunMode.TARGETED && run_config.stages.virusinterpreter + + // + // Set .fai and .dict indexes, create if required + // + ch_genome_fai = getRefFileChannel('ref_data_genome_fai') + if (!params.ref_data_genome_fai) { + SAMTOOLS_FAIDX(ch_genome_fasta) + ch_genome_fai = SAMTOOLS_FAIDX.out.fai + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + } + + ch_genome_dict = getRefFileChannel('ref_data_genome_dict') + if (!params.ref_data_genome_dict) { + SAMTOOLS_DICT(ch_genome_fasta) + ch_genome_dict = SAMTOOLS_DICT.out.dict + ch_versions = ch_versions.mix(SAMTOOLS_DICT.out.versions) + } + + // + // Set bwa-mem2 index, unpack or create if required + // + ch_genome_bwa_index = Channel.empty() + if (run_config.has_dna && run_config.stages.alignment) { + if (!params.ref_data_genome_bwa_index) { + + BWAMEM2_INDEX( + ch_genome_fasta, + params.ref_data_genome_alt ? file(params.ref_data_genome_alt) : [], + ) + ch_genome_bwa_index = BWAMEM2_INDEX.out.index + ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - // - // Set bwa-mem2 index, unpack or create if required - // - ch_genome_bwa_index = Channel.empty() - if (run_config.has_dna && run_config.stages.alignment) { - if (!params.ref_data_genome_bwa_index) { + } else if (params.ref_data_genome_bwa_index.endsWith('.tar.gz')) { - BWAMEM2_INDEX( - ch_genome_fasta, - params.ref_data_genome_alt ? 
file(params.ref_data_genome_alt) : [], - ) - ch_genome_bwa_index = BWAMEM2_INDEX.out.index - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) + ch_genome_bwa_index_inputs = Channel.fromPath(params.ref_data_genome_bwa_index) + .map { [[id: "bwa-mem2_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } - } else if (params.ref_data_genome_bwa_index.endsWith('.tar.gz')) { + DECOMP_BWAMEM2_INDEX(ch_genome_bwa_index_inputs) + ch_genome_bwa_index = DECOMP_BWAMEM2_INDEX.out.dir - ch_genome_bwa_index_inputs = Channel.fromPath(params.ref_data_genome_bwa_index) - .map { [[id: "bwa-mem2_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + } else { - DECOMP_BWAMEM2_INDEX(ch_genome_bwa_index_inputs) - ch_genome_bwa_index = DECOMP_BWAMEM2_INDEX.out.dir + ch_genome_bwa_index = getRefFileChannel('ref_data_genome_bwa_index') - } else { + } + } + + // + // Set and GRIDSS index, unpack or create if required + // + ch_genome_gridss_index = Channel.empty() + if (run_config.has_dna && (run_config.stages.gridss || run_virusinterpreter)) { + if (!params.ref_data_genome_gridss_index) { + + BWA_INDEX( + ch_genome_fasta, + params.ref_data_genome_alt ? file(params.ref_data_genome_alt) : [], + ) + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) - ch_genome_bwa_index = getRefFileChannel('ref_data_genome_bwa_index') + GRIDSS_INDEX( + ch_genome_fasta, + ch_genome_fai, + ch_genome_dict, + BWA_INDEX.out.index, + ) + ch_genome_gridss_index = GRIDSS_INDEX.out.index + ch_versions = ch_versions.mix(GRIDSS_INDEX.out.versions) - } - } + } else if (params.ref_data_genome_gridss_index.endsWith('.tar.gz')) { - // - // Set and GRIDSS index, unpack or create if required - // - ch_genome_gridss_index = Channel.empty() - if (run_config.has_dna && (run_config.stages.gridss || run_virusinterpreter)) { - if (!params.ref_data_genome_gridss_index) { + ch_genome_gridss_index_inputs = Channel.fromPath(params.ref_data_genome_gridss_index) + .map { [[id: "gridss_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } - BWA_INDEX( - ch_genome_fasta, - params.ref_data_genome_alt ? 
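// NOTE(editor): the same three-way pattern recurs for each reference index in this
// subworkflow; a condensed sketch with a hypothetical param and process names: build
// the index when no path is given, unpack it when the path is a .tar.gz archive,
// otherwise use the existing path as-is.
if (!params.toy_index) {
    TOY_INDEX(ch_genome_fasta)                        // hypothetical: build from scratch
    ch_toy_index = TOY_INDEX.out.index
} else if (params.toy_index.endsWith('.tar.gz')) {
    ch_toy_inputs = Channel.fromPath(params.toy_index)
        .map { [[id: it.name.replaceAll('\\.tar\\.gz$', '')], it] }
    DECOMP_TOY_INDEX(ch_toy_inputs)                   // hypothetical decompression step
    ch_toy_index = DECOMP_TOY_INDEX.out.dir
} else {
    ch_toy_index = getRefFileChannel('toy_index')     // existing local/remote path
}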
file(params.ref_data_genome_alt) : [], - ) - ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + DECOMP_GRIDSS_INDEX(ch_genome_gridss_index_inputs) + ch_genome_gridss_index = DECOMP_GRIDSS_INDEX.out.dir - GRIDSS_INDEX( - ch_genome_fasta, - ch_genome_fai, - ch_genome_dict, - BWA_INDEX.out.index, - ) - ch_genome_gridss_index = GRIDSS_INDEX.out.index - ch_versions = ch_versions.mix(GRIDSS_INDEX.out.versions) + } else { - } else if (params.ref_data_genome_gridss_index.endsWith('.tar.gz')) { + ch_genome_gridss_index = getRefFileChannel('ref_data_genome_gridss_index') - ch_genome_gridss_index_inputs = Channel.fromPath(params.ref_data_genome_gridss_index) - .map { [[id: "gridss_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + } + } + + // + // Set STAR index path, unpack or create if required + // + ch_genome_star_index = Channel.empty() + if (run_config.has_rna_fastq && run_config.stages.alignment) { + if (!params.ref_data_genome_star_index) { + + STAR_GENOMEGENERATE( + ch_genome_fasta, + file(params.ref_data_genome_gtf), + ) + ch_genome_star_index = STAR_GENOMEGENERATE.out.index + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) - DECOMP_GRIDSS_INDEX(ch_genome_gridss_index_inputs) - ch_genome_gridss_index = DECOMP_GRIDSS_INDEX.out.dir + } else if (params.ref_data_genome_star_index.endsWith('.tar.gz')) { - } else { + ch_genome_star_index_inputs = Channel.fromPath(params.ref_data_genome_star_index) + .map { [[id: "star_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } - ch_genome_gridss_index = getRefFileChannel('ref_data_genome_gridss_index') + DECOMP_STAR_INDEX(ch_genome_star_index_inputs) + ch_genome_star_index = DECOMP_STAR_INDEX.out.dir - } - } + } else { - // - // Set STAR index path, unpack or create if required - // - ch_genome_star_index = Channel.empty() - if (run_config.has_rna_fastq && run_config.stages.alignment) { - if (!params.ref_data_genome_star_index) { + ch_genome_star_index = getRefFileChannel('ref_data_genome_star_index') - STAR_GENOMEGENERATE( - ch_genome_fasta, - file(params.ref_data_genome_gtf), - ) - ch_genome_star_index = STAR_GENOMEGENERATE.out.index - ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + } + } - } else if (params.ref_data_genome_star_index.endsWith('.tar.gz')) { + // + // Set VIRUSBreakend database path, unpack if required + // + ch_virusbreakenddb = Channel.empty() + if (run_config.has_dna && run_virusinterpreter) { + if (params.ref_data_virusbreakenddb_path.endsWith('.tar.gz')) { - ch_genome_star_index_inputs = Channel.fromPath(params.ref_data_genome_star_index) - .map { [[id: "star_index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + ch_virusbreakenddb_inputs = Channel.fromPath(params.ref_data_virusbreakenddb_path) + .map { [[id: it.name.replaceAll('\\.tar\\.gz$', '')], it] } - DECOMP_STAR_INDEX(ch_genome_star_index_inputs) - ch_genome_star_index = DECOMP_STAR_INDEX.out.dir + DECOMP_VIRUSBREAKEND_DB(ch_virusbreakenddb_inputs) + ch_virusbreakenddb = DECOMP_VIRUSBREAKEND_DB.out.dir - } else { + } else { - ch_genome_star_index = getRefFileChannel('ref_data_genome_star_index') + ch_virusbreakenddb = Channel.fromPath(params.ref_data_virusbreakenddb_path) - } } + } + + // + // Set HMF reference paths, unpack if required + // + ch_hmf_data = Channel.empty() + hmf_data_paths = params.hmf_data_paths[params.genome_version.toString()] + if (params.ref_data_hmf_data_path.endsWith('tar.gz')) { + + ch_hmf_data_inputs = Channel.fromPath(params.ref_data_hmf_data_path) + .map { [[id: 
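// NOTE(editor): hedged sketch of what createDataMap is assumed to do in the hmf_data
// and panel_data handling nearby: project a map of relative reference paths onto a
// base directory so downstream stages can address files by key. Hypothetical helper,
// not the pipeline's implementation.
def createDataMapSketch(Map relative_paths, String base_dir) {
    relative_paths.collectEntries { key, rel_path -> [key, base_dir + '/' + rel_path] }
}
assert createDataMapSketch([sage_pon: 'sage/pon.tsv'], '/ref/hmf') ==
    [sage_pon: '/ref/hmf/sage/pon.tsv']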
"hmf_data_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + + DECOMP_HMF_DATA(ch_hmf_data_inputs) + + ch_hmf_data = DECOMP_HMF_DATA.out.dir + .collect() + .map { dir_list -> + assert dir_list.size() == 1 + def dirpath = dir_list[0].toUriString() + return createDataMap(hmf_data_paths, dirpath) + } - // - // Set VIRUSBreakend database path, unpack if required - // - ch_virusbreakenddb = Channel.empty() - if (run_config.has_dna && run_virusinterpreter) { - if (params.ref_data_virusbreakenddb_path.endsWith('.tar.gz')) { + } else { - ch_virusbreakenddb_inputs = Channel.fromPath(params.ref_data_virusbreakenddb_path) - .map { [[id: it.name.replaceAll('\\.tar\\.gz$', '')], it] } + ch_hmf_data = Channel.value(createDataMap(hmf_data_paths, params.ref_data_hmf_data_path)) - DECOMP_VIRUSBREAKEND_DB(ch_virusbreakenddb_inputs) - ch_virusbreakenddb = DECOMP_VIRUSBREAKEND_DB.out.dir + } - } else { + // + // Set panel reference paths, unpack if required + // + ch_panel_data = Channel.empty() + if (run_config.mode === Constants.RunMode.TARGETED) { - ch_virusbreakenddb = Channel.fromPath(params.ref_data_virusbreakenddb_path) + panel_data_paths_versions = params.panel_data_paths[params.panel] + panel_data_paths = panel_data_paths_versions[params.genome_version.toString()] - } - } + if (params.ref_data_panel_data_path.endsWith('tar.gz')) { - // - // Set HMF reference paths, unpack if required - // - ch_hmf_data = Channel.empty() - hmf_data_paths = params.hmf_data_paths[params.genome_version.toString()] - if (params.ref_data_hmf_data_path.endsWith('tar.gz')) { + ch_panel_data_inputs = Channel.fromPath(params.ref_data_panel_data_path) + .map { [[id: "panel_data_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } - ch_hmf_data_inputs = Channel.fromPath(params.ref_data_hmf_data_path) - .map { [[id: "hmf_data_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + DECOMP_PANEL_DATA(ch_panel_data_inputs) - DECOMP_HMF_DATA(ch_hmf_data_inputs) - - ch_hmf_data = DECOMP_HMF_DATA.out.dir + ch_panel_data = DECOMP_PANEL_DATA.out.dir .collect() .map { dir_list -> assert dir_list.size() == 1 def dirpath = dir_list[0].toUriString() - return createDataMap(hmf_data_paths, dirpath) + return createDataMap(panel_data_paths, dirpath) } } else { - ch_hmf_data = Channel.value(createDataMap(hmf_data_paths, params.ref_data_hmf_data_path)) - - } - - // - // Set panel reference paths, unpack if required - // - ch_panel_data = Channel.empty() - if (run_config.mode === Constants.RunMode.TARGETED) { - - panel_data_paths_versions = params.panel_data_paths[params.panel] - panel_data_paths = panel_data_paths_versions[params.genome_version.toString()] - - if (params.ref_data_panel_data_path.endsWith('tar.gz')) { + ch_panel_data = Channel.value(createDataMap(panel_data_paths, params.ref_data_panel_data_path)) - ch_panel_data_inputs = Channel.fromPath(params.ref_data_panel_data_path) - .map { [[id: "panel_data_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } - - DECOMP_PANEL_DATA(ch_panel_data_inputs) - - ch_panel_data = DECOMP_PANEL_DATA.out.dir - .collect() - .map { dir_list -> - assert dir_list.size() == 1 - def dirpath = dir_list[0].toUriString() - return createDataMap(panel_data_paths, dirpath) - } - - } else { - - ch_panel_data = Channel.value(createDataMap(panel_data_paths, params.ref_data_panel_data_path)) - - } } - - if (params.prepare_reference_only) { - - // Create channel of data files to stage (if not already local) and write - ch_refdata = Channel.empty() - .mix( - ch_genome_fasta, - ch_genome_fai, - ch_genome_dict, - 
ch_genome_bwa_index, - ch_genome_gridss_index, - ch_genome_star_index, - ch_virusbreakenddb, - // Also include base paths for hmf_data and panel_data - Channel.empty() - .mix( - ch_hmf_data, - ch_panel_data, - ) - .map { getDataBaseDirectory(it) } - ) - - WRITE_REFERENCE_DATA( - ch_refdata, - workflow.manifest.version, + } + + if (params.prepare_reference_only) { + + // Create channel of data files to stage (if not already local) and write + ch_refdata = Channel.empty() + .mix( + ch_genome_fasta, + ch_genome_fai, + ch_genome_dict, + ch_genome_bwa_index, + ch_genome_gridss_index, + ch_genome_star_index, + ch_virusbreakenddb, + // Also include base paths for hmf_data and panel_data + Channel.empty() + .mix( + ch_hmf_data, + ch_panel_data, + ) + .map { getDataBaseDirectory(it) } ) - // Clear all stages to prevent running any analysis - run_config.stages = [:] - } + WRITE_REFERENCE_DATA( + ch_refdata, + workflow.manifest.version, + ) + + // Clear all stages to prevent running any analysis + run_config.stages = [:] + } emit: - genome_fasta = ch_genome_fasta.first() // path: genome_fasta - genome_fai = ch_genome_fai.first() // path: genome_fai - genome_dict = ch_genome_dict.first() // path: genome_dict - genome_bwa_index = ch_genome_bwa_index.first() // path: genome_bwa_index - genome_gridss_index = ch_genome_gridss_index.first() // path: genome_gridss_index - genome_star_index = ch_genome_star_index.first() // path: genome_star_index - genome_version = ch_genome_version // val: genome_version - - virusbreakenddb = ch_virusbreakenddb.first() // path: VIRUSBreakend database - hmf_data = ch_hmf_data // map: HMF data paths - panel_data = ch_panel_data // map: Panel data paths - - versions = ch_versions // channel: [ versions.yml ] + genome_fasta = ch_genome_fasta.first() // path: genome_fasta + genome_fai = ch_genome_fai.first() // path: genome_fai + genome_dict = ch_genome_dict.first() // path: genome_dict + genome_bwa_index = ch_genome_bwa_index.first() // path: genome_bwa_index + genome_gridss_index = ch_genome_gridss_index.first() // path: genome_gridss_index + genome_star_index = ch_genome_star_index.first() // path: genome_star_index + genome_version = ch_genome_version // val: genome_version + + virusbreakenddb = ch_virusbreakenddb.first() // path: VIRUSBreakend database + hmf_data = ch_hmf_data // map: HMF data paths + panel_data = ch_panel_data // map: Panel data paths + + versions = ch_versions // channel: [ versions.yml ] } def getRefFileChannel(key) { diff --git a/subworkflows/local/purple_calling/main.nf b/subworkflows/local/purple_calling/main.nf index 229e2fbb..48142ae9 100644 --- a/subworkflows/local/purple_calling/main.nf +++ b/subworkflows/local/purple_calling/main.nf @@ -9,137 +9,137 @@ include { PURPLE } from '../../../modules/local/purple/main' workflow PURPLE_CALLING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_amber // channel: [mandatory] [ meta, amber_dir ] - ch_cobalt // channel: [mandatory] [ meta, cobalt_dir ] - ch_smlv_somatic // channel: [mandatory] [ meta, pave_vcf ] - ch_smlv_germline // channel: [mandatory] [ meta, pave_vcf ] - ch_sv_somatic // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ] - ch_sv_germline // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ] - ch_sv_somatic_unfiltered // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ] - - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version - genome_fai // channel: [mandatory] 
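Note on the tarball handling above: each compressed reference input is first shaped into a [ meta, path ] tuple, with the meta id derived from the archive name, before being handed to its decompression process. A minimal sketch of the idiom, under illustrative names (the process here is any untar-style process emitting a `dir` output, not one from this pipeline):

    // Shape '/refs/my_index.tar.gz' into [ [id: 'index_my_index'], path ]
    ch_index_inputs = Channel.fromPath(params.some_index_tarball)
        .map { [[id: "index_${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] }

    DECOMP_INDEX(ch_index_inputs)       // assumed: tuple val(meta), path(archive) -> path('dir/')
    ch_index = DECOMP_INDEX.out.dir     // downstream consumes the unpacked directory

diff --git a/subworkflows/local/purple_calling/main.nf b/subworkflows/local/purple_calling/main.nf
index 229e2fbb..48142ae9 100644
--- a/subworkflows/local/purple_calling/main.nf
+++ b/subworkflows/local/purple_calling/main.nf
@@ -9,137 +9,137 @@ include { PURPLE } from '../../../modules/local/purple/main'
 
 workflow PURPLE_CALLING {
     take: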
-        // Sample data
-        ch_inputs // channel: [mandatory] [ meta ]
-        ch_amber // channel: [mandatory] [ meta, amber_dir ]
-        ch_cobalt // channel: [mandatory] [ meta, cobalt_dir ]
-        ch_smlv_somatic // channel: [mandatory] [ meta, pave_vcf ]
-        ch_smlv_germline // channel: [mandatory] [ meta, pave_vcf ]
-        ch_sv_somatic // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ]
-        ch_sv_germline // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ]
-        ch_sv_somatic_unfiltered // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ]
-
-        // Reference data
-        genome_fasta // channel: [mandatory] /path/to/genome_fasta
-        genome_version // channel: [mandatory] genome version
-        genome_fai // channel: [mandatory] /path/to/genome_fai
-        genome_dict // channel: [mandatory] /path/to/genome_dict
-        gc_profile // channel: [mandatory] /path/to/gc_profile
-        sage_known_hotspots_somatic // channel: [mandatory] /path/to/sage_known_hotspots_somatic
-        sage_known_hotspots_germline // channel: [optional] /path/to/sage_known_hotspots_germline
-        driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel
-        ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
-        purple_germline_del // channel: [optional] /path/to/purple_germline_del
-        target_region_bed // channel: [optional] /path/to/target_region_bed
-        target_region_ratios // channel: [optional] /path/to/target_region_ratios
-        target_region_msi_indels // channel: [optional] /path/to/target_region_msi_indels
+    // Sample data
+    ch_inputs // channel: [mandatory] [ meta ]
+    ch_amber // channel: [mandatory] [ meta, amber_dir ]
+    ch_cobalt // channel: [mandatory] [ meta, cobalt_dir ]
+    ch_smlv_somatic // channel: [mandatory] [ meta, pave_vcf ]
+    ch_smlv_germline // channel: [mandatory] [ meta, pave_vcf ]
+    ch_sv_somatic // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ]
+    ch_sv_germline // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ]
+    ch_sv_somatic_unfiltered // channel: [mandatory] [ meta, gripss_vcf, gripss_tbi ]
+
+    // Reference data
+    genome_fasta // channel: [mandatory] /path/to/genome_fasta
+    genome_version // channel: [mandatory] genome version
+    genome_fai // channel: [mandatory] /path/to/genome_fai
+    genome_dict // channel: [mandatory] /path/to/genome_dict
+    gc_profile // channel: [mandatory] /path/to/gc_profile
+    sage_known_hotspots_somatic // channel: [mandatory] /path/to/sage_known_hotspots_somatic
+    sage_known_hotspots_germline // channel: [optional] /path/to/sage_known_hotspots_germline
+    driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel
+    ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
+    purple_germline_del // channel: [optional] /path/to/purple_germline_del
+    target_region_bed // channel: [optional] /path/to/target_region_bed
+    target_region_ratios // channel: [optional] /path/to/target_region_ratios
+    target_region_msi_indels // channel: [optional] /path/to/target_region_msi_indels

     main:
-        // Channel for version.yml files
-        // channel: [ versions.yml ]
-        ch_versions = Channel.empty()
-
-        // Select input sources
-        // channel: [ meta, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ]
-        ch_inputs_selected = WorkflowOncoanalyser.groupByMeta(
-            ch_amber,
-            ch_cobalt,
-            ch_sv_somatic,
-            ch_sv_somatic_unfiltered,
-            ch_sv_germline,
-            ch_smlv_somatic,
-            ch_smlv_germline,
-        )
-            .map { d ->
-
-                def meta = d[0]
-
-                // NOTE(SW): avoiding further complexity with loops etc
-
-                def inputs = [
-                    Utils.selectCurrentOrExisting(d[1], meta, Constants.INPUT.AMBER_DIR),
-                    Utils.selectCurrentOrExisting(d[2], meta, Constants.INPUT.COBALT_DIR),
-                    Utils.selectCurrentOrExisting(d[3], meta, Constants.INPUT.GRIPSS_VCF_TUMOR),
-                    Utils.selectCurrentOrExisting(d[4], meta, Constants.INPUT.GRIPSS_VCF_TUMOR_TBI),
-                    Utils.selectCurrentOrExisting(d[5], meta, Constants.INPUT.GRIPSS_UNFILTERED_VCF_TUMOR),
-                    Utils.selectCurrentOrExisting(d[6], meta, Constants.INPUT.GRIPSS_UNFILTERED_VCF_TUMOR_TBI),
-                    Utils.selectCurrentOrExisting(d[7], meta, Constants.INPUT.GRIPSS_VCF_NORMAL),
-                    Utils.selectCurrentOrExisting(d[8], meta, Constants.INPUT.GRIPSS_VCF_NORMAL_TBI),
-                    Utils.selectCurrentOrExisting(d[9], meta, Constants.INPUT.PAVE_VCF_TUMOR),
-                    Utils.selectCurrentOrExisting(d[10], meta, Constants.INPUT.PAVE_VCF_NORMAL),
-                ]
-
-                return [meta, *inputs]
-            }
-
-        // Sort inputs
-        // channel: runnable: [ meta, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ]
-        // channel: skip: [ meta ]
-        ch_inputs_sorted = ch_inputs_selected
-            .branch { d ->
-                def meta = d[0]
-                def amber_dir = d[1]
-                def cobalt_dir = d[2]
-
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR)
-
-                runnable: amber_dir && cobalt_dir && !has_existing
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: [ meta_purple, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ]
-        ch_purple_inputs = ch_inputs_sorted.runnable
-            .map { d ->
-
-                def meta = d[0]
-                def inputs = d[1..-1]
-
-                def meta_purple = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    tumor_id: Utils.getTumorDnaSampleName(meta),
-                ]
-
-                if (Utils.hasNormalDna(meta)) {
-                    meta_purple.normal_id = Utils.getNormalDnaSampleName(meta)
-                }
-
-                return [meta_purple, *inputs]
-
+    // Channel for version.yml files
+    // channel: [ versions.yml ]
+    ch_versions = Channel.empty()
+
+    // Select input sources
+    // channel: [ meta, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ]
+    ch_inputs_selected = WorkflowOncoanalyser.groupByMeta(
+        ch_amber,
+        ch_cobalt,
+        ch_sv_somatic,
+        ch_sv_somatic_unfiltered,
+        ch_sv_germline,
+        ch_smlv_somatic,
+        ch_smlv_germline,
+    )
+        .map { d ->
+
+            def meta = d[0]
+
+            // NOTE(SW): avoiding further complexity with loops etc
+
+            def inputs = [
+                Utils.selectCurrentOrExisting(d[1], meta, Constants.INPUT.AMBER_DIR),
+                Utils.selectCurrentOrExisting(d[2], meta, Constants.INPUT.COBALT_DIR),
+                Utils.selectCurrentOrExisting(d[3], meta, Constants.INPUT.GRIPSS_VCF_TUMOR),
+                Utils.selectCurrentOrExisting(d[4], meta, Constants.INPUT.GRIPSS_VCF_TUMOR_TBI),
+                Utils.selectCurrentOrExisting(d[5], meta, Constants.INPUT.GRIPSS_UNFILTERED_VCF_TUMOR),
+                Utils.selectCurrentOrExisting(d[6], meta, Constants.INPUT.GRIPSS_UNFILTERED_VCF_TUMOR_TBI),
+                Utils.selectCurrentOrExisting(d[7], meta, Constants.INPUT.GRIPSS_VCF_NORMAL),
+                Utils.selectCurrentOrExisting(d[8], meta, Constants.INPUT.GRIPSS_VCF_NORMAL_TBI),
+                Utils.selectCurrentOrExisting(d[9], meta, Constants.INPUT.PAVE_VCF_TUMOR),
+                Utils.selectCurrentOrExisting(d[10], meta, Constants.INPUT.PAVE_VCF_NORMAL),
+            ]
+
+            return [meta, *inputs]
+        }
+
+    // Sort inputs
+    // channel: runnable: [ meta, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ]
+    // channel: skip: [ meta ]
+    ch_inputs_sorted = ch_inputs_selected
+        .branch { d ->
+            def meta = d[0]
+            def amber_dir = d[1]
+            def cobalt_dir = d[2]
+
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.PURPLE_DIR)
+
+            runnable: amber_dir && cobalt_dir && !has_existing
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: [ meta_purple, amber_dir, cobalt_dir, sv_somatic_vcf, sv_somatic_tbi, sv_somatic_unfiltered_vcf, sv_somatic_unfiltered_tbi, sv_germline_vcf, sv_germline_tbi, smlv_somatic_vcf, smlv_germline_vcf ]
+    ch_purple_inputs = ch_inputs_sorted.runnable
+        .map { d ->
+
+            def meta = d[0]
+            def inputs = d[1..-1]
+
+            def meta_purple = [
+                key: meta.group_id,
+                id: meta.group_id,
+                tumor_id: Utils.getTumorDnaSampleName(meta),
+            ]
+
+            if (Utils.hasNormalDna(meta)) {
+                meta_purple.normal_id = Utils.getNormalDnaSampleName(meta)
             }

-        // Run process
-        PURPLE(
-            ch_purple_inputs,
-            genome_fasta,
-            genome_version,
-            genome_fai,
-            genome_dict,
-            gc_profile,
-            sage_known_hotspots_somatic,
-            sage_known_hotspots_germline,
-            driver_gene_panel,
-            ensembl_data_resources,
-            purple_germline_del,
-            target_region_bed,
-            target_region_ratios,
-            target_region_msi_indels,
+            return [meta_purple, *inputs]
+
+        }
+
+    // Run process
+    PURPLE(
+        ch_purple_inputs,
+        genome_fasta,
+        genome_version,
+        genome_fai,
+        genome_dict,
+        gc_profile,
+        sage_known_hotspots_somatic,
+        sage_known_hotspots_germline,
+        driver_gene_panel,
+        ensembl_data_resources,
+        purple_germline_del,
+        target_region_bed,
+        target_region_ratios,
+        target_region_msi_indels,
+    )
+
+    ch_versions = ch_versions.mix(PURPLE.out.versions)
+
+    // Set outputs, restoring original meta
+    // channel: [ meta, purple_dir ]
+    ch_outputs = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(PURPLE.out.purple_dir, ch_inputs),
+            ch_inputs_sorted.skip.map { meta -> [meta, []] },
         )

-        ch_versions = ch_versions.mix(PURPLE.out.versions)
-
-        // Set outputs, restoring original meta
-        // channel: [ meta, purple_dir ]
-        ch_outputs = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(PURPLE.out.purple_dir, ch_inputs),
-                ch_inputs_sorted.skip.map { meta -> [meta, []] },
-            )
-
     emit:
-        purple_dir = ch_outputs // channel: [ meta, purple_dir ]
+    purple_dir = ch_outputs // channel: [ meta, purple_dir ]

-        versions = ch_versions // channel: [ versions.yml ]
+    versions = ch_versions // channel: [ versions.yml ]
 }
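Note on the runnable/skip sorting used in this and the following subworkflows: Nextflow's branch operator both splits a channel into named arms and can reshape what an arm emits via a trailing return. A minimal sketch of the idiom, under illustrative names:

    ch_sorted = ch_items                        // items shaped [ meta, amber_dir, cobalt_dir ]
        .branch { meta, amber_dir, cobalt_dir ->
            runnable: amber_dir && cobalt_dir   // this arm keeps the full tuple
            skip: true                          // catch-all arm...
                return meta                     // ...emits only meta
        }
    // consumed downstream as ch_sorted.runnable and ch_sorted.skip

diff --git a/subworkflows/local/read_alignment_dna/main.nf b/subworkflows/local/read_alignment_dna/main.nf
index 0ec6fb04..6632f921 100644
--- a/subworkflows/local/read_alignment_dna/main.nf
+++ b/subworkflows/local/read_alignment_dna/main.nf
@@ -10,208 +10,209 @@ include { FASTP } from '../../../modules/local/fastp/main'
 
 workflow READ_ALIGNMENT_DNA {
     take:
-        // Sample data
-        ch_inputs // channel: [mandatory] [ meta ]
+    // Sample data
+    ch_inputs // channel: [mandatory] [ meta ]

-        // Reference data
-        genome_fasta // channel: [mandatory] /path/to/genome_fasta
-        genome_bwa_index // channel: [mandatory] /path/to/genome_bwa_index_dir/
+    // Reference data
+    genome_fasta // channel: [mandatory] /path/to/genome_fasta
+    genome_bwa_index // channel: [mandatory] /path/to/genome_bwa_index_dir/

-        // Params
-        max_fastq_records // numeric: [mandatory] max number of FASTQ records per split
+    // Params
+    max_fastq_records // numeric: [mandatory] max number of FASTQ records per split

     main:
-        // Channel for version.yml files
-        // channel: [ versions.yml ]
-        ch_versions = Channel.empty()
-
-        // Sort inputs, separate by tumor and normal
-        // channel: [ meta ]
-        ch_inputs_tumor_sorted = ch_inputs
-            .branch { meta ->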
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_TUMOR)
-                runnable: Utils.hasTumorDnaFastq(meta) && !has_existing
-                skip: true
-            }
+    // Channel for version.yml files
+    // channel: [ versions.yml ]
+    ch_versions = Channel.empty()
+
+    // Sort inputs, separate by tumor and normal
+    // channel: [ meta ]
+    ch_inputs_tumor_sorted = ch_inputs
+        .branch { meta ->
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_TUMOR)
+            runnable: Utils.hasTumorDnaFastq(meta) && !has_existing
+            skip: true
+        }

-        ch_inputs_normal_sorted = ch_inputs
-            .branch { meta ->
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_NORMAL)
-                runnable: Utils.hasNormalDnaFastq(meta) && !has_existing
-                skip: true
-            }
+    ch_inputs_normal_sorted = ch_inputs
+        .branch { meta ->
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_NORMAL)
+            runnable: Utils.hasNormalDnaFastq(meta) && !has_existing
+            skip: true
+        }

-        // Create FASTQ input channel
-        // channel: [ meta_fastq, fastq_fwd, fastq_rev ]
-        ch_fastq_inputs = Channel.empty()
-            .mix(
-                ch_inputs_tumor_sorted.runnable.map { meta -> [meta, Utils.getTumorDnaSample(meta), 'tumor'] },
-                ch_inputs_normal_sorted.runnable.map { meta -> [meta, Utils.getNormalDnaSample(meta), 'normal'] },
-            )
-            .flatMap { meta, meta_sample, sample_type ->
-                meta_sample
-                    .getAt(Constants.FileType.FASTQ)
-                    .collect { key, fps ->
-                        def (library_id, lane) = key
-
-                        def meta_fastq = [
-                            key: meta.group_id,
-                            id: "${meta.group_id}_${meta_sample.sample_id}",
-                            sample_id: meta_sample.sample_id,
-                            library_id: library_id,
-                            lane: lane,
-                            sample_type: sample_type,
-                        ]
+    // Create FASTQ input channel
+    // channel: [ meta_fastq, fastq_fwd, fastq_rev ]
+    ch_fastq_inputs = Channel.empty()
+        .mix(
+            ch_inputs_tumor_sorted.runnable.map { meta -> [meta, Utils.getTumorDnaSample(meta), 'tumor'] },
+            ch_inputs_normal_sorted.runnable.map { meta -> [meta, Utils.getNormalDnaSample(meta), 'normal'] },
+        )
+        .flatMap { meta, meta_sample, sample_type ->
+            meta_sample
+                .getAt(Constants.FileType.FASTQ)
+                .collect { key, fps ->
+                    def (library_id, lane) = key
+
+                    def meta_fastq = [
+                        key: meta.group_id,
+                        id: "${meta.group_id}_${meta_sample.sample_id}",
+                        sample_id: meta_sample.sample_id,
+                        library_id: library_id,
+                        lane: lane,
+                        sample_type: sample_type,
+                    ]

-                        return [meta_fastq, fps['fwd'], fps['rev']]
-                    }
-            }
+                    return [meta_fastq, fps['fwd'], fps['rev']]
+                }
+        }

-        //
-        // MODULE: fastp
-        //
-        // Split FASTQ into chunks if requested for distributed processing
-        // channel: [ meta_fastq_ready, fastq_fwd, fastq_rev ]
-        ch_fastqs_ready = Channel.empty()
-        if (max_fastq_records > 0) {
+    //
+    // MODULE: fastp
+    //
+    // Split FASTQ into chunks if requested for distributed processing
+    // channel: [ meta_fastq_ready, fastq_fwd, fastq_rev ]
+    ch_fastqs_ready = Channel.empty()
+    if (max_fastq_records > 0) {

-            // Run process
-            FASTP(
-                ch_fastq_inputs,
-                max_fastq_records,
-            )
+        // Run process
+        FASTP(
+            ch_fastq_inputs,
+            max_fastq_records,
+        )

-            ch_versions = ch_versions.mix(FASTP.out.versions)
+        ch_versions = ch_versions.mix(FASTP.out.versions)

-            // Prepare outputs within conditional block
-            ch_fastqs_ready = FASTP.out.fastq
-                .flatMap { meta_fastq, reads_fwd, reads_rev ->
+        // Prepare outputs within conditional block
+        ch_fastqs_ready = FASTP.out.fastq
+            .flatMap { meta_fastq, reads_fwd, reads_rev ->

-                    def data = [reads_fwd, reads_rev]
-                        .transpose()
-                        .collect { fwd, rev ->
+                def data = [reads_fwd, reads_rev]
+                    .transpose()
+                    .collect { fwd, rev ->

-                            def split_fwd = fwd.name.replaceAll('\\..+$', '')
-                            def split_rev = rev.name.replaceAll('\\..+$', '')
+                        def split_fwd = fwd.name.replaceAll('\\..+$', '')
+                        def split_rev = rev.name.replaceAll('\\..+$', '')

-                            assert split_fwd == split_rev
+                        assert split_fwd == split_rev

-                            // NOTE(SW): split allows meta_fastq_ready to be unique, which is required during reunite below
-                            def meta_fastq_ready = [
-                                *:meta_fastq,
-                                id: "${meta_fastq.id}_${split_fwd}",
-                                split: split_fwd,
-                            ]
+                        // NOTE(SW): split allows meta_fastq_ready to be unique, which is required during reunite below
+                        def meta_fastq_ready = [
+                            *:meta_fastq,
+                            id: "${meta_fastq.id}_${split_fwd}",
+                            split: split_fwd,
+                        ]

-                            return [meta_fastq_ready, fwd, rev]
-                        }
+                        return [meta_fastq_ready, fwd, rev]
+                    }

-                    return data
-                }
+                return data
+            }

-        } else {
+    } else {

-            ch_fastqs_ready = ch_fastq_inputs
-                .map { meta_fastq, fastq_fwd, fastq_rev ->
+        ch_fastqs_ready = ch_fastq_inputs
+            .map { meta_fastq, fastq_fwd, fastq_rev ->

-                    def meta_fastq_ready = [
-                        *:meta_fastq,
-                        split: null,
-                    ]
+                def meta_fastq_ready = [
+                    *:meta_fastq,
+                    split: null,
+                ]

-                    return [meta_fastq_ready, fastq_fwd, fastq_rev]
-                }
+                return [meta_fastq_ready, fastq_fwd, fastq_rev]
+            }

-        }
+    }

-        //
-        // MODULE: BWA-MEM2
-        //
-        // Create process input channel
-        // channel: [ meta_bwa, fastq_fwd, fastq_rev ]
-        ch_bwa_inputs = ch_fastqs_ready
-            .map { meta_fastq_ready, fastq_fwd, fastq_rev ->
+    //
+    // MODULE: BWA-MEM2
+    //
+    // Create process input channel
+    // channel: [ meta_bwa, fastq_fwd, fastq_rev ]
+    ch_bwa_inputs = ch_fastqs_ready
+        .map { meta_fastq_ready, fastq_fwd, fastq_rev ->

-                def meta_bwa = [
-                    *:meta_fastq_ready,
+            def meta_bwa = [
+                *:meta_fastq_ready,

-                    // TODO(SW): understand target format
-                    read_group: "${meta_fastq_ready.sample_id}.${meta_fastq_ready.library_id}.${meta_fastq_ready.lane}",
+                // TODO(SW): understand target format
+                read_group: "${meta_fastq_ready.sample_id}.${meta_fastq_ready.library_id}.${meta_fastq_ready.lane}",

-                ]
+            ]

-                return [meta_bwa, fastq_fwd, fastq_rev]
-            }
+            return [meta_bwa, fastq_fwd, fastq_rev]
+        }

-        // Run process
-        BWAMEM2_ALIGN(
-            ch_bwa_inputs,
-            genome_fasta,
-            genome_bwa_index,
-        )
+    // Run process
+    BWAMEM2_ALIGN(
+        ch_bwa_inputs,
+        genome_fasta,
+        genome_bwa_index,
+    )

-        ch_versions = ch_versions.mix(BWAMEM2_ALIGN.out.versions)
+    ch_versions = ch_versions.mix(BWAMEM2_ALIGN.out.versions)

-        // Reunite BAMs
-        // First, count expected BAMs per sample for non-blocking groupTuple op
-        // channel: [ meta_count, group_size ]
-        ch_sample_fastq_counts = ch_bwa_inputs
-            .map { meta_bwa, reads_fwd, reads_rev ->
+    // Reunite BAMs
+    // First, count expected BAMs per sample for non-blocking groupTuple op
+    // channel: [ meta_count, group_size ]
+    ch_sample_fastq_counts = ch_bwa_inputs
+        .map { meta_bwa, reads_fwd, reads_rev ->

-                def meta_count = [
-                    key: meta_bwa.key,
-                    sample_type: meta_bwa.sample_type,
-                ]
+            def meta_count = [
+                key: meta_bwa.key,
+                sample_type: meta_bwa.sample_type,
+            ]

-                return [meta_count, meta_bwa]
-            }
-            .groupTuple()
-            .map { meta_count, meta_bwas -> return [meta_count, meta_bwas.size()] }
-
-        // Now, group with expected size then sort into tumor and normal channels
-        // channel: [ meta_group, [bam, ...], [bai, ...] ]
-        ch_bams_united = ch_sample_fastq_counts
-            .cross(
-                // First element to match meta_count above for `cross`
-                BWAMEM2_ALIGN.out.bam.map { meta_bwa, bam, bai -> [[key: meta_bwa.key, sample_type: meta_bwa.sample_type], bam, bai] }
-            )
-            .map { count_tuple, bam_tuple ->
-
-                def group_size = count_tuple[1]
-                def (meta_bam, bam, bai) = bam_tuple
-
-                def meta_group = [
-                    *:meta_bam,
-                ]
+            return [meta_count, meta_bwa]
+        }
+        .groupTuple()
+        .map { meta_count, meta_bwas -> return [meta_count, meta_bwas.size()] }
+
+    // Now, group with expected size then sort into tumor and normal channels
+    // channel: [ meta_group, [bam, ...], [bai, ...] ]
+    ch_bams_united = ch_sample_fastq_counts
+        .cross(
+            // First element to match meta_count above for `cross`
+            BWAMEM2_ALIGN.out.bam.map { meta_bwa, bam, bai -> [[key: meta_bwa.key, sample_type: meta_bwa.sample_type], bam, bai] }
+        )
+        .map { count_tuple, bam_tuple ->

-                return tuple(groupKey(meta_group, group_size), bam, bai)
-            }
-            .groupTuple()
-            .branch { meta_group, bams, bais ->
-                assert ['tumor', 'normal'].contains(meta_group.sample_type)
-                tumor: meta_group.sample_type == 'tumor'
-                normal: meta_group.sample_type == 'normal'
-                placeholder: true
-            }
+            def group_size = count_tuple[1]
+            def (meta_bam, bam, bai) = bam_tuple
+
+            def meta_group = [
+                *:meta_bam,
+            ]
+
+            return tuple(groupKey(meta_group, group_size), bam, bai)
+        }
+        .groupTuple()
+        .branch { meta_group, bams, bais ->
+            assert ['tumor', 'normal'].contains(meta_group.sample_type)
+            tumor: meta_group.sample_type == 'tumor'
+            normal: meta_group.sample_type == 'normal'
+            placeholder: true
+        }

-        // Set outputs, restoring original meta
-        // channel: [ meta, [bam, ...], [bai, ...] ]
-        ch_bam_tumor_out = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(ch_bams_united.tumor, ch_inputs),
-                ch_inputs_tumor_sorted.skip.map { meta -> [meta, [], []] },
-            )
+    // Set outputs, restoring original meta
+    // channel: [ meta, [bam, ...], [bai, ...] ]
+    ch_bam_tumor_out = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(ch_bams_united.tumor, ch_inputs),
+            ch_inputs_tumor_sorted.skip.map { meta -> [meta, [], []] },
+        )

-        ch_bam_normal_out = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(ch_bams_united.normal, ch_inputs),
-                ch_inputs_normal_sorted.skip.map { meta -> [meta, [], []] },
-            )
+    ch_bam_normal_out = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(ch_bams_united.normal, ch_inputs),
+            ch_inputs_normal_sorted.skip.map { meta -> [meta, [], []] },
+        )

     emit:
-        dna_tumor = ch_bam_tumor_out // channel: [ meta, [bam, ...], [bai, ...] ]
-        dna_normal = ch_bam_normal_out // channel: [ meta, [bam, ...], [bai, ...] ]
-        versions = ch_versions // channel: [ versions.yml ]
+    dna_tumor = ch_bam_tumor_out // channel: [ meta, [bam, ...], [bai, ...] ]
+    dna_normal = ch_bam_normal_out // channel: [ meta, [bam, ...], [bai, ...] ]
+
+    versions = ch_versions // channel: [ versions.yml ]
 }
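Note on the reunite step above: a plain groupTuple would wait for the whole channel to complete, so the expected number of BAMs per sample is instead counted on the cheap pre-alignment channel and attached as a groupKey size hint; each group is then released as soon as its last BAM arrives. A compact sketch of the core pattern, under illustrative names:

    ch_counts = ch_parts                            // [ key, part ], known before the expensive step
        .groupTuple()
        .map { key, parts -> [key, parts.size()] }  // [ key, n ]

    ch_grouped = ch_counts
        .cross(ch_bams)                             // pairs items whose first elements match
        .map { count_tuple, bam_tuple ->
            def (key, n) = count_tuple
            def bam = bam_tuple[1]
            tuple(groupKey(key, n), bam)            // size hint makes groupTuple non-blocking
        }
        .groupTuple()

diff --git a/subworkflows/local/read_alignment_rna/main.nf b/subworkflows/local/read_alignment_rna/main.nf
index ca795e58..d6e920c9 100644
--- a/subworkflows/local/read_alignment_rna/main.nf
+++ b/subworkflows/local/read_alignment_rna/main.nf
@@ -8,209 +8,210 @@ import Utils
 include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main'
 include { SAMBAMBA_MERGE } from '../../../modules/local/sambamba/merge/main'
 include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
-include { STAR } from '../../../modules/local/star/main'
+include { STAR_ALIGN } from '../../../modules/local/star/align/main'
 
 workflow READ_ALIGNMENT_RNA {
     take:
-        // Sample data
-        ch_inputs // channel: [mandatory] [ meta ]
+    // Sample data
+    ch_inputs // channel: [mandatory] [ meta ]

-        // Reference data
-        genome_star_index // channel: [mandatory] /path/to/genome_star_index/
+    // Reference data
+    genome_star_index // channel: [mandatory] /path/to/genome_star_index/

     main:
-        // Channel for version.yml files
-        // channel: [ versions.yml ]
-        ch_versions = Channel.empty()
-
-        // Sort inputs
-        // channel: [ meta ]
-        ch_inputs_sorted = ch_inputs
-            .branch { meta ->
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_RNA_TUMOR)
-                runnable: Utils.hasTumorRnaFastq(meta) && !has_existing
-                skip: true
-            }
-
-        // Create FASTQ input channel
-        // channel: [ meta_fastq, fastq_fwd, fastq_rev ]
-        ch_fastq_inputs = ch_inputs_sorted.runnable
-            .flatMap { meta ->
-                def meta_sample = Utils.getTumorRnaSample(meta)
-                meta_sample
-                    .getAt(Constants.FileType.FASTQ)
-                    .collect { key, fps ->
-                        def (library_id, lane) = key
-
-                        def meta_fastq = [
-                            key: meta.group_id,
-                            id: "${meta.group_id}_${meta_sample.sample_id}",
-                            sample_id: meta_sample.sample_id,
-                            library_id: library_id,
-                            lane: lane,
-                        ]
-
-                        return [meta_fastq, fps['fwd'], fps['rev']]
-                    }
-            }
-
-        //
-        // MODULE: STAR
-        //
-        // Create process input channel
-        // channel: [ meta_star, fastq_fwd, fastq_rev ]
-        ch_star_inputs = ch_fastq_inputs
-            .map { meta_fastq, fastq_fwd, fastq_rev ->
-                def meta_star = [
-                    *:meta_fastq,
-
-
-                    // TODO(SW): understand target format
-                    read_group: "${meta_fastq.sample_id}.${meta_fastq.library_id}.${meta_fastq.lane}",
-
-
-                ]
-
-                return [meta_star, fastq_fwd, fastq_rev]
-            }
-
-        // Run process
-        STAR(
-            ch_star_inputs,
-            genome_star_index,
+    // Channel for version.yml files
+    // channel: [ versions.yml ]
+    ch_versions = Channel.empty()
+
+    // Sort inputs
+    // channel: [ meta ]
+    ch_inputs_sorted = ch_inputs
+        .branch { meta ->
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_RNA_TUMOR)
+            runnable: Utils.hasTumorRnaFastq(meta) && !has_existing
+            skip: true
+        }
+
+    // Create FASTQ input channel
+    // channel: [ meta_fastq, fastq_fwd, fastq_rev ]
+    ch_fastq_inputs = ch_inputs_sorted.runnable
+        .flatMap { meta ->
+            def meta_sample = Utils.getTumorRnaSample(meta)
+            meta_sample
+                .getAt(Constants.FileType.FASTQ)
+                .collect { key, fps ->
+                    def (library_id, lane) = key
+
+                    def meta_fastq = [
+                        key: meta.group_id,
+                        id: "${meta.group_id}_${meta_sample.sample_id}",
+                        sample_id: meta_sample.sample_id,
+                        library_id: library_id,
+                        lane: lane,
+                    ]
+
+                    return [meta_fastq, fps['fwd'], fps['rev']]
+                }
+        }
+
+    //
+    // MODULE: STAR alignment
+    //
+    // Create process input channel
+    // channel: [ meta_star, fastq_fwd, fastq_rev ]
+    ch_star_inputs = ch_fastq_inputs
+        .map { meta_fastq, fastq_fwd, fastq_rev ->
+            def meta_star = [
+                *:meta_fastq,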
+
+
+                // TODO(SW): understand target format
+                read_group: "${meta_fastq.sample_id}.${meta_fastq.library_id}.${meta_fastq.lane}",
+
+
+            ]
+
+            return [meta_star, fastq_fwd, fastq_rev]
+        }
+
+    // Run process
+    STAR_ALIGN(
+        ch_star_inputs,
+        genome_star_index,
+    )
+
+    ch_versions = ch_versions.mix(STAR_ALIGN.out.versions)
+
+    //
+    // MODULE: SAMtools sort
+    //
+    // Create process input channel
+    // channel: [ meta_sort, bam ]
+    ch_sort_inputs = STAR_ALIGN.out.bam
+        .map { meta_star, bam ->
+            def meta_sort = [
+                *:meta_star,
+                prefix: meta_star.read_group,
+            ]
+
+            return [meta_sort, bam]
+        }
+
+    // Run process
+    SAMTOOLS_SORT(
+        ch_sort_inputs,
     )

-        ch_versions = ch_versions.mix(STAR.out.versions)
-
-        //
-        // MODULE: SAMtools sort
-        //
-        // Create process input channel
-        // channel: [ meta_sort, bam ]
-        ch_sort_inputs = STAR.out.bam
-            .map { meta_star, bam ->
-                def meta_sort = [
-                    *:meta_star,
-                    prefix: meta_star.read_group,
-                ]
-
-                return [meta_sort, bam]
-            }
-
-        // Run process
-        SAMTOOLS_SORT(
-            ch_sort_inputs,
+    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions)
+
+    //
+    // MODULE: Sambamba merge
+    //
+    // Reunite BAMs
+    // First, count expected BAMs per sample for non-blocking groupTuple op
+    // channel: [ meta_count, group_size ]
+    ch_sample_fastq_counts = ch_star_inputs
+        .map { meta_star, reads_fwd, reads_rev ->
+            def meta_count = [key: meta_star.key]
+            return [meta_count, meta_star]
+        }
+        .groupTuple()
+        .map { meta_count, meta_stars -> return [meta_count, meta_stars.size()] }
+
+    // Now, group with expected size then sort into tumor and normal channels
+    // channel: [ meta_group, [bam, ...] ]
+    ch_bams_united = ch_sample_fastq_counts
+        .cross(
+            // First element to match meta_count above for `cross`
+            SAMTOOLS_SORT.out.bam.map { meta_star, bam -> [[key: meta_star.key], bam] }
         )
+        .map { count_tuple, bam_tuple ->
+
+            def group_size = count_tuple[1]
+            def (meta_bam, bam) = bam_tuple
+
+            def meta_group = [
+                *:meta_bam,
+            ]
+
+            return tuple(groupKey(meta_group, group_size), bam)
+        }
+        .groupTuple()
+
+    // Sort into merge-eligible BAMs (at least two BAMs required)
+    // channel: runnable: [ meta_group, [bam, ...] ]
+    // channel: skip: [ meta_group, bam ]
+    ch_bams_united_sorted = ch_bams_united
+        .branch { meta_group, bams ->
+            runnable: bams.size() > 1
+            skip: true
+                return [meta_group, bams[0]]
+        }
+
+    // Create process input channel
+    // channel: [ meta_merge, [bams, ...] ]
+    ch_merge_inputs = WorkflowOncoanalyser.restoreMeta(ch_bams_united_sorted.runnable, ch_inputs)
+        .map { meta, bams ->
+            def meta_merge = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: Utils.getTumorRnaSampleName(meta),
+            ]
+            return [meta_merge, bams]
+        }
+
+    // Run process
+    SAMBAMBA_MERGE(
+        ch_merge_inputs,
+    )
+
+    ch_versions = ch_versions.mix(SAMBAMBA_MERGE.out.versions)
+
+    //
+    // MODULE: GATK4 markduplicates
+    //
+    // Create process input channel
+    // channel: [ meta_markdups, bam ]
+    ch_markdups_inputs = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(SAMBAMBA_MERGE.out.bam, ch_inputs),
+            WorkflowOncoanalyser.restoreMeta(ch_bams_united_sorted.skip, ch_inputs),
+        )
+        .map { meta, bam ->
+            def meta_markdups = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: Utils.getTumorRnaSampleName(meta),
+            ]
+            return [meta_markdups, bam]
+        }
+
+    // Run process
+    GATK4_MARKDUPLICATES(
+        ch_markdups_inputs,
+        [],
+        [],
+    )
+
+    ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions)
+
+    // Combine BAMs and BAIs
+    // channel: [ meta, bam, bai ]
+    ch_bams_ready = WorkflowOncoanalyser.groupByMeta(
+        WorkflowOncoanalyser.restoreMeta(GATK4_MARKDUPLICATES.out.bam, ch_inputs),
+        WorkflowOncoanalyser.restoreMeta(GATK4_MARKDUPLICATES.out.bai, ch_inputs),
+    )
+
+    // Set outputs
+    // channel: [ meta, bam, bai ]
+    ch_bam_out = Channel.empty()
+        .mix(
+            ch_bams_ready,
+            ch_inputs_sorted.skip.map { meta -> [meta, [], []] },
+        )

-        ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions)
-
-        //
-        // MODULE: Sambamba merge
-        //
-        // Reunite BAMs
-        // First, count expected BAMs per sample for non-blocking groupTuple op
-        // channel: [ meta_count, group_size ]
-        ch_sample_fastq_counts = ch_star_inputs
-            .map { meta_star, reads_fwd, reads_rev ->
-                def meta_count = [key: meta_star.key]
-                return [meta_count, meta_star]
-            }
-            .groupTuple()
-            .map { meta_count, meta_stars -> return [meta_count, meta_stars.size()] }
-
-        // Now, group with expected size then sort into tumor and normal channels
-        // channel: [ meta_group, [bam, ...] ]
-        ch_bams_united = ch_sample_fastq_counts
-            .cross(
-                // First element to match meta_count above for `cross`
-                SAMTOOLS_SORT.out.bam.map { meta_star, bam -> [[key: meta_star.key], bam] }
-            )
-            .map { count_tuple, bam_tuple ->
-
-                def group_size = count_tuple[1]
-                def (meta_bam, bam) = bam_tuple
-
-                def meta_group = [
-                    *:meta_bam,
-                ]
-
-                return tuple(groupKey(meta_group, group_size), bam)
-            }
-            .groupTuple()
-
-        // Sort into merge-eligible BAMs (at least two BAMs required)
-        // channel: runnable: [ meta_group, [bam, ...] ]
-        // channel: skip: [ meta_group, bam ]
-        ch_bams_united_sorted = ch_bams_united
-            .branch { meta_group, bams ->
-                runnable: bams.size() > 1
-                skip: true
-                    return [meta_group, bams[0]]
-            }
-
-        // Create process input channel
-        // channel: [ meta_merge, [bams, ...] ]
-        ch_merge_inputs = WorkflowOncoanalyser.restoreMeta(ch_bams_united_sorted.runnable, ch_inputs)
-            .map { meta, bams ->
-                def meta_merge = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: Utils.getTumorRnaSampleName(meta),
-                ]
-                return [meta_merge, bams]
-            }
-
-        // Run process
-        SAMBAMBA_MERGE(
-            ch_merge_inputs,
-        )
-
-        ch_versions = ch_versions.mix(SAMBAMBA_MERGE.out.versions)
-
-        //
-        // MODULE: GATK4 markduplicates
-        //
-        // Create process input channel
-        // channel: [ meta_markdups, bam ]
-        ch_markdups_inputs = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(SAMBAMBA_MERGE.out.bam, ch_inputs),
-                WorkflowOncoanalyser.restoreMeta(ch_bams_united_sorted.skip, ch_inputs),
-            )
-            .map { meta, bam ->
-                def meta_markdups = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: Utils.getTumorRnaSampleName(meta),
-                ]
-                return [meta_markdups, bam]
-            }
-
-        // Run process
-        GATK4_MARKDUPLICATES(
-            ch_markdups_inputs,
-            [],
-            [],
-        )
-
-        ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions)
-
-        // Combine BAMs and BAIs
-        // channel: [ meta, bam, bai ]
-        ch_bams_ready = WorkflowOncoanalyser.groupByMeta(
-            WorkflowOncoanalyser.restoreMeta(GATK4_MARKDUPLICATES.out.bam, ch_inputs),
-            WorkflowOncoanalyser.restoreMeta(GATK4_MARKDUPLICATES.out.bai, ch_inputs),
-        )
-
-        // Set outputs
-        // channel: [ meta, bam, bai ]
-        ch_bam_out = Channel.empty()
-            .mix(
-                ch_bams_ready,
-                ch_inputs_sorted.skip.map { meta -> [meta, [], []] },
-            )
-
     emit:
-        rna_tumor = ch_bam_out // channel: [ meta, bam, bai ]
-        versions = ch_versions // channel: [ versions.yml ]
+    rna_tumor = ch_bam_out // channel: [ meta, bam, bai ]
+
+    versions = ch_versions // channel: [ versions.yml ]
 }
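Note on the merge-eligibility branch above: samples with a single sorted BAM bypass SAMBAMBA_MERGE entirely, and both arms are recombined before GATK4_MARKDUPLICATES. A minimal sketch of that shape, under illustrative names:

    ch_bams_sorted = ch_bams_united.branch { meta, bams ->
        runnable: bams.size() > 1       // a real merge is needed
        skip: true
            return [meta, bams[0]]      // unwrap the single BAM from its list
    }

    MERGE(ch_bams_sorted.runnable)
    ch_single_or_merged = MERGE.out.bam.mix(ch_bams_sorted.skip)

diff --git a/subworkflows/local/read_processing/main.nf b/subworkflows/local/read_processing/main.nf
index 2e67d213..4a0967ca 100644
--- a/subworkflows/local/read_processing/main.nf
+++ b/subworkflows/local/read_processing/main.nf
@@ -9,114 +9,115 @@ include { MARKDUPS } from '../../../modules/local/markdups/main'
 
 workflow READ_PROCESSING {
     take:
-        // Sample data
-        ch_inputs // channel: [mandatory] [ meta ]
-        ch_dna_tumor // channel: [mandatory] [ meta, [bam, ...], [bai, ...] ]
-        ch_dna_normal // channel: [mandatory] [ meta, [bam, ...], [bai, ...] ]
+    // Sample data
+    ch_inputs // channel: [mandatory] [ meta ]
+    ch_dna_tumor // channel: [mandatory] [ meta, [bam, ...], [bai, ...] ]
+    ch_dna_normal // channel: [mandatory] [ meta, [bam, ...], [bai, ...] ]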
] - // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_ver // channel: [mandatory] genome version - genome_fai // channel: [mandatory] /path/to/genome_fai - genome_dict // channel: [mandatory] /path/to/genome_dict - unmap_regions // channel: [mandatory] /path/to/unmap_regions + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_ver // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + genome_dict // channel: [mandatory] /path/to/genome_dict + unmap_regions // channel: [mandatory] /path/to/unmap_regions - // Params - has_umis // boolean: [mandatory] UMI processing flag + // Params + has_umis // boolean: [mandatory] UMI processing flag main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select and sort input sources, separating bytumor and normal - // channel: runnable: [ meta, [bam, ...], [bai, ...] ] - // channel: skip: [ meta ] - ch_inputs_tumor_sorted = ch_dna_tumor - .map { meta, bams, bais -> - return [ - meta, - Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_TUMOR) ? [Utils.getInput(meta, Constants.INPUT.BAM_DNA_TUMOR)] : bams, - Utils.hasExistingInput(meta, Constants.INPUT.BAI_DNA_TUMOR) ? [Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR)] : bais, - ] - } - .branch { meta, bams, bais -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR) - runnable: bams && !has_existing - skip: true - return meta - } - - ch_inputs_normal_sorted = ch_dna_normal - .map { meta, bams, bais -> - return [ - meta, - Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_NORMAL) ? [Utils.getInput(meta, Constants.INPUT.BAM_DNA_NORMAL)] : bams, - Utils.hasExistingInput(meta, Constants.INPUT.BAI_DNA_NORMAL) ? [Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL)] : bais, - ] - } - .branch { meta, bams, bais -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL) - runnable: bams && !has_existing - skip: true - return meta - } - - // Create process input channel - // channel: [ meta_markdups, [bam, ...], [bai, ...] ] - ch_markdups_inputs = Channel.empty() - .mix( - ch_inputs_tumor_sorted.runnable.map { meta, bams, bais -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bams, bais] }, - ch_inputs_normal_sorted.runnable.map { meta, bams, bais -> [meta, Utils.getNormalDnaSample(meta), 'normal', bams, bais] }, - ) - .map { meta, meta_sample, sample_type, bams, bais -> - - def meta_markdups = [ - key: meta.group_id, - id: "${meta.group_id}_${meta_sample.sample_id}", - sample_id: meta_sample.sample_id, - sample_type: sample_type, - ] - - return [meta_markdups, bams, bais] - } - - // Run process - MARKDUPS( - ch_markdups_inputs, - genome_fasta, - genome_ver, - genome_fai, - genome_dict, - unmap_regions, - has_umis, + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select and sort input sources, separating bytumor and normal + // channel: runnable: [ meta, [bam, ...], [bai, ...] ] + // channel: skip: [ meta ] + ch_inputs_tumor_sorted = ch_dna_tumor + .map { meta, bams, bais -> + return [ + meta, + Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_TUMOR) ? [Utils.getInput(meta, Constants.INPUT.BAM_DNA_TUMOR)] : bams, + Utils.hasExistingInput(meta, Constants.INPUT.BAI_DNA_TUMOR) ? 
[Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR)] : bais, + ] + } + .branch { meta, bams, bais -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR) + runnable: bams && !has_existing + skip: true + return meta + } + + ch_inputs_normal_sorted = ch_dna_normal + .map { meta, bams, bais -> + return [ + meta, + Utils.hasExistingInput(meta, Constants.INPUT.BAM_DNA_NORMAL) ? [Utils.getInput(meta, Constants.INPUT.BAM_DNA_NORMAL)] : bams, + Utils.hasExistingInput(meta, Constants.INPUT.BAI_DNA_NORMAL) ? [Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL)] : bais, + ] + } + .branch { meta, bams, bais -> + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL) + runnable: bams && !has_existing + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_markdups, [bam, ...], [bai, ...] ] + ch_markdups_inputs = Channel.empty() + .mix( + ch_inputs_tumor_sorted.runnable.map { meta, bams, bais -> [meta, Utils.getTumorDnaSample(meta), 'tumor', bams, bais] }, + ch_inputs_normal_sorted.runnable.map { meta, bams, bais -> [meta, Utils.getNormalDnaSample(meta), 'normal', bams, bais] }, + ) + .map { meta, meta_sample, sample_type, bams, bais -> + + def meta_markdups = [ + key: meta.group_id, + id: "${meta.group_id}_${meta_sample.sample_id}", + sample_id: meta_sample.sample_id, + sample_type: sample_type, + ] + + return [meta_markdups, bams, bais] + } + + // Run process + MARKDUPS( + ch_markdups_inputs, + genome_fasta, + genome_ver, + genome_fai, + genome_dict, + unmap_regions, + has_umis, + ) + + // Sort into a tumor and normal channel + ch_markdups_out = MARKDUPS.out.bam + .branch { meta_markdups, bam, bai -> + assert ['tumor', 'normal'].contains(meta_markdups.sample_type) + tumor: meta_markdups.sample_type == 'tumor' + normal: meta_markdups.sample_type == 'normal' + placeholder: true + } + + // Set outputs, restoring original meta + // channel: [ meta, bam, bai ] + ch_bam_tumor_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_markdups_out.tumor, ch_inputs), + ch_inputs_tumor_sorted.skip.map { meta -> [meta, [], []] }, ) - // Sort into a tumor and normal channel - ch_markdups_out = MARKDUPS.out.bam - .branch { meta_markdups, bam, bai -> - assert ['tumor', 'normal'].contains(meta_markdups.sample_type) - tumor: meta_markdups.sample_type == 'tumor' - normal: meta_markdups.sample_type == 'normal' - placeholder: true - } - - // Set outputs, restoring original meta - // channel: [ meta, bam, bai ] - ch_bam_tumor_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ch_markdups_out.tumor, ch_inputs), - ch_inputs_tumor_sorted.skip.map { meta -> [meta, [], []] }, - ) - - ch_bam_normal_out = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(ch_markdups_out.normal, ch_inputs), - ch_inputs_normal_sorted.skip.map { meta -> [meta, [], []] }, - ) + ch_bam_normal_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ch_markdups_out.normal, ch_inputs), + ch_inputs_normal_sorted.skip.map { meta -> [meta, [], []] }, + ) emit: - dna_tumor = ch_bam_tumor_out // channel: [ meta, bam, bai ] - dna_normal = ch_bam_normal_out // channel: [ meta, bam, bai ] - versions = ch_versions // channel: [ versions.yml ] + dna_tumor = ch_bam_tumor_out // channel: [ meta, bam, bai ] + dna_normal = ch_bam_normal_out // channel: [ meta, bam, bai ] + + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf 
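Note on the output convention above, shared by the other subworkflows: samples that were skipped re-enter the output channel as placeholders such as [ meta, [], [] ], so every input meta appears exactly once downstream and later joins on meta never stall. Sketch under illustrative names:

    ch_out = Channel.empty()
        .mix(
            ch_processed,                              // [ meta, bam, bai ]
            ch_skipped.map { meta -> [meta, [], []] }, // placeholder keeps one item per input
        )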
diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf
index 8dfee365..a7ded0be 100644
--- a/subworkflows/local/sage_append/main.nf
+++ b/subworkflows/local/sage_append/main.nf
@@ -10,163 +10,163 @@ include { SAGE_APPEND as GERMLINE } from '../../../modules/local/sage/append/mai
 
 workflow SAGE_APPEND {
     take:
-        // Sample data
-        ch_inputs // channel: [mandatory] [ meta ]
-        ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ]
-        ch_purple_dir // channel: [mandatory] [ meta, purple_dir ]
+    // Sample data
+    ch_inputs // channel: [mandatory] [ meta ]
+    ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ]
+    ch_purple_dir // channel: [mandatory] [ meta, purple_dir ]

-        // Reference data
-        genome_fasta // channel: [mandatory] /path/to/genome_fasta
-        genome_version // channel: [mandatory] genome version
-        genome_fai // channel: [mandatory] /path/to/genome_fai
-        genome_dict // channel: [mandatory] /path/to/genome_dict
+    // Reference data
+    genome_fasta // channel: [mandatory] /path/to/genome_fasta
+    genome_version // channel: [mandatory] genome version
+    genome_fai // channel: [mandatory] /path/to/genome_fai
+    genome_dict // channel: [mandatory] /path/to/genome_dict

     main:
-        // Channel for version.yml files
-        // channel: [ versions.yml ]
-        ch_versions = Channel.empty()
-
-        // Select input sources and sort
-        // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ]
-        // channel: skip: [ meta ]
-        ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta(
-            ch_tumor_rna_bam,
-            ch_purple_dir,
-        )
-            .map { meta, tumor_bam, tumor_bai, purple_dir ->
-                return [
-                    meta,
-                    Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR),
-                    Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR),
-                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
-                ]
-            }
-            .branch { meta, tumor_bam, tumor_bai, purple_dir ->
-                runnable: tumor_bam && purple_dir
-                skip: true
-                    return meta
-            }
-
-        //
-        // MODULE: SAGE append germline
-        //
-        // Select inputs that are eligible to run
-        // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ]
-        // channel: skip: [ meta ]
-        ch_inputs_germline_sorted = ch_inputs_sorted.runnable
-            .branch { meta, tumor_bam, tumor_bai, purple_dir ->
-
-                def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
-
-                def has_normal_dna = Utils.hasNormalDna(meta)
-                def has_tumor_rna = Utils.hasTumorRna(meta)
-                def has_smlv_germline = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz")
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL)
-
-                runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ]
-        ch_sage_append_germline_inputs = ch_inputs_germline_sorted.runnable
-            .map { meta, tumor_bam, tumor_bai, purple_dir ->
-
-                def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
-
-                def meta_append = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    tumor_rna_id: Utils.getTumorRnaSampleName(meta),
-                    dna_id: Utils.getNormalDnaSampleName(meta),
-                ]
-
-                def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz")
-
-                return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai]
-            }
-
-        // Run process
-        GERMLINE(
-            ch_sage_append_germline_inputs,
-            genome_fasta,
-            genome_version,
-            genome_fai,
-            genome_dict,
+    // Channel for version.yml files
+    // channel: [ versions.yml ]
+    ch_versions = Channel.empty()
+
+    // Select input sources and sort
+    // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ]
+    // channel: skip: [ meta ]
+    ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta(
+        ch_tumor_rna_bam,
+        ch_purple_dir,
+    )
+        .map { meta, tumor_bam, tumor_bai, purple_dir ->
+            return [
+                meta,
+                Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR),
+                Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR),
+                Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+            ]
+        }
+        .branch { meta, tumor_bam, tumor_bai, purple_dir ->
+            runnable: tumor_bam && purple_dir
+            skip: true
+                return meta
+        }
+
+    //
+    // MODULE: SAGE append germline
+    //
+    // Select inputs that are eligible to run
+    // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ]
+    // channel: skip: [ meta ]
+    ch_inputs_germline_sorted = ch_inputs_sorted.runnable
+        .branch { meta, tumor_bam, tumor_bai, purple_dir ->
+
+            def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
+
+            def has_normal_dna = Utils.hasNormalDna(meta)
+            def has_tumor_rna = Utils.hasTumorRna(meta)
+            def has_smlv_germline = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz")
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL)
+
+            runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ]
+    ch_sage_append_germline_inputs = ch_inputs_germline_sorted.runnable
+        .map { meta, tumor_bam, tumor_bai, purple_dir ->
+
+            def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
+
+            def meta_append = [
+                key: meta.group_id,
+                id: meta.group_id,
+                tumor_rna_id: Utils.getTumorRnaSampleName(meta),
+                dna_id: Utils.getNormalDnaSampleName(meta),
+            ]
+
+            def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz")
+
+            return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai]
+        }
+
+    // Run process
+    GERMLINE(
+        ch_sage_append_germline_inputs,
+        genome_fasta,
+        genome_version,
+        genome_fai,
+        genome_dict,
+    )
+
+    ch_versions = ch_versions.mix(GERMLINE.out.versions)
+
+    //
+    // MODULE: SAGE append somatic
+    //
+    // Select inputs that are eligible to run
+    // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ]
+    // channel: skip: [ meta ]
+    ch_inputs_somatic_sorted = ch_inputs_sorted.runnable
+        .branch { meta, tumor_bam, tumor_bai, purple_dir ->
+            def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
+
+            def has_tumor_dna = Utils.hasTumorDna(meta)
+            def has_tumor_rna = Utils.hasTumorRna(meta)
+            def has_smlv_somatic = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz")
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR)
+
+            runnable: has_tumor_dna && has_tumor_rna && has_smlv_somatic && !has_existing
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ]
+    ch_sage_append_somatic_inputs = ch_inputs_somatic_sorted.runnable
+        .map { meta, tumor_bam, tumor_bai, purple_dir ->
+
+            def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
+
+            def meta_append = [
+                key: meta.group_id,
+                id: meta.group_id,
+                tumor_rna_id: Utils.getTumorRnaSampleName(meta),
+                dna_id: Utils.getTumorDnaSampleName(meta),
+            ]
+
+            def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz")
+
+            return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai]
+        }
+
+    // Run process
+    SOMATIC(
+        ch_sage_append_somatic_inputs,
+        genome_fasta,
+        genome_version,
+        genome_fai,
+        genome_dict,
+    )
+
+    ch_versions = ch_versions.mix(SOMATIC.out.versions)
+
+    // Set outputs, restoring original meta
+    // channel: [ meta, sage_append_vcf ]
+    ch_somatic_vcf = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs),
+            ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] },
+            ch_inputs_sorted.skip.map { meta -> [meta, []] },
         )

-        ch_versions = ch_versions.mix(GERMLINE.out.versions)
-
-        //
-        // MODULE: SAGE append somatic
-        //
-        // Select inputs that are eligible to run
-        // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ]
-        // channel: skip: [ meta ]
-        ch_inputs_somatic_sorted = ch_inputs_sorted.runnable
-            .branch { meta, tumor_bam, tumor_bai, purple_dir ->
-                def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
-
-                def has_tumor_dna = Utils.hasTumorDna(meta)
-                def has_tumor_rna = Utils.hasTumorRna(meta)
-                def has_smlv_somatic = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz")
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR)
-
-                runnable: has_tumor_dna && has_tumor_rna && has_smlv_somatic && !has_existing
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ]
-        ch_sage_append_somatic_inputs = ch_inputs_somatic_sorted.runnable
-            .map { meta, tumor_bam, tumor_bai, purple_dir ->
-
-                def tumor_dna_id = Utils.getTumorDnaSampleName(meta)
-
-                def meta_append = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    tumor_rna_id: Utils.getTumorRnaSampleName(meta),
-                    dna_id: Utils.getTumorDnaSampleName(meta),
-                ]
-
-                def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz")
-
-                return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai]
-            }
+    ch_germline_vcf = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs),
+            ch_inputs_germline_sorted.skip.map { meta -> [meta, []] },
+            ch_inputs_sorted.skip.map { meta -> [meta, []] },
+        )

-        // Run process
-        SOMATIC(
-            ch_sage_append_somatic_inputs,
-            genome_fasta,
-            genome_version,
-            genome_fai,
-            genome_dict,
-        )
-
-        ch_versions = ch_versions.mix(SOMATIC.out.versions)
-
-        // Set outputs, restoring original meta
-        // channel: [ meta, sage_append_vcf ]
-        ch_somatic_vcf = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs),
-                ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] },
-                ch_inputs_sorted.skip.map { meta -> [meta, []] },
-            )
-
-        ch_germline_vcf = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs),
-                ch_inputs_germline_sorted.skip.map { meta -> [meta, []] },
-                ch_inputs_sorted.skip.map { meta -> [meta, []] },
-            )
-
     emit:
-        somatic_vcf = ch_somatic_vcf // channel: [ meta, sage_append_vcf ]
-        germline_vcf = ch_germline_vcf // channel: [ meta, sage_append_vcf ]
+    somatic_vcf = ch_somatic_vcf // channel: [ meta, sage_append_vcf ]
+    germline_vcf = ch_germline_vcf // channel: [ meta, sage_append_vcf ]

-        versions = ch_versions // channel: [ versions.yml ]
+    versions = ch_versions // channel: [ versions.yml ]
 }
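Note on the eligibility checks above: has_smlv_germline and has_smlv_somatic are assigned file(purple_dir).resolve(...), and in Groovy a Path object is truthy whether or not the file exists, so these flags are effectively always true. If the intent is to gate on the VCF actually being present, an explicit test would be needed, e.g. (sketch):

    def smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz")
    def has_smlv_germline = smlv_vcf.exists()   // Path truthiness alone is not an existence check

diff --git a/subworkflows/local/sage_calling/main.nf b/subworkflows/local/sage_calling/main.nf
index 33d815d0..35a44d97 100644
--- a/subworkflows/local/sage_calling/main.nf
+++ b/subworkflows/local/sage_calling/main.nf
@@ -10,188 +10,188 @@ include { SAGE_SOMATIC as SOMATIC } from '../../../modules/local/sage/somatic/
 
 workflow SAGE_CALLING {
     take: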
-        // Sample data
-        ch_inputs // channel: [mandatory] [ meta ]
-        ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ]
-        ch_normal_bam // channel: [mandatory] [ meta, bam, bai ]
-
-        // Reference data
-        genome_fasta // channel: [mandatory] /path/to/genome_fasta
-        genome_version // channel: [mandatory] genome version
-        genome_fai // channel: [mandatory] /path/to/genome_fai
-        genome_dict // channel: [mandatory] /path/to/genome_dict
-        sage_known_hotspots_germline // channel: [optional] /path/to/sage_known_hotspots_germline
-        sage_known_hotspots_somatic // channel: [mandatory] /path/to/sage_known_hotspots_somatic
-        sage_actionable_panel // channel: [mandatory] /path/to/sage_actionable_panel
-        sage_coverage_panel // channel: [mandatory] /path/to/sage_coverage_panel
-        sage_highconf_regions // channel: [mandatory] /path/to/sage_highconf_regions
-        segment_mappability // channel: [mandatory] /path/to/segment_mappability
-        driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel
-        ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
+    // Sample data
+    ch_inputs // channel: [mandatory] [ meta ]
+    ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ]
+    ch_normal_bam // channel: [mandatory] [ meta, bam, bai ]
+
+    // Reference data
+    genome_fasta // channel: [mandatory] /path/to/genome_fasta
+    genome_version // channel: [mandatory] genome version
+    genome_fai // channel: [mandatory] /path/to/genome_fai
+    genome_dict // channel: [mandatory] /path/to/genome_dict
+    sage_known_hotspots_germline // channel: [optional] /path/to/sage_known_hotspots_germline
+    sage_known_hotspots_somatic // channel: [mandatory] /path/to/sage_known_hotspots_somatic
+    sage_actionable_panel // channel: [mandatory] /path/to/sage_actionable_panel
+    sage_coverage_panel // channel: [mandatory] /path/to/sage_coverage_panel
+    sage_highconf_regions // channel: [mandatory] /path/to/sage_highconf_regions
+    segment_mappability // channel: [mandatory] /path/to/segment_mappability
+    driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel
+    ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/

     main:
-        // Channel for version.yml files
-        // channel: [ versions.yml ]
-        ch_versions = Channel.empty()
-
-        // Sort inputs
-        // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ]
-        // channel: skip: [ meta ]
-        ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta(
-            ch_tumor_bam,
-            ch_normal_bam,
-        )
-            .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
-                return [
-                    meta,
-                    Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR),
-                    tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR),
-                    Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL),
-                    normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL),
-                ]
-            }
-            .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
-                runnable: tumor_bam
-                skip: true
-                    return meta
-            }
-
-        //
-        // MODULE: SAGE germline
-        //
-        // Select inputs that are eligible to run
-        // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ]
-        // channel: skip: [ meta ]
-        ch_inputs_germline_sorted = ch_inputs_sorted.runnable
-            .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
-                def has_tumor_normal = tumor_bam && normal_bam
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_NORMAL)
-
-                runnable: has_tumor_normal && !has_existing
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: [ meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai ]
-        ch_sage_germline_inputs = ch_inputs_germline_sorted.runnable
-            .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
-
-                def meta_sage = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    tumor_id: Utils.getTumorDnaSampleName(meta),
-                    normal_id: Utils.getNormalDnaSampleName(meta),
-                ]
-
-                return [meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai]
+    // Channel for version.yml files
+    // channel: [ versions.yml ]
+    ch_versions = Channel.empty()
+
+    // Sort inputs
+    // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ]
+    // channel: skip: [ meta ]
+    ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta(
+        ch_tumor_bam,
+        ch_normal_bam,
+    )
+        .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+            return [
+                meta,
+                Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR),
+                tumor_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR),
+                Utils.selectCurrentOrExisting(normal_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_NORMAL),
+                normal_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_NORMAL),
+            ]
+        }
+        .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+            runnable: tumor_bam
+            skip: true
+                return meta
+        }
+
+    //
+    // MODULE: SAGE germline
+    //
+    // Select inputs that are eligible to run
+    // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ]
+    // channel: skip: [ meta ]
+    ch_inputs_germline_sorted = ch_inputs_sorted.runnable
+        .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+            def has_tumor_normal = tumor_bam && normal_bam
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_NORMAL)
+
+            runnable: has_tumor_normal && !has_existing
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: [ meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai ]
+    ch_sage_germline_inputs = ch_inputs_germline_sorted.runnable
+        .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+
+            def meta_sage = [
+                key: meta.group_id,
+                id: meta.group_id,
+                tumor_id: Utils.getTumorDnaSampleName(meta),
+                normal_id: Utils.getNormalDnaSampleName(meta),
+            ]
+
+            return [meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai]
+        }
+
+    // Run process
+    GERMLINE(
+        ch_sage_germline_inputs,
+        genome_fasta,
+        genome_version,
+        genome_fai,
+        genome_dict,
+        sage_known_hotspots_germline,
+        sage_actionable_panel,
+        sage_coverage_panel,
+        sage_highconf_regions,
+        ensembl_data_resources,
+    )
+
+    ch_versions = ch_versions.mix(GERMLINE.out.versions)
+
+    //
+    // MODULE: SAGE somatic
+    //
+    // Select inputs that are eligible to run
+    // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ]
+    // channel: skip: [ meta ]
+    ch_inputs_somatic_sorted = ch_inputs_sorted.runnable
+        .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+            def has_tumor = tumor_bam
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_TUMOR)
+
+            runnable: has_tumor && !has_existing
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: tumor/normal: [ meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai ]
+    // channel: tumor only: [ meta_sage, tumor_bam, [], tumor_bai, [] ]
+    ch_sage_somatic_inputs = ch_inputs_somatic_sorted.runnable
+        .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+
+            def meta_sage = [
+                key: meta.group_id,
+                id: meta.group_id,
+                tumor_id: Utils.getTumorDnaSampleName(meta),
+            ]
+
+            if (normal_bam) {
+                meta_sage.normal_id = Utils.getNormalDnaSampleName(meta)
             }

-        // Run process
-        GERMLINE(
-            ch_sage_germline_inputs,
-            genome_fasta,
-            genome_version,
-            genome_fai,
-            genome_dict,
-            sage_known_hotspots_germline,
-            sage_actionable_panel,
-            sage_coverage_panel,
-            sage_highconf_regions,
-            ensembl_data_resources,
+            return [meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai]
+        }
+
+    // Run process
+    SOMATIC(
+        ch_sage_somatic_inputs,
+        genome_fasta,
+        genome_version,
+        genome_fai,
+        genome_dict,
+        sage_known_hotspots_somatic,
+        sage_actionable_panel,
+        sage_coverage_panel,
+        sage_highconf_regions,
+        ensembl_data_resources,
+    )
+
+    ch_versions = ch_versions.mix(SOMATIC.out.versions)
+
+    // Set outputs, restoring original meta
+    // channel: [ meta, sage_vcf, sage_tbi ]
+    ch_somatic_vcf_out = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs),
+            ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] },
+            ch_inputs_sorted.skip.map { meta -> [meta, [], []] },
         )

-        ch_versions = ch_versions.mix(GERMLINE.out.versions)
-
-        //
-        // MODULE: SAGE somatic
-        //
-        // Select inputs that are eligible to run
-        // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ]
-        // channel: skip: [ meta ]
-        ch_inputs_somatic_sorted = ch_inputs_sorted.runnable
-            .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
-                def has_tumor = tumor_bam
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_TUMOR)
-
-                runnable: has_tumor && !has_existing
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: tumor/normal: [ meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai ]
-        // channel: tumor only: [ meta_sage, tumor_bam, [], tumor_bai, [] ]
-        ch_sage_somatic_inputs = ch_inputs_somatic_sorted.runnable
-            .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
-
-                def meta_sage = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    tumor_id: Utils.getTumorDnaSampleName(meta),
-                ]
-
-                if (normal_bam) {
-                    meta_sage.normal_id = Utils.getNormalDnaSampleName(meta)
-                }
-
-                return [meta_sage, tumor_bam, normal_bam, tumor_bai, normal_bai]
-            }
+    // channel: [ meta, sage_vcf, sage_tbi ]
+    ch_germline_vcf_out = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs),
+            ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] },
+            ch_inputs_sorted.skip.map { meta -> [meta, [], []] },
+        )

-        // Run process
-        SOMATIC(
-            ch_sage_somatic_inputs,
-            genome_fasta,
-            genome_version,
-            genome_fai,
-            genome_dict,
-            sage_known_hotspots_somatic,
-            sage_actionable_panel,
-            sage_coverage_panel,
-            sage_highconf_regions,
-            ensembl_data_resources,
+    // channel: [ meta, sage_dir ]
+    ch_somatic_dir = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(SOMATIC.out.sage_dir, ch_inputs),
+            ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] },
+            ch_inputs_sorted.skip.map { meta -> [meta, []] },
         )

-        ch_versions = ch_versions.mix(SOMATIC.out.versions)
-
-        // Set outputs, restoring original meta
-        // channel: [ meta, sage_vcf, sage_tbi ]
-        ch_somatic_vcf_out = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(SOMATIC.out.vcf, ch_inputs),
-                ch_inputs_somatic_sorted.skip.map { meta -> [meta, [], []] },
-                ch_inputs_sorted.skip.map { meta -> [meta, [], []] },
-            )
-
-        // channel: [ meta, sage_vcf, sage_tbi ]
-        ch_germline_vcf_out = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(GERMLINE.out.vcf, ch_inputs),
-                ch_inputs_germline_sorted.skip.map { meta -> [meta, [], []] },
-
ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, - ) - - // channel: [ meta, sage_dir ] - ch_somatic_dir = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(SOMATIC.out.sage_dir, ch_inputs), - ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] }, - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) - - // channel: [ meta, sage_dir ] - ch_germline_dir = Channel.empty() - .mix( - WorkflowOncoanalyser.restoreMeta(GERMLINE.out.sage_dir, ch_inputs), - ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, - ch_inputs_sorted.skip.map { meta -> [meta, []] }, - ) + // channel: [ meta, sage_dir ] + ch_germline_dir = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(GERMLINE.out.sage_dir, ch_inputs), + ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, + ch_inputs_sorted.skip.map { meta -> [meta, []] }, + ) emit: - germline_vcf = ch_germline_vcf_out // channel: [ meta, sage_vcf, sage_tbi ] - somatic_vcf = ch_somatic_vcf_out // channel: [ meta, sage_vcf, sage_tbi ] - germline_dir = ch_germline_dir // channel: [ meta, sage_dir ] - somatic_dir = ch_somatic_dir // channel: [ meta, sage_dir ] + germline_vcf = ch_germline_vcf_out // channel: [ meta, sage_vcf, sage_tbi ] + somatic_vcf = ch_somatic_vcf_out // channel: [ meta, sage_vcf, sage_tbi ] + germline_dir = ch_germline_dir // channel: [ meta, sage_dir ] + somatic_dir = ch_somatic_dir // channel: [ meta, sage_dir ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/sigs_fitting/main.nf b/subworkflows/local/sigs_fitting/main.nf index 41d2acb4..240ebc35 100644 --- a/subworkflows/local/sigs_fitting/main.nf +++ b/subworkflows/local/sigs_fitting/main.nf @@ -9,83 +9,83 @@ include { SIGS } from '../../../modules/local/sigs/main' workflow SIGS_FITTING { take: - // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_purple // channel: [mandatory] [ meta, purple_dir ] + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] - // Reference data - sigs_signatures // channel: [mandatory] /path/to/sigs_signatures + // Reference data + sigs_signatures // channel: [mandatory] /path/to/sigs_signatures main: - // Channel for version.yml files - // channel: [ versions.yml ] - ch_versions = Channel.empty() - - // Select input sources - // channel: [ meta, purple_dir ] - ch_inputs_selected = ch_purple - .map { meta, purple_dir -> - return [meta, Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR)] + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources + // channel: [ meta, purple_dir ] + ch_inputs_selected = ch_purple + .map { meta, purple_dir -> + return [meta, Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR)] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir ] + // channel: skip: [ meta ] + ch_inputs_sorted = ch_inputs_selected + .branch { meta, purple_dir -> + + def has_dna = Utils.hasTumorDna(meta) + + def tumor_id + def has_smlv_vcf + if (has_dna) { + tumor_id = Utils.getTumorDnaSampleName(meta) + has_smlv_vcf = purple_dir ? 
             }
-        // Sort inputs
-        // channel: runnable: [ meta, purple_dir ]
-        // channel: skip: [ meta ]
-        ch_inputs_sorted = ch_inputs_selected
-            .branch { meta, purple_dir ->
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SIGS_DIR)
-                def has_dna = Utils.hasTumorDna(meta)
+            runnable: has_dna && purple_dir && has_smlv_vcf && !has_existing
+            skip: true
+                return meta
+        }
-                def tumor_id
-                def has_smlv_vcf
-                if (has_dna) {
-                    tumor_id = Utils.getTumorDnaSampleName(meta)
-                    has_smlv_vcf = purple_dir ? file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz") : []
-                }
+    // Create process input channel
+    // channel: [ meta_sigs, smlv_vcf ]
+    ch_sigs_inputs = ch_inputs_sorted.runnable
+        .map { meta, purple_dir ->
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SIGS_DIR)
+            def tumor_id = Utils.getTumorDnaSampleName(meta)
-                runnable: has_dna && purple_dir && has_smlv_vcf && !has_existing
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: [ meta_sigs, smlv_vcf ]
-        ch_sigs_inputs = ch_inputs_sorted.runnable
-            .map { meta, purple_dir ->
+            def meta_sigs = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: tumor_id,
+            ]
-                def tumor_id = Utils.getTumorDnaSampleName(meta)
+            def smlv_vcf = file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz")
-                def meta_sigs = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: tumor_id,
-                ]
+            return [meta_sigs, smlv_vcf]
+        }
-                def smlv_vcf = file(purple_dir).resolve("${tumor_id}.purple.somatic.vcf.gz")
+    // Run process
+    SIGS(
+        ch_sigs_inputs,
+        sigs_signatures,
+    )
-                return [meta_sigs, smlv_vcf]
-            }
+    ch_versions = ch_versions.mix(SIGS.out.versions)
-        // Run process
-        SIGS(
-            ch_sigs_inputs,
-            sigs_signatures,
+    // Set outputs, restoring original meta
+    // channel: [ meta, sigs_dir ]
+    ch_outputs = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(SIGS.out.sigs_dir, ch_inputs),
+            ch_inputs_sorted.skip.map { meta -> [meta, []] },
         )
-        ch_versions = ch_versions.mix(SIGS.out.versions)
-
-        // Set outputs, restoring original meta
-        // channel: [ meta, sigs_dir ]
-        ch_outputs = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(SIGS.out.sigs_dir, ch_inputs),
-                ch_inputs_sorted.skip.map { meta -> [meta, []] },
-            )
-
     emit:
-        sigs_dir = ch_outputs    // channel: [ meta, sigs_dir ]
+    sigs_dir = ch_outputs    // channel: [ meta, sigs_dir ]
 
-        versions = ch_versions    // channel: [ versions.yml ]
+    versions = ch_versions    // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/virusbreakend_calling/main.nf b/subworkflows/local/virusbreakend_calling/main.nf
index 3e210365..ab5bb4fd 100644
--- a/subworkflows/local/virusbreakend_calling/main.nf
+++ b/subworkflows/local/virusbreakend_calling/main.nf
@@ -10,145 +10,145 @@ include { VIRUSINTERPRETER } from '../../../modules/local/virusinterpreter/main'
 
 workflow VIRUSBREAKEND_CALLING {
     take:
-        // Sample data
-        ch_inputs              // channel: [mandatory] [ meta ]
-        ch_tumor_bam           // channel: [mandatory] [ meta, bam, bai ]
-        ch_purple              // channel: [mandatory] [ meta, purple_dir ]
-        ch_bamtools_somatic    // channel: [mandatory] [ meta, metrics ]
-
-        // Reference data
-        genome_fasta           // channel: [mandatory] /path/to/genome_fasta
-        genome_fai             // channel: [mandatory] /path/to/genome_fai
-        genome_dict            // channel: [mandatory] /path/to/genome_dict
-        genome_gridss_index    // channel: [mandatory] /path/to/genome_gridss_index
-        virusbreakenddb        // channel: [mandatory] /path/to/virusbreakenddb/
-        virus_taxonomy_db      // channel: [mandatory] /path/to/virus_taxonomy_db
-        virus_reporting_db     // channel: [mandatory] /path/to/virus_reporting_db
-
-        // Params
-        gridss_config          // channel: [optional] /path/to/gridss_config
+    // Sample data
+    ch_inputs              // channel: [mandatory] [ meta ]
+    ch_tumor_bam           // channel: [mandatory] [ meta, bam, bai ]
+    ch_purple              // channel: [mandatory] [ meta, purple_dir ]
+    ch_bamtools_somatic    // channel: [mandatory] [ meta, metrics ]
+
+    // Reference data
+    genome_fasta           // channel: [mandatory] /path/to/genome_fasta
+    genome_fai             // channel: [mandatory] /path/to/genome_fai
+    genome_dict            // channel: [mandatory] /path/to/genome_dict
+    genome_gridss_index    // channel: [mandatory] /path/to/genome_gridss_index
+    virusbreakenddb        // channel: [mandatory] /path/to/virusbreakenddb/
+    virus_taxonomy_db      // channel: [mandatory] /path/to/virus_taxonomy_db
+    virus_reporting_db     // channel: [mandatory] /path/to/virus_reporting_db
+
+    // Params
+    gridss_config          // channel: [optional] /path/to/gridss_config
 
     main:
-        // Channel for version.yml files
-        // channel: [ versions.yml ]
-        ch_versions = Channel.empty()
-
-        // Sort inputs
-        // NOTE(SW): VIRUSBreakend inputs are not allowed in the samplesheet, so aren't considered
-        // channel: [ meta, tumor_bam, tumor_bai ]
-        ch_inputs_sorted = ch_tumor_bam
-            .map { meta, tumor_bam, tumor_bai ->
-                return [
-                    meta,
-                    Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR),
-                    Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_DNA_TUMOR),
-                ]
-            }
-            .branch { meta, tumor_bam, tumor_bai ->
-                def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.VIRUSINTERPRETER_DIR)
-                runnable: tumor_bam && !has_existing
-                skip: true
-                    return meta
-            }
-
-        //
-        // MODULE: VIRUSBreakend
-        //
-        // Create process input channel
-        // channel: [ meta_virus, tumor_bam ]
-        ch_virusbreakend_inputs = ch_inputs_sorted.runnable
-            .map { meta, tumor_bam, tumor_bai ->
-
-                def meta_virus = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: Utils.getTumorDnaSampleName(meta),
-                ]
-
-                return [meta_virus, tumor_bam]
-            }
-
-        // Run process
-        VIRUSBREAKEND(
-            ch_virusbreakend_inputs,
-            gridss_config,
-            genome_fasta,
-            genome_fai,
-            genome_dict,
-            genome_gridss_index,
-            virusbreakenddb,
+    // Channel for version.yml files
+    // channel: [ versions.yml ]
+    ch_versions = Channel.empty()
+
+    // Sort inputs
+    // NOTE(SW): VIRUSBreakend inputs are not allowed in the samplesheet, so aren't considered
+    // channel: [ meta, tumor_bam, tumor_bai ]
+    ch_inputs_sorted = ch_tumor_bam
+        .map { meta, tumor_bam, tumor_bai ->
+            return [
+                meta,
+                Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_MARKDUPS_DNA_TUMOR),
+                Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_DNA_TUMOR),
+            ]
+        }
+        .branch { meta, tumor_bam, tumor_bai ->
+            def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.VIRUSINTERPRETER_DIR)
+            runnable: tumor_bam && !has_existing
+            skip: true
+                return meta
+        }
+
+    //
+    // MODULE: VIRUSBreakend
+    //
+    // Create process input channel
+    // channel: [ meta_virus, tumor_bam ]
+    ch_virusbreakend_inputs = ch_inputs_sorted.runnable
+        .map { meta, tumor_bam, tumor_bai ->
+
+            def meta_virus = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: Utils.getTumorDnaSampleName(meta),
+            ]
+
+            return [meta_virus, tumor_bam]
+        }
+
+    // Run process
+    VIRUSBREAKEND(
+        ch_virusbreakend_inputs,
+        gridss_config,
+        genome_fasta,
+        genome_fai,
+        genome_dict,
+        genome_gridss_index,
+        virusbreakenddb,
+    )
+
+    ch_versions = ch_versions.mix(VIRUSBREAKEND.out.versions)
+
+    //
+    // MODULE: Virus Interpreter
+    //
+    // Select input sources
+    // channel: [ meta, virus_tsv, purple_dir, metrics ]
+    ch_virusinterpreter_inputs_selected = WorkflowOncoanalyser.groupByMeta(
+        WorkflowOncoanalyser.restoreMeta(VIRUSBREAKEND.out.tsv, ch_inputs),
+        ch_purple,
+        ch_bamtools_somatic,
+    )
+        .map { meta, virus_tsv, purple_dir, metrics ->
+
+            def inputs = [
+                virus_tsv,
+                Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
+                Utils.selectCurrentOrExisting(metrics, meta, Constants.INPUT.BAMTOOLS_TUMOR),
+            ]
+
+            return [meta, *inputs]
+        }
+
+    // Sort inputs
+    // channel: [ meta, virus_tsv, purple_dir, metrics ]
+    // channel: skip: [ meta ]
+    ch_virusinterpreter_inputs_sorted = ch_virusinterpreter_inputs_selected
+        .branch { meta, virus_tsv, purple_dir, metrics ->
+            runnable: virus_tsv && purple_dir && metrics
+            skip: true
+                return meta
+        }
+
+    // Create process input channel
+    // channel: [ meta_virus, virus_tsv, purple_dir, metrics ]
+    ch_virusinterpreter_inputs = ch_virusinterpreter_inputs_sorted.runnable
+        .map { d ->
+
+            def meta = d[0]
+            def inputs = d[1..-1]
+
+            def meta_virus = [
+                key: meta.group_id,
+                id: meta.group_id,
+                sample_id: Utils.getTumorDnaSampleName(meta),
+            ]
+
+            return [meta_virus, *inputs]
+        }
+
+    // Run process
+    VIRUSINTERPRETER(
+        ch_virusinterpreter_inputs,
+        virus_taxonomy_db,
+        virus_reporting_db,
+    )
+
+    ch_versions = ch_versions.mix(VIRUSINTERPRETER.out.versions)
+
+    // Set outputs, restoring original meta
+    // channel: [ meta, virusinterpreter_dir ]
+    ch_outputs = Channel.empty()
+        .mix(
+            WorkflowOncoanalyser.restoreMeta(VIRUSINTERPRETER.out.virusinterpreter_dir, ch_inputs),
+            ch_virusinterpreter_inputs_sorted.skip.map { meta -> [meta, []] },
+            ch_inputs_sorted.skip.map { meta -> [meta, []] },
         )
-        ch_versions = ch_versions.mix(VIRUSBREAKEND.out.versions)
-
-        //
-        // MODULE: Virus Interpreter
-        //
-        // Select input sources
-        // channel: [ meta, virus_tsv, purple_dir, metrics ]
-        ch_virusinterpreter_inputs_selected = WorkflowOncoanalyser.groupByMeta(
-            WorkflowOncoanalyser.restoreMeta(VIRUSBREAKEND.out.tsv, ch_inputs),
-            ch_purple,
-            ch_bamtools_somatic,
-        )
-            .map { meta, virus_tsv, purple_dir, metrics ->
-
-                def inputs = [
-                    virus_tsv,
-                    Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR),
-                    Utils.selectCurrentOrExisting(metrics, meta, Constants.INPUT.BAMTOOLS_TUMOR),
-                ]
-
-                return [meta, *inputs]
-            }
-
-        // Sort inputs
-        // channel: [ meta, virus_tsv, purple_dir, metrics ]
-        // channel: skip: [ meta ]
-        ch_virusinterpreter_inputs_sorted = ch_virusinterpreter_inputs_selected
-            .branch { meta, virus_tsv, purple_dir, metrics ->
-                runnable: virus_tsv && purple_dir && metrics
-                skip: true
-                    return meta
-            }
-
-        // Create process input channel
-        // channel: [ meta_virus, virus_tsv, purple_dir, metrics ]
-        ch_virusinterpreter_inputs = ch_virusinterpreter_inputs_sorted.runnable
-            .map { d ->
-
-                def meta = d[0]
-                def inputs = d[1..-1]
-
-                def meta_virus = [
-                    key: meta.group_id,
-                    id: meta.group_id,
-                    sample_id: Utils.getTumorDnaSampleName(meta),
-                ]
-
-                return [meta_virus, *inputs]
-            }
-
-        // Run process
-        VIRUSINTERPRETER(
-            ch_virusinterpreter_inputs,
-            virus_taxonomy_db,
-            virus_reporting_db,
-        )
-
-        ch_versions = ch_versions.mix(VIRUSINTERPRETER.out.versions)
-
-        // Set outputs, restoring original meta
-        // channel: [ meta, virusinterpreter_dir ]
-        ch_outputs = Channel.empty()
-            .mix(
-                WorkflowOncoanalyser.restoreMeta(VIRUSINTERPRETER.out.virusinterpreter_dir, ch_inputs),
-                ch_virusinterpreter_inputs_sorted.skip.map { meta -> [meta, []] },
-                ch_inputs_sorted.skip.map { meta -> [meta, []] },
-            )
-
     emit:
-        virusinterpreter_dir = ch_outputs    // channel: [ meta, virusinterpreter_dir ]
+    virusinterpreter_dir = ch_outputs    // channel: [ meta, virusinterpreter_dir ]
 
-        versions = ch_versions    // channel: [ versions.yml ]
+    versions = ch_versions    // channel: [ versions.yml ]
 }
diff --git a/workflows/mrd.nf b/workflows/mrd.nf
deleted file mode 100644
index d43b9490..00000000
--- a/workflows/mrd.nf
+++ /dev/null
@@ -1 +0,0 @@
-// PLACEHOLDER