nf-core · kubranarci · Nov 28, 2024 · Nov 21, 2024 · Nov 22, 2024 · Nov 22, 2024
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -11,5 +11,39 @@ report_section_order:
     order: -1002
 
 export_plots: true
-
 disable_version_detection: true
+ignore_images: false
+
+# Run only these modules
+run_modules:
+  - bcftools
+  - custom_content
+
+custom_data:
+  survivor:
+    id: "survivor"
+    section_name: "SURVIVOR variant statistics"
+    description: "generated by nf-core/survivor"
+    plot_type: "bargraph"
+    format: "tsv"
+    pconfig:
+      id: "survivor"
+      namespace: "SURVIVOR variant statistics"
+      table_title: "Variant statistics from survivor stats tool"
+      ylab: "Count"
+
+  summary_reports:
+    id: "summary_reports"
+    section_name: "Summary Benchmark Reports"
+
+sp:
+  survivor:
+    fn: "*.stats"
+  summary_reports:
+    fn: "*.png"
+
+module_order:
+  - bcftools:
+      name: "Bcftools stats"
+  - survivor
+  - summary_reports
diff --git a/bin/plots.R b/bin/plots.R
@@ -13,21 +13,21 @@ generate_plots <- function(table, benchmark, type, filter, stats) {
         table = table[table$Type == type & table$Filter == filter, ]
         title1 = paste("Type=",type," Filter=",filter, " | TP/FP/FN by tool", sep="")
         title2 = paste("Type=",type," Filter=",filter, " | Precision, Recall, and F1 by Tool", sep="")
-        name1 = paste(type, "_", filter, "_metric_by_tool_", benchmark, ".png", sep = "")
-        name2 = paste(type, "_", filter, "_variants_by_tool_", benchmark, ".png", sep = "")
+        name1 = paste(type, "_", filter, "_metric_by_tool_", benchmark, "_mqc.png", sep = "")
+        name2 = paste(type, "_", filter, "_variants_by_tool_", benchmark, "_mqc.png", sep = "")
     }
     else if (stats != "None" ){
         table = table[table$StatsType == stats, ]
         title1 = paste("StatsType=",stats, " | TP/FP/FN by tool", sep="")
         title2 = paste("StatsType=",stats, " | Precision, Recall, and F1 by Tool", sep="")
-        name1 = paste(stats, "_metric_by_tool_", benchmark, ".png", sep = "")
-        name2 = paste(stats, "_variants_by_tool_", benchmark, ".png", sep = "")
+        name1 = paste(stats, "_metric_by_tool_", benchmark, "_mqc.png", sep = "")
+        name2 = paste(stats, "_variants_by_tool_", benchmark, "_mqc.png", sep = "")
     }
     else{
         title1 = paste("TP/FP/FN by tool", sep="")
         title2 = paste("Precision, Recall, and F1 by Tool", sep="")
-        name1 = paste("metric_by_tool_", benchmark, ".png", sep = "")
-        name2 = paste("variants_by_tool_", benchmark, ".png", sep = "")
+        name1 = paste("metric_by_tool_", benchmark, "_mqc.png", sep = "")
+        name2 = paste("variants_by_tool_", benchmark, "_mqc.png", sep = "")
     }
     input_data_melted <- melt(table, id.vars = "Tool")
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -136,7 +136,7 @@ process {
     // Variant stats
     withName: SURVIVOR_STATS {
         ext.prefix ={["${meta.id}",
-                    (meta.caller != null)      ? ".${meta.caller}" : ''
+                    (meta.caller != null)      ? ".${meta.caller}_mqc" : '_mqc'
                     ].join('').trim()
                     }
         publishDir = [
@@ -243,7 +243,7 @@ process {
                 (meta.maxMatches != null)          ? "--mm ${meta.maxMatches}" : ''
                 ].join(' ').trim()}
         publishDir = [
-            path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/wittyer_bench"},
+            path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/wittyer"},
             pattern: "*{.vcf.gz,tbi,json}",
             mode: params.publish_dir_mode
         ]
@@ -266,7 +266,7 @@ process {
         ]
     }
     withName: PLOTS {
-        ext.prefix = {"${meta.benchmark_tool}"}
+        ext.prefix = {"${meta.benchmark_tool}_mqc"}
         publishDir = [
             path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.benchmark_tool}"},
             pattern: "*{png}",

diff --git a/conf/tests/germline_sv.config b/conf/tests/germline_sv.config
@@ -33,7 +33,7 @@ params {
     analysis             = 'germline'
     variant_type         = "structural"
     method               = 'svanalyzer,wittyer,truvari'
-    preprocess           = "normalization,deduplication"
+    preprocess           = "normalization,deduplication,filter_contigs"
     sv_standardization   = "svync,homogenize"
     min_sv_size          = 30
     truth_id             = "HG002"

diff --git a/modules.json b/modules.json
@@ -82,7 +82,7 @@
                     },
                     "multiqc": {
                         "branch": "master",
-                        "git_sha": "b8d36829fa84b6e404364abff787e8b07f6d058c",
+                        "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
                         "installed_by": ["modules"]
                     },
                     "picard/createsequencedictionary": {

diff --git a/modules/local/plots/main.nf b/modules/local/plots/main.nf
@@ -11,8 +11,8 @@ process PLOTS {
     tuple val(meta), path(summary)
 
     output:
-    tuple val(meta),path("*.png"), emit: plots
-    path "versions.yml"          , emit: versions
+    path("*.png")          , emit: plots
+    path "versions.yml"    , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
diff --git a/subworkflows/local/cnv_germline_benchmark.nf b/subworkflows/local/cnv_germline_benchmark.nf
@@ -9,32 +9,30 @@ include { TABIX_BGZIP as TABIX_BGZIP_TRUTH } from '../../modules/nf-core/tabix/b
 workflow CNV_GERMLINE_BENCHMARK {
     take:
     input_ch  // channel: [val(meta),test_vcf,test_index,truth_vcf,truth_index, bed]
-    fasta     // reference channel [val(meta), ref.fa]
-    fai       // reference channel [val(meta), ref.fa.fai]
 
     main:
 
-    versions =        Channel.empty()
+    versions        = Channel.empty()
     summary_reports = Channel.empty()
 
     // CNV benchmarking is only possible with wittyer now!
 
     TABIX_BGZIP_QUERY(
-        input_ch.map{ meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
+        input_ch.map{ meta, vcf, _tbi, _truth_vcf, _truth_tbi, _bed ->
             [ meta, vcf ]
         }
     )
     versions = versions.mix(TABIX_BGZIP_QUERY.out.versions.first())
 
     TABIX_BGZIP_TRUTH(
-        input_ch.map{ meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
+        input_ch.map{ meta, _vcf, _tbi, truth_vcf, _truth_tbi, _bed ->
             [ meta, truth_vcf ]
         }
     )
     versions = versions.mix(TABIX_BGZIP_TRUTH.out.versions.first())
 
-    input_ch.map{ meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
-            [ meta, bed ]
+    input_ch.map{ meta, _vcf, _tbi, _truth_vcf, _truth_tbi, bedfile ->
+            [ meta, bedfile ]
         }
         .set { bed }
 
@@ -56,6 +54,6 @@ workflow CNV_GERMLINE_BENCHMARK {
 
 
     emit:
-    summary_reports
-    versions
+    summary_reports  // channel: [val(meta), summary]
+    versions         // channel: [versions.yml]
 }
diff --git a/subworkflows/local/compare_benchmark_results.nf b/subworkflows/local/compare_benchmark_results.nf
@@ -63,14 +63,13 @@ workflow COMPARE_BENCHMARK_RESULTS {
         merged_vcfs = merged_vcfs.mix(SURVIVOR_MERGE.out.vcf)
     }
 
-
     // convert vcf files to csv
     VCF_TO_CSV(
         merged_vcfs
     )
     versions = versions.mix(VCF_TO_CSV.out.versions.first())
 
     emit:
-    versions
-    merged_vcfs
+    merged_vcfs  // channel: [val(meta), vcf]
+    versions     // channel: [versions.yml]
 }
diff --git a/subworkflows/local/liftover_vcfs_truth.nf b/subworkflows/local/liftover_vcfs_truth.nf
@@ -59,7 +59,7 @@ workflow LIFTOVER_VCFS_TRUTH {
     // liftover high confidence file if given
     UCSC_LIFTOVER(
         high_conf_ch.map{file -> tuple([id: params.truth_id], file)},
-        chain.map{meta, file -> file}
+        chain.map{_meta, file -> file}
     )
     versions = versions.mix(UCSC_LIFTOVER.out.versions.first())
 
@@ -73,11 +73,10 @@ workflow LIFTOVER_VCFS_TRUTH {
         SORT_BED.out.bed
     )
     versions = versions.mix(BEDTOOLS_MERGE.out.versions.first())
-
     bed_ch = BEDTOOLS_MERGE.out.bed
 
     emit:
     vcf_ch      // channel: [val(meta), vcf.gz]
     bed_ch      // channel: [val(meta), bed]
-    versions
+    versions    // channel: [versions.yml]
 }
diff --git a/subworkflows/local/prepare_vcfs_test.nf b/subworkflows/local/prepare_vcfs_test.nf
@@ -87,29 +87,18 @@ workflow PREPARE_VCFS_TEST {
 
     if (params.analysis.contains("somatic")){
 
-        // somatic spesific preperations
-        //vcf_ch.branch{
-        //        def meta = it[0]
-        //        small: meta.vartype == "small"
-        //        other: true
-        //    }
-        //    .set{vcf}
-
+        // somatic specific preparations
         if (params.variant_type == "small"){
-            out_vcf_ch = Channel.empty()
-
             SPLIT_SMALL_VARIANTS_TEST(
-                vcf.small
+                vcf_ch
             )
             versions = versions.mix(SPLIT_SMALL_VARIANTS_TEST.out.versions.first())
-            out_vcf_ch = out_vcf_ch.mix(SPLIT_SMALL_VARIANTS_TEST.out.out_vcf_ch,
-                                        vcf.other)
-            vcf_ch = out_vcf_ch
+            vcf_ch = SPLIT_SMALL_VARIANTS_TEST.out.out_vcf_ch
         }
 
     }
 
     emit:
-    vcf_ch
-    versions
+    vcf_ch   // channel: [val(meta), vcf.gz, tbi]
+    versions // channel: [versions.yml]
 }
diff --git a/subworkflows/local/prepare_vcfs_truth.nf b/subworkflows/local/prepare_vcfs_truth.nf
@@ -37,7 +37,7 @@ workflow PREPARE_VCFS_TRUTH {
         )
         versions = versions.mix(LIFTOVER_VCFS_TRUTH.out.versions.first())
         truth_ch = LIFTOVER_VCFS_TRUTH.out.vcf_ch
-        high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ meta, bed -> [bed]}
+        high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ _meta, bed -> [bed]}
     }
 
     // Reheader sample name for truth file - using meta.caller
@@ -50,7 +50,7 @@ workflow PREPARE_VCFS_TRUTH {
 
     if (params.preprocess.contains("normalization")){
 
-        // multi-allelic variants will be splitted.
+        // multi-allelic variants will be split.
         BCFTOOLS_NORM(
             vcf_ch,
             fasta
@@ -78,7 +78,7 @@ workflow PREPARE_VCFS_TRUTH {
     }
 
     emit:
-    vcf_ch
-    high_conf_ch
-    versions
+    vcf_ch       // channel: [val(meta), vcf, tbi]
+    high_conf_ch // channel: [bed]
+    versions     // channel: [versions.yml]
 }
diff --git a/subworkflows/local/report_benchmark_statistics.nf b/subworkflows/local/report_benchmark_statistics.nf
@@ -4,8 +4,8 @@
 
 include { MERGE_REPORTS         } from '../../modules/local/merge_reports'
 include { PLOTS                 } from '../../modules/local/plots'
-include { CREATE_DATAVZRD_INPUT } from '../../modules/local/create_datavzrd_input'  addParams( options: params.options )
-include { DATAVZRD              } from '../../modules/nf-core/datavzrd'           addParams( options: params.options )
+include { CREATE_DATAVZRD_INPUT } from '../../modules/local/create_datavzrd_input'
+include { DATAVZRD              } from '../../modules/nf-core/datavzrd'
 
 workflow REPORT_BENCHMARK_STATISTICS {
     take:
@@ -14,6 +14,8 @@ workflow REPORT_BENCHMARK_STATISTICS {
     main:
 
     versions = Channel.empty()
+    ch_plots = Channel.empty()
+
     // merge summary statistics from the same benchmarking tool
     MERGE_REPORTS(
         reports
@@ -24,6 +26,7 @@ workflow REPORT_BENCHMARK_STATISTICS {
     PLOTS(
         MERGE_REPORTS.out.summary
     )
+    ch_plots = ch_plots.mix(PLOTS.out.plots.flatten())
     versions = versions.mix(PLOTS.out.versions.first())
 
     MERGE_REPORTS.out.summary
@@ -32,8 +35,8 @@ workflow REPORT_BENCHMARK_STATISTICS {
 
     // add path to csv file to the datavzrd input
     summary
-        .map { meta, summary ->
-                [ meta, summary, file("${projectDir}/assets/datavzrd/${meta.id}.datavzrd.template.yaml", checkIfExists:true) ]
+        .map { meta, summary_file ->
+                [ meta, summary_file, file("${projectDir}/assets/datavzrd/${meta.id}.datavzrd.template.yaml", checkIfExists:true) ]
             }
         .set {template}
 
@@ -48,8 +51,7 @@ workflow REPORT_BENCHMARK_STATISTICS {
     )
     versions = versions.mix(DATAVZRD.out.versions.first())
 
-    datavzrd_report = DATAVZRD.out.report
-
     emit:
-    versions
+    versions        // channel: [versions.yml]
+    ch_plots        // channel: [plots.png]
 }
diff --git a/subworkflows/local/report_vcf_statistics.nf b/subworkflows/local/report_vcf_statistics.nf
@@ -11,27 +11,20 @@ workflow REPORT_VCF_STATISTICS {
 
     main:
 
-    versions = Channel.empty()
-    survivor_stats = Channel.empty()
-
-    //input_ch.branch{
-    //        def meta = it[0]
-    //        sv:     meta.vartype == "sv" || meta.vartype == "cnv"
-    //        other:  true
-    //    }
-    //    .set{input}
+    versions     = Channel.empty()
+    ch_stats     = Channel.empty()
 
     if (params.variant_type == "structural"){
         // use survivor stats to get SV statistics by TYPE
         SURVIVOR_STATS(
-            input_ch.map{ meta, vcf, tbi ->
+            input_ch.map{ meta, vcf, _tbi ->
                 [ meta, vcf ]
             },
             -1,
             -1,
             -1
         )
-        survivor_stats = SURVIVOR_STATS.out.stats
+        ch_stats = ch_stats.mix(SURVIVOR_STATS.out.stats.map{_meta, stats -> stats})
         versions = versions.mix(SURVIVOR_STATS.out.versions.first())
     }
 
@@ -45,13 +38,12 @@ workflow REPORT_VCF_STATISTICS {
         [[],[]],
         [[],[]]
     )
-    bcftools_stats = BCFTOOLS_STATS.out.stats
+    ch_stats = ch_stats.mix(BCFTOOLS_STATS.out.stats.map{_meta, stats -> stats})
     versions = versions.mix(BCFTOOLS_STATS.out.versions.first())
 
-    // Add here a tool, to visualize SV statistics in a histogram.
+    // TODO: Add a tool here to visualize SV statistics in a histogram.
 
     emit:
-    bcftools_stats
-    survivor_stats
-    versions
+    ch_stats  // channel: [stats]
+    versions  // channel: [versions.yml]
 }