Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding plots to multiqc #108

Merged
merged 6 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,39 @@ report_section_order:
order: -1002

export_plots: true

disable_version_detection: true
ignore_images: false

# Run only these modules
run_modules:
- bcftools
- custom_content

custom_data:
survivor:
id: "survivor"
section_name: "SURVIVOR variant statistics"
description: "generated by nf-core/survivor"
plot_type: "bargraph"
format: "tsv"
pconfig:
id: "survivor"
namespace: "SURVIVOR variant statistics"
table_title: "Variant statistics from survivor stats tool"
ylab: "Count"

summary_reports:
id: "summary_reports"
section_name: "Summary Benchmark Reports"

sp:
survivor:
fn: "*.stats"
summary_reports:
fn: "*.png"

module_order:
- bcftools:
name: "Bcftools stats"
- survivor
- summary_reports
12 changes: 6 additions & 6 deletions bin/plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@ generate_plots <- function(table, benchmark, type, filter, stats) {
table = table[table$Type == type & table$Filter == filter, ]
title1 = paste("Type=",type," Filter=",filter, " | TP/FP/FN by tool", sep="")
title2 = paste("Type=",type," Filter=",filter, " | Precision, Recall, and F1 by Tool", sep="")
name1 = paste(type, "_", filter, "_metric_by_tool_", benchmark, ".png", sep = "")
name2 = paste(type, "_", filter, "_variants_by_tool_", benchmark, ".png", sep = "")
name1 = paste(type, "_", filter, "_metric_by_tool_", benchmark, "_mqc.png", sep = "")
name2 = paste(type, "_", filter, "_variants_by_tool_", benchmark, "_mqc.png", sep = "")
}
else if (stats != "None" ){
table = table[table$StatsType == stats, ]
title1 = paste("StatsType=",stats, " | TP/FP/FN by tool", sep="")
title2 = paste("StatsType=",stats, " | Precision, Recall, and F1 by Tool", sep="")
name1 = paste(stats, "_metric_by_tool_", benchmark, ".png", sep = "")
name2 = paste(stats, "_variants_by_tool_", benchmark, ".png", sep = "")
name1 = paste(stats, "_metric_by_tool_", benchmark, "_mqc.png", sep = "")
name2 = paste(stats, "_variants_by_tool_", benchmark, "_mqc.png", sep = "")
}
else{
title1 = paste("TP/FP/FN by tool", sep="")
title2 = paste("Precision, Recall, and F1 by Tool", sep="")
name1 = paste("metric_by_tool_", benchmark, ".png", sep = "")
name2 = paste("variants_by_tool_", benchmark, ".png", sep = "")
name1 = paste("metric_by_tool_", benchmark, "_mqc.png", sep = "")
name2 = paste("variants_by_tool_", benchmark, "_mqc.png", sep = "")
}
input_data_melted <- melt(table, id.vars = "Tool")

Expand Down
6 changes: 3 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ process {
// Variant stats
withName: SURVIVOR_STATS {
ext.prefix ={["${meta.id}",
(meta.caller != null) ? ".${meta.caller}" : ''
(meta.caller != null) ? ".${meta.caller}_mqc" : '_mqc'
].join('').trim()
}
publishDir = [
Expand Down Expand Up @@ -243,7 +243,7 @@ process {
(meta.maxMatches != null) ? "--mm ${meta.maxMatches}" : ''
].join(' ').trim()}
publishDir = [
path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/wittyer_bench"},
path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/wittyer"},
pattern: "*{.vcf.gz,tbi,json}",
mode: params.publish_dir_mode
]
Expand All @@ -266,7 +266,7 @@ process {
]
}
withName: PLOTS {
ext.prefix = {"${meta.benchmark_tool}"}
ext.prefix = {"${meta.benchmark_tool}_mqc"}
publishDir = [
path: {"${params.outdir}/${params.variant_type}/summary/plots/${meta.benchmark_tool}"},
pattern: "*{png}",
Expand Down
2 changes: 1 addition & 1 deletion conf/tests/germline_sv.config
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ params {
analysis = 'germline'
variant_type = "structural"
method = 'svanalyzer,wittyer,truvari'
preprocess = "normalization,deduplication"
preprocess = "normalization,deduplication,filter_contigs"
sv_standardization = "svync,homogenize"
min_sv_size = 30
truth_id = "HG002"
Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
},
"multiqc": {
"branch": "master",
"git_sha": "b8d36829fa84b6e404364abff787e8b07f6d058c",
"git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
"installed_by": ["modules"]
},
"picard/createsequencedictionary": {
Expand Down
4 changes: 2 additions & 2 deletions modules/local/plots/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ process PLOTS {
tuple val(meta), path(summary)

output:
tuple val(meta),path("*.png"), emit: plots
path "versions.yml" , emit: versions
path("*.png") , emit: plots
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/multiqc/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 7 additions & 9 deletions subworkflows/local/cnv_germline_benchmark.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,30 @@ include { TABIX_BGZIP as TABIX_BGZIP_TRUTH } from '../../modules/nf-core/tabix/b
workflow CNV_GERMLINE_BENCHMARK {
take:
input_ch // channel: [val(meta),test_vcf,test_index,truth_vcf,truth_index, bed]
fasta // reference channel [val(meta), ref.fa]
fai // reference channel [val(meta), ref.fa.fai]

main:

versions = Channel.empty()
versions = Channel.empty()
summary_reports = Channel.empty()

// CNV benchmarking is only possible with wittyer now!

TABIX_BGZIP_QUERY(
input_ch.map{ meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
input_ch.map{ meta, vcf, _tbi, _truth_vcf, _truth_tbi, _bed ->
[ meta, vcf ]
}
)
versions = versions.mix(TABIX_BGZIP_QUERY.out.versions.first())

TABIX_BGZIP_TRUTH(
input_ch.map{ meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
input_ch.map{ meta, _vcf, _tbi, truth_vcf, _truth_tbi, _bed ->
[ meta, truth_vcf ]
}
)
versions = versions.mix(TABIX_BGZIP_TRUTH.out.versions.first())

input_ch.map{ meta, vcf, tbi, truth_vcf, truth_tbi, bed ->
[ meta, bed ]
input_ch.map{ meta, _vcf, _tbi, _truth_vcf, _truth_tbi, bedfile ->
[ meta, bedfile ]
}
.set { bed }

Expand All @@ -56,6 +54,6 @@ workflow CNV_GERMLINE_BENCHMARK {


emit:
summary_reports
versions
summary_reports // channel: [val(meta), summary]
versions // channel: [versions.yml]
}
5 changes: 2 additions & 3 deletions subworkflows/local/compare_benchmark_results.nf
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,13 @@ workflow COMPARE_BENCHMARK_RESULTS {
merged_vcfs = merged_vcfs.mix(SURVIVOR_MERGE.out.vcf)
}


// convert vcf files to csv
VCF_TO_CSV(
merged_vcfs
)
versions = versions.mix(VCF_TO_CSV.out.versions.first())

emit:
versions
merged_vcfs
merged_vcfs // channel: [val(meta), vcf]
versions // channel: [versions.yml]
}
5 changes: 2 additions & 3 deletions subworkflows/local/liftover_vcfs_truth.nf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ workflow LIFTOVER_VCFS_TRUTH {
// liftover high confidence file if given
UCSC_LIFTOVER(
high_conf_ch.map{file -> tuple([id: params.truth_id], file)},
chain.map{meta, file -> file}
chain.map{_meta, file -> file}
)
versions = versions.mix(UCSC_LIFTOVER.out.versions.first())

Expand All @@ -73,11 +73,10 @@ workflow LIFTOVER_VCFS_TRUTH {
SORT_BED.out.bed
)
versions = versions.mix(BEDTOOLS_MERGE.out.versions.first())

bed_ch = BEDTOOLS_MERGE.out.bed

emit:
vcf_ch // channel: [val(meta), vcf.gz]
bed_ch // channel: [val(meta), bed]
versions
versions // channel: [versions.yml]
}
21 changes: 5 additions & 16 deletions subworkflows/local/prepare_vcfs_test.nf
Original file line number Diff line number Diff line change
Expand Up @@ -87,29 +87,18 @@ workflow PREPARE_VCFS_TEST {

if (params.analysis.contains("somatic")){

// somatic spesific preperations
//vcf_ch.branch{
// def meta = it[0]
// small: meta.vartype == "small"
// other: true
// }
// .set{vcf}

// somatic specific preparations
if (params.variant_type == "small"){
out_vcf_ch = Channel.empty()

SPLIT_SMALL_VARIANTS_TEST(
vcf.small
vcf_ch
)
versions = versions.mix(SPLIT_SMALL_VARIANTS_TEST.out.versions.first())
out_vcf_ch = out_vcf_ch.mix(SPLIT_SMALL_VARIANTS_TEST.out.out_vcf_ch,
vcf.other)
vcf_ch = out_vcf_ch
vcf_ch = SPLIT_SMALL_VARIANTS_TEST.out.out_vcf_ch
}

}

emit:
vcf_ch
versions
vcf_ch // channel: [val(meta), vcf.gz, tbi]
versions // channel: [versions.yml]
}
10 changes: 5 additions & 5 deletions subworkflows/local/prepare_vcfs_truth.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ workflow PREPARE_VCFS_TRUTH {
)
versions = versions.mix(LIFTOVER_VCFS_TRUTH.out.versions.first())
truth_ch = LIFTOVER_VCFS_TRUTH.out.vcf_ch
high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ meta, bed -> [bed]}
high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ _meta, bed -> [bed]}
}

// Reheader sample name for truth file - using meta.caller
Expand All @@ -50,7 +50,7 @@ workflow PREPARE_VCFS_TRUTH {

if (params.preprocess.contains("normalization")){

// multi-allelic variants will be splitted.
// multi-allelic variants will be split.
BCFTOOLS_NORM(
vcf_ch,
fasta
Expand Down Expand Up @@ -78,7 +78,7 @@ workflow PREPARE_VCFS_TRUTH {
}

emit:
vcf_ch
high_conf_ch
versions
vcf_ch // channel: [val(meta), vcf, tbi]
high_conf_ch // channel: [val(meta), bed]
versions // channel: [versions.yml]
}
16 changes: 9 additions & 7 deletions subworkflows/local/report_benchmark_statistics.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

include { MERGE_REPORTS } from '../../modules/local/merge_reports'
include { PLOTS } from '../../modules/local/plots'
include { CREATE_DATAVZRD_INPUT } from '../../modules/local/create_datavzrd_input' addParams( options: params.options )
include { DATAVZRD } from '../../modules/nf-core/datavzrd' addParams( options: params.options )
include { CREATE_DATAVZRD_INPUT } from '../../modules/local/create_datavzrd_input'
include { DATAVZRD } from '../../modules/nf-core/datavzrd'

workflow REPORT_BENCHMARK_STATISTICS {
take:
Expand All @@ -14,6 +14,8 @@ workflow REPORT_BENCHMARK_STATISTICS {
main:

versions = Channel.empty()
ch_plots = Channel.empty()

// merge summary statistics from the same benchmarking tool
MERGE_REPORTS(
reports
Expand All @@ -24,6 +26,7 @@ workflow REPORT_BENCHMARK_STATISTICS {
PLOTS(
MERGE_REPORTS.out.summary
)
ch_plots = ch_plots.mix(PLOTS.out.plots.flatten())
versions = versions.mix(PLOTS.out.versions.first())

MERGE_REPORTS.out.summary
Expand All @@ -32,8 +35,8 @@ workflow REPORT_BENCHMARK_STATISTICS {

// add path to csv file to the datavzrd input
summary
.map { meta, summary ->
[ meta, summary, file("${projectDir}/assets/datavzrd/${meta.id}.datavzrd.template.yaml", checkIfExists:true) ]
.map { meta, summary_file ->
[ meta, summary_file, file("${projectDir}/assets/datavzrd/${meta.id}.datavzrd.template.yaml", checkIfExists:true) ]
}
.set {template}

Expand All @@ -48,8 +51,7 @@ workflow REPORT_BENCHMARK_STATISTICS {
)
versions = versions.mix(DATAVZRD.out.versions.first())

datavzrd_report = DATAVZRD.out.report

emit:
versions
versions // channel: [versions.yml]
ch_plots // channel: [plots.png]
}
24 changes: 8 additions & 16 deletions subworkflows/local/report_vcf_statistics.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,20 @@ workflow REPORT_VCF_STATISTICS {

main:

versions = Channel.empty()
survivor_stats = Channel.empty()

//input_ch.branch{
// def meta = it[0]
// sv: meta.vartype == "sv" || meta.vartype == "cnv"
// other: true
// }
// .set{input}
versions = Channel.empty()
ch_stats = Channel.empty()

if (params.variant_type == "structural"){
// use survivor stats to get SV statistics by TYPE
SURVIVOR_STATS(
input_ch.map{ meta, vcf, tbi ->
input_ch.map{ meta, vcf, _tbi ->
[ meta, vcf ]
},
-1,
-1,
-1
)
survivor_stats = SURVIVOR_STATS.out.stats
ch_stats = ch_stats.mix(SURVIVOR_STATS.out.stats.map{_meta, stats -> stats})
versions = versions.mix(SURVIVOR_STATS.out.versions.first())
}

Expand All @@ -45,13 +38,12 @@ workflow REPORT_VCF_STATISTICS {
[[],[]],
[[],[]]
)
bcftools_stats = BCFTOOLS_STATS.out.stats
ch_stats = ch_stats.mix(BCFTOOLS_STATS.out.stats.map{_meta, stats -> stats})
versions = versions.mix(BCFTOOLS_STATS.out.versions.first())

// Add here a tool, to visualize SV statistics in a histogram.
// TODO: Add a tool here to visualize SV statistics in a histogram.

emit:
bcftools_stats
survivor_stats
versions
ch_stats // channel: [stats]
versions // channel: [versions.yml]
}
Loading