From 823ceceb0e3b4394aa5dd531f53451e6265d0ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Tue, 3 Dec 2024 11:14:30 +0000 Subject: [PATCH 01/10] remove failing truth.md links --- docs/truth.md | 2 -- docs/usage.md | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/truth.md b/docs/truth.md index 0222fb2..cec7073 100644 --- a/docs/truth.md +++ b/docs/truth.md @@ -1,7 +1,5 @@ # nf-core/variantbenchmarking: Truth files -## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/variantbenchmarking/truth](https://nf-co.re/variantbenchmarking/truth) - ## Defining Truth VCF and High confidence BED files This pipeline requires a set of Truth VCF, as a baseline for comparisons, and a high confidence bed files, to restrict analysis to regions. Although, those sets can be anything depending on the type of the analysis, for benchmarking of human genomes there are golden set of samples provided by [Genome in a Bottle project](https://www.nist.gov/programs-projects/genome-bottle) and [SEQC2 consortium](https://sites.google.com/view/seqc2/home/data-analysis/high-confidence-somatic-snv-and-indel-v1-2). diff --git a/docs/usage.md b/docs/usage.md index b7c6047..e56db2c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -41,7 +41,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p ## Truth samples -Please find the detailed information about truth samples [here](https://nf-co.re/variantbenchmarking/truth). +Please find the detailed information about truth samples [here](truth.md). 
## Lifting over truth sets From 4c770b236b4bafa934482aecf81024895cfdbe66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Tue, 3 Dec 2024 16:21:18 +0000 Subject: [PATCH 02/10] add liftover option for test vcfs --- assets/schema_input.json | 6 + ...tover_hg37.config => liftover_test.config} | 7 +- ...over_hg38.config => liftover_truth.config} | 2 +- nextflow.config | 6 +- nextflow_schema.json | 13 +- ...iftover_vcfs_truth.nf => liftover_vcfs.nf} | 14 +- subworkflows/local/prepare_vcfs_test.nf | 38 ++- subworkflows/local/prepare_vcfs_truth.nf | 12 +- tests/liftover_test.nf.test | 40 +++ tests/liftover_test.nf.test.snap | 208 ++++++++++++++ ...er_hg38.nf.test => liftover_truth.nf.test} | 6 +- ....test.snap => liftover_truth.nf.test.snap} | 267 +----------------- workflows/variantbenchmarking.nf | 29 +- 13 files changed, 341 insertions(+), 307 deletions(-) rename conf/tests/{liftover_hg37.config => liftover_test.config} (79%) rename conf/tests/{liftover_hg38.config => liftover_truth.config} (98%) rename subworkflows/local/{liftover_vcfs_truth.nf => liftover_vcfs.nf} (87%) create mode 100644 tests/liftover_test.nf.test create mode 100644 tests/liftover_test.nf.test.snap rename tests/{liftover_hg38.nf.test => liftover_truth.nf.test} (87%) rename tests/{liftover_hg38.nf.test.snap => liftover_truth.nf.test.snap} (59%) diff --git a/assets/schema_input.json b/assets/schema_input.json index 7967366..c6a6cf1 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -154,6 +154,12 @@ "enum": ["sc", "cts", "d"], "minLength": 1, "default": null + }, + "liftover": { + "type": "boolean", + "description": "Liftover option for test VCFs; to activate, also set params.liftover='test'", + "meta": ["liftover"], + "default": false } }, "required": ["test_vcf", "caller", "id"] diff --git a/conf/tests/liftover_hg37.config b/conf/tests/liftover_test.config similarity index 79% rename from conf/tests/liftover_hg37.config rename to 
conf/tests/liftover_test.config index e7d88bb..f3878e2 100644 --- a/conf/tests/liftover_hg37.config +++ b/conf/tests/liftover_test.config @@ -20,7 +20,7 @@ params { max_time = '8.h' // Input data - input = 'https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/samplesheet_sv_hg37.csv' + input = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/samplesheet_sv_liftover.csv" outdir = 'results' genome = 'GRCh37' @@ -28,14 +28,15 @@ params { analysis = 'germline' variant_type = "structural" method = 'truvari' - preprocess = "normalization,deduplication,filter_contigs" + preprocess = "filter_contigs" min_sv_size = 30 truth_id = "HG002" truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz" + regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed" //liftover files - liftover = true + liftover = "test" chain = "http://ftp.ensembl.org/pub/assembly_mapping/homo_sapiens/GRCh38_to_GRCh37.chain.gz" rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch38_grch37.txt" } diff --git a/conf/tests/liftover_hg38.config b/conf/tests/liftover_truth.config similarity index 98% rename from conf/tests/liftover_hg38.config rename to conf/tests/liftover_truth.config index ae8a2ba..3ab7d01 100644 --- a/conf/tests/liftover_hg38.config +++ b/conf/tests/liftover_truth.config @@ -35,7 +35,7 @@ params { regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.bed" //liftover files - liftover = true + liftover = "truth" chain = "https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/funcotator/data_sources/gnomAD/b37ToHg38.over.chain" rename_chr = 
"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch37_grch38.txt" } diff --git a/nextflow.config b/nextflow.config index c655a87..f1e969b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -48,7 +48,7 @@ params { dictionary = null rename_chr = null chain = null - liftover = false + liftover = "" // MultiQC options multiqc_config = null @@ -193,8 +193,8 @@ profiles { somatic_snv { includeConfig 'conf/tests/somatic_snv.config' } somatic_indel { includeConfig 'conf/tests/somatic_indel.config' } somatic_sv { includeConfig 'conf/tests/somatic_sv.config' } - liftover_hg37 { includeConfig 'conf/tests/liftover_hg37.config' } - liftover_hg38 { includeConfig 'conf/tests/liftover_hg38.config' } + liftover_test { includeConfig 'conf/tests/liftover_test.config' } + liftover_truth { includeConfig 'conf/tests/liftover_truth.config' } } diff --git a/nextflow_schema.json b/nextflow_schema.json index c9d2a76..517cbcd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -31,12 +31,14 @@ }, "truth_id": { "type": "string", + "default": "", "description": "Truth id, sample name to define truth vcf", "fa_icon": "fas fa-folder-open", "errorMessage": "The sample name of the truth case. Examples: HG002, SEQC2, HG001, HG003, CHM13" }, "analysis": { "type": "string", + "default": "", "description": "The analysis type used by the input files", "enum": ["germline", "somatic"], "pattern": "(germline|somatic)", @@ -45,6 +47,7 @@ }, "variant_type": { "type": "string", + "default": "", "description": "Variant types to benchmark", "errorMessage": "Select a variant type to make the analysis: small,snv,indel,structural or copynumber. Select small when your vcf contains both snvs and indels", "enum": ["small", "snv", "indel", "structural", "copynumber"], @@ -53,6 +56,7 @@ }, "method": { "type": "string", + "default": "truvari,svanalyzer,happy,sompy,rtgtools,wittyer", "description": "The benchmarking methods to use. 
Should be a comma-separate list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer", "errorMessage": "A wrong input has been detected. Should be a comma-separated list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer", "pattern": "^((truvari|svanalyzer|happy|sompy|rtgtools|wittyer)?,?)*(? tuple([id: params.truth_id], file)}, + ch_bed.map{file -> tuple([id: params.truth_id], file)}, chain.map{_meta, file -> file} ) versions = versions.mix(UCSC_LIFTOVER.out.versions.first()) diff --git a/subworkflows/local/prepare_vcfs_test.nf b/subworkflows/local/prepare_vcfs_test.nf index fdf2c32..69e0ae0 100644 --- a/subworkflows/local/prepare_vcfs_test.nf +++ b/subworkflows/local/prepare_vcfs_test.nf @@ -2,13 +2,14 @@ // PREPARE_VCFS: SUBWORKFLOW TO PREPARE INPUT VCFS // -include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename' -include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication' -include { VCF_VARIANT_FILTERING } from '../local/vcf_variant_filtering' -include { SPLIT_SMALL_VARIANTS_TEST } from '../local/split_small_variants_test' -include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm' -include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix' -include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix' +include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename' +include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication' +include { VCF_VARIANT_FILTERING } from '../local/vcf_variant_filtering' +include { SPLIT_SMALL_VARIANTS_TEST } from '../local/split_small_variants_test' +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm' +include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix' +include { LIFTOVER_VCFS } from '../local/liftover_vcfs' include { BCFTOOLS_VIEW as 
BCFTOOLS_VIEW_CONTIGS } from '../../modules/nf-core/bcftools/view' @@ -17,14 +18,35 @@ workflow PREPARE_VCFS_TEST { test_ch // channel: [val(meta), vcf] fasta // reference channel [val(meta), ref.fa] fai // reference channel [val(meta), ref.fa.fai] + chain // reference channel [val(meta), chain.gz] + rename_chr // reference channel [val(meta), chrlist.txt] + dictionary // reference channel [val(meta), genome.dict] main: versions = Channel.empty() + test_ch.branch{ + def meta = it[0] + liftover: meta.liftover + other: true}.set{vcf} + + vcf_ch = Channel.empty() + + LIFTOVER_VCFS( + vcf.liftover, + Channel.empty(), + fasta, + chain, + rename_chr, + dictionary + ) + versions = versions.mix(LIFTOVER_VCFS.out.versions.first()) + vcf_ch = vcf_ch.mix(LIFTOVER_VCFS.out.vcf_ch,vcf.other) + // Add "query" to test sample VCF_REHEADER_SAMPLENAME( - test_ch, + vcf_ch, fai ) versions = versions.mix(VCF_REHEADER_SAMPLENAME.out.versions.first()) diff --git a/subworkflows/local/prepare_vcfs_truth.nf b/subworkflows/local/prepare_vcfs_truth.nf index 1f6b372..1548741 100644 --- a/subworkflows/local/prepare_vcfs_truth.nf +++ b/subworkflows/local/prepare_vcfs_truth.nf @@ -7,7 +7,7 @@ include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix' include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename' include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication' -include { LIFTOVER_VCFS_TRUTH } from '../local/liftover_vcfs_truth' +include { LIFTOVER_VCFS } from '../local/liftover_vcfs' workflow PREPARE_VCFS_TRUTH { @@ -25,9 +25,9 @@ workflow PREPARE_VCFS_TRUTH { versions = Channel.empty() // if liftover option is set convert truth files - if (params.liftover){ + if (params.liftover.contains("truth")){ - LIFTOVER_VCFS_TRUTH( + LIFTOVER_VCFS( truth_ch, high_conf_ch, fasta, @@ -35,9 +35,9 @@ workflow PREPARE_VCFS_TRUTH { rename_chr, dictionary ) - versions = 
versions.mix(LIFTOVER_VCFS_TRUTH.out.versions.first()) - truth_ch = LIFTOVER_VCFS_TRUTH.out.vcf_ch - high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ _meta, bed -> [bed]} + versions = versions.mix(LIFTOVER_VCFS.out.versions.first()) + truth_ch = LIFTOVER_VCFS.out.vcf_ch + high_conf_ch = LIFTOVER_VCFS.out.bed_ch.map{ _meta, bed -> [bed]} } // Reheader sample name for truth file - using meta.caller diff --git a/tests/liftover_test.nf.test b/tests/liftover_test.nf.test new file mode 100644 index 0000000..b049a2b --- /dev/null +++ b/tests/liftover_test.nf.test @@ -0,0 +1,40 @@ +nextflow_pipeline { + + name "Test pipeline for liftover structural germline variants, tested benchmarking methods are truvari" + script "../main.nf" + tag "pipeline" + tag "structural" + tag "germline" + tag "liftover" + tag "hg37" + config "../conf/tests/liftover_test.config" + + test("Params: --analysis 'germline' --variant_type 'structural' --method 'truvari' --liftover 'test'") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success }, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/liftover_test.nf.test.snap b/tests/liftover_test.nf.test.snap new 
file mode 100644 index 0000000..7dfaaf1 --- /dev/null +++ b/tests/liftover_test.nf.test.snap @@ -0,0 +1,208 @@ +{ + "Params: --analysis 'germline' --variant_type 'structural' --method 'truvari' --liftover 'test'": { + "content": [ + 67, + { + "BCFTOOLS_REHEADER": { + "bcftools": 1.18 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.18 + }, + "BCFTOOLS_VIEW_CONTIGS": { + "bcftools": 1.18 + }, + "BGZIP_TABIX": { + "tabix": 1.12 + }, + "DATAVZRD": { + "datavzrd": "2.36.12" + }, + "MERGE_REPORTS": { + "python": "3.8.6" + }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, + "PLOTS": { + "r-base": "4.3.1" + }, + "SURVIVOR_MERGE": { + "survivor": "1.0.7" + }, + "SURVIVOR_STATS": { + "survivor": "1.0.7" + }, + "TABIX_BGZIP": { + "tabix": "1.19.1" + }, + "TABIX_BGZIPTABIX": { + "tabix": "1.19.1" + }, + "TABIX_TABIX": { + "tabix": "1.19.1" + }, + "TRUVARI_BENCH": { + "truvari": "4.1.0)" + }, + "VCF_TO_CSV": { + "python": "3.12.4" + }, + "Workflow": { + "nf-core/variantbenchmarking": "v1.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/dictionary", + "references/dictionary/genome.dict", + "structural", + "structural/HG002", + "structural/HG002/liftover", + "structural/HG002/liftover/test2.renamechr.vcf.gz", + "structural/HG002/stats", + "structural/HG002/stats/bcftools", + "structural/HG002/stats/bcftools/HG002.bcftools_stats.txt", + "structural/HG002/stats/survivor", + "structural/HG002/stats/survivor/HG002_mqc.stats", + "structural/multiqc", + "structural/multiqc/multiqc_data", + "structural/multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "structural/multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "structural/multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "structural/multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "structural/multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + 
"structural/multiqc/multiqc_data/multiqc.log", + "structural/multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "structural/multiqc/multiqc_data/multiqc_citations.txt", + "structural/multiqc/multiqc_data/multiqc_data.json", + "structural/multiqc/multiqc_data/multiqc_general_stats.txt", + "structural/multiqc/multiqc_data/multiqc_software_versions.txt", + "structural/multiqc/multiqc_data/multiqc_sources.txt", + "structural/multiqc/multiqc_data/multiqc_survivor.txt", + "structural/multiqc/multiqc_plots", + "structural/multiqc/multiqc_plots/pdf", + "structural/multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths.pdf", + "structural/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "structural/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "structural/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "structural/multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "structural/multiqc/multiqc_plots/pdf/general_stats_table.pdf", + "structural/multiqc/multiqc_plots/pdf/survivor-cnt.pdf", + "structural/multiqc/multiqc_plots/pdf/survivor-pct.pdf", + "structural/multiqc/multiqc_plots/png", + "structural/multiqc/multiqc_plots/png/bcftools_stats_indel-lengths.png", + "structural/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "structural/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "structural/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "structural/multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "structural/multiqc/multiqc_plots/png/general_stats_table.png", + "structural/multiqc/multiqc_plots/png/survivor-cnt.png", + "structural/multiqc/multiqc_plots/png/survivor-pct.png", + "structural/multiqc/multiqc_plots/svg", + "structural/multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths.svg", + "structural/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + 
"structural/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "structural/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "structural/multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "structural/multiqc/multiqc_plots/svg/general_stats_table.svg", + "structural/multiqc/multiqc_plots/svg/survivor-cnt.svg", + "structural/multiqc/multiqc_plots/svg/survivor-pct.svg", + "structural/multiqc/multiqc_report.html", + "structural/summary", + "structural/summary/comparisons", + "structural/summary/comparisons/truvari.FN.csv", + "structural/summary/comparisons/truvari.FP.csv", + "structural/summary/comparisons/truvari.TP_base.csv", + "structural/summary/comparisons/truvari.TP_comp.csv", + "structural/summary/datavzrd", + "structural/summary/datavzrd/truvari", + "structural/summary/datavzrd/truvari/index.html", + "structural/summary/datavzrd/truvari/static", + "structural/summary/datavzrd/truvari/static/bootstrap-select.min.css", + "structural/summary/datavzrd/truvari/static/bootstrap-table-fixed-columns.min.css", + "structural/summary/datavzrd/truvari/static/bootstrap-table.min.css", + "structural/summary/datavzrd/truvari/static/bootstrap.min.css", + "structural/summary/datavzrd/truvari/static/bundle.js", + "structural/summary/datavzrd/truvari/static/datavzrd.css", + "structural/summary/datavzrd/truvari/test", + "structural/summary/datavzrd/truvari/test/config.js", + "structural/summary/datavzrd/truvari/test/data", + "structural/summary/datavzrd/truvari/test/data/data_1.js", + "structural/summary/datavzrd/truvari/test/functions.js", + "structural/summary/datavzrd/truvari/test/heatmap.js", + "structural/summary/datavzrd/truvari/test/index_1.html", + "structural/summary/datavzrd/truvari/test/plots", + "structural/summary/datavzrd/truvari/test/plots/plot_0.js", + "structural/summary/datavzrd/truvari/test/plots/plot_1.js", + "structural/summary/datavzrd/truvari/test/plots/plot_2.js", + 
"structural/summary/datavzrd/truvari/test/plots/plot_3.js", + "structural/summary/datavzrd/truvari/test/plots/plot_4.js", + "structural/summary/datavzrd/truvari/test/plots/plot_5.js", + "structural/summary/datavzrd/truvari/test/plots/plot_6.js", + "structural/summary/datavzrd/truvari/test/plots/plot_7.js", + "structural/summary/datavzrd/versions.yml", + "structural/summary/plots", + "structural/summary/plots/truvari", + "structural/summary/plots/truvari/metric_by_tool_truvari_mqc.png", + "structural/summary/plots/truvari/variants_by_tool_truvari_mqc.png", + "structural/summary/tables", + "structural/summary/tables/truvari", + "structural/summary/tables/truvari/truvari.summary.csv", + "structural/test1", + "structural/test1/benchmarks", + "structural/test1/benchmarks/truvari", + "structural/test1/benchmarks/truvari/test1.HG002.delly.fn.vcf.gz", + "structural/test1/benchmarks/truvari/test1.HG002.delly.fn.vcf.gz.tbi", + "structural/test1/benchmarks/truvari/test1.HG002.delly.fp.vcf.gz", + "structural/test1/benchmarks/truvari/test1.HG002.delly.fp.vcf.gz.tbi", + "structural/test1/benchmarks/truvari/test1.HG002.delly.summary.json", + "structural/test1/benchmarks/truvari/test1.HG002.delly.tp-base.vcf.gz", + "structural/test1/benchmarks/truvari/test1.HG002.delly.tp-base.vcf.gz.tbi", + "structural/test1/benchmarks/truvari/test1.HG002.delly.tp-comp.vcf.gz", + "structural/test1/benchmarks/truvari/test1.HG002.delly.tp-comp.vcf.gz.tbi", + "structural/test1/preprocess", + "structural/test1/preprocess/test1.filter.vcf", + "structural/test1/stats", + "structural/test1/stats/bcftools", + "structural/test1/stats/bcftools/test1.delly.bcftools_stats.txt", + "structural/test1/stats/survivor", + "structural/test1/stats/survivor/test1.delly_mqc.stats", + "structural/test2", + "structural/test2/benchmarks", + "structural/test2/benchmarks/truvari", + "structural/test2/benchmarks/truvari/test2.HG002.manta.fn.vcf.gz", + "structural/test2/benchmarks/truvari/test2.HG002.manta.fn.vcf.gz.tbi", + 
"structural/test2/benchmarks/truvari/test2.HG002.manta.fp.vcf.gz", + "structural/test2/benchmarks/truvari/test2.HG002.manta.fp.vcf.gz.tbi", + "structural/test2/benchmarks/truvari/test2.HG002.manta.summary.json", + "structural/test2/benchmarks/truvari/test2.HG002.manta.tp-base.vcf.gz", + "structural/test2/benchmarks/truvari/test2.HG002.manta.tp-base.vcf.gz.tbi", + "structural/test2/benchmarks/truvari/test2.HG002.manta.tp-comp.vcf.gz", + "structural/test2/benchmarks/truvari/test2.HG002.manta.tp-comp.vcf.gz.tbi", + "structural/test2/preprocess", + "structural/test2/preprocess/test2.filter.vcf", + "structural/test2/stats", + "structural/test2/stats/bcftools", + "structural/test2/stats/bcftools/test2.manta.bcftools_stats.txt", + "structural/test2/stats/survivor", + "structural/test2/stats/survivor/test2.manta_mqc.stats" + ], + [ + "HG002.bcftools_stats.txt:md5,7c007a87b5730787e570712e784d3cc3", + "HG002_mqc.stats:md5,68681df47b35e3193be03610f5c6e3d6", + "test1.delly.bcftools_stats.txt:md5,d32a9bea755d2065e19e88b9159e2502", + "test1.delly_mqc.stats:md5,e140ad55975c767578b0dd6aff58ba29", + "test2.manta.bcftools_stats.txt:md5,99f8a0b12efa30c6253b44ebfeb9b1d6", + "test2.manta_mqc.stats:md5,1d74b41c6b970992e3a39682b0a68e23" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-03T15:52:10.533611573" + } +} \ No newline at end of file diff --git a/tests/liftover_hg38.nf.test b/tests/liftover_truth.nf.test similarity index 87% rename from tests/liftover_hg38.nf.test rename to tests/liftover_truth.nf.test index 00d46af..f3e94ce 100644 --- a/tests/liftover_hg38.nf.test +++ b/tests/liftover_truth.nf.test @@ -1,15 +1,15 @@ nextflow_pipeline { - name "Test pipeline for small germline variants, tested benchmarking methods are happy and rtgtools" + name "Test pipeline for liftover small germline variants, tested benchmarking methods are happy and rtgtools" script "../main.nf" tag "pipeline" tag "small" tag "germline" tag "liftover" tag "hg38" 
- config "../conf/tests/liftover_hg38.config" + config "../conf/tests/liftover_truth.config" - test("Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools' --liftover") { + test("Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools' --liftover 'truth'") { when { params { diff --git a/tests/liftover_hg38.nf.test.snap b/tests/liftover_truth.nf.test.snap similarity index 59% rename from tests/liftover_hg38.nf.test.snap rename to tests/liftover_truth.nf.test.snap index 55af699..fc319d7 100644 --- a/tests/liftover_hg38.nf.test.snap +++ b/tests/liftover_truth.nf.test.snap @@ -1,7 +1,7 @@ { - "Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools' --liftover": { + "Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools' --liftover 'truth'": { "content": [ - 85, + 86, { "BCFTOOLS_DEDUP": { "bcftools": 1.18 @@ -33,6 +33,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -351,264 +354,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-11-29T09:23:31.648065377" - }, - "-stub": { - "content": [ - 85, - { - "BCFTOOLS_DEDUP": { - "bcftools": 1.18 - }, - "BCFTOOLS_MERGE": { - "bcftools": 1.2 - }, - "BCFTOOLS_NORM": { - "bcftools": 1.18 - }, - "BCFTOOLS_REHEADER": { - "bcftools": 1.18 - }, - "BCFTOOLS_SORT": { - "bcftools": 1.18 - }, - "BCFTOOLS_STATS": { - "bcftools": 1.18 - }, - "BCFTOOLS_VIEW_CONTIGS": { - "bcftools": 1.18 - }, - "DATAVZRD": { - "datavzrd": "2.36.12" - }, - "HAPPY_HAPPY": { - "hap.py": "0.3.14" - }, - "MERGE_REPORTS": { - "python": "3.8.6" - }, - "PLOTS": { - "r-base": "4.3.1" - }, - "REFORMAT_HEADER": { - "tabix": 1.12 - }, - "RTGTOOLS_FORMAT": { - "rtg-tools": "3.12.1" - }, - "RTGTOOLS_VCFEVAL": { - "rtg-tools": "3.12.1" - }, - "TABIX_BGZIP": { - "tabix": "1.19.1" - }, - "TABIX_BGZIPTABIX": { - "tabix": "1.19.1" - }, - "TABIX_TABIX": { - 
"tabix": "1.19.1" - }, - "UCSC_LIFTOVER": { - "ucsc": 377 - }, - "VCF_TO_CSV": { - "python": "3.12.4" - }, - "Workflow": { - "nf-core/variantbenchmarking": "v1.0dev" - } - }, - [ - "pipeline_info", - "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", - "references", - "references/dictionary", - "references/dictionary/genome.dict", - "references/rtgtools", - "references/rtgtools/genome.sdf", - "small", - "small/HG002", - "small/HG002/liftover", - "small/HG002/liftover/HG002.renamechr.vcf.gz", - "small/HG002/liftover/HG002.sort.merged.bed", - "small/HG002/preprocess", - "small/HG002/preprocess/HG002.renamechr.rh.norm.dedup.sort.vcf.gz", - "small/HG002/stats", - "small/HG002/stats/bcftools", - "small/HG002/stats/bcftools/HG002.bcftools_stats.txt", - "small/multiqc", - "small/multiqc/multiqc_data", - "small/multiqc/multiqc_plots", - "small/multiqc/multiqc_report.html", - "small/summary", - "small/summary/comparisons", - "small/summary/comparisons/small", - "small/summary/comparisons/small/rtgtools.FN.csv", - "small/summary/comparisons/small/rtgtools.FP.csv", - "small/summary/comparisons/small/rtgtools.TP_base.csv", - "small/summary/comparisons/small/rtgtools.TP_comp.csv", - "small/summary/datavzrd", - "small/summary/datavzrd/happy", - "small/summary/datavzrd/happy/index.html", - "small/summary/datavzrd/happy/network", - "small/summary/datavzrd/happy/network/config.js", - "small/summary/datavzrd/happy/network/data", - "small/summary/datavzrd/happy/network/data/data_1.js", - "small/summary/datavzrd/happy/network/functions.js", - "small/summary/datavzrd/happy/network/heatmap.js", - "small/summary/datavzrd/happy/network/index_1.html", - "small/summary/datavzrd/happy/network/plots", - "small/summary/datavzrd/happy/network/plots/plot_0.js", - "small/summary/datavzrd/happy/static", - "small/summary/datavzrd/happy/static/bootstrap-select.min.css", - "small/summary/datavzrd/happy/static/bootstrap-table-fixed-columns.min.css", - 
"small/summary/datavzrd/happy/static/bootstrap-table.min.css", - "small/summary/datavzrd/happy/static/bootstrap.min.css", - "small/summary/datavzrd/happy/static/bundle.js", - "small/summary/datavzrd/happy/static/datavzrd.css", - "small/summary/datavzrd/rtgtools", - "small/summary/datavzrd/rtgtools/index.html", - "small/summary/datavzrd/rtgtools/network", - "small/summary/datavzrd/rtgtools/network/config.js", - "small/summary/datavzrd/rtgtools/network/data", - "small/summary/datavzrd/rtgtools/network/data/data_1.js", - "small/summary/datavzrd/rtgtools/network/functions.js", - "small/summary/datavzrd/rtgtools/network/heatmap.js", - "small/summary/datavzrd/rtgtools/network/index_1.html", - "small/summary/datavzrd/rtgtools/network/plots", - "small/summary/datavzrd/rtgtools/network/plots/plot_0.js", - "small/summary/datavzrd/rtgtools/static", - "small/summary/datavzrd/rtgtools/static/bootstrap-select.min.css", - "small/summary/datavzrd/rtgtools/static/bootstrap-table-fixed-columns.min.css", - "small/summary/datavzrd/rtgtools/static/bootstrap-table.min.css", - "small/summary/datavzrd/rtgtools/static/bootstrap.min.css", - "small/summary/datavzrd/rtgtools/static/bundle.js", - "small/summary/datavzrd/rtgtools/static/datavzrd.css", - "small/summary/datavzrd/versions.yml", - "small/summary/plots", - "small/summary/plots/happy", - "small/summary/plots/happy/metric_by_tool_happy.png", - "small/summary/plots/happy/variants_by_tool_happy.png", - "small/summary/plots/rtgtools", - "small/summary/plots/rtgtools/metric_by_tool_rtgtools.png", - "small/summary/plots/rtgtools/variants_by_tool_rtgtools.png", - "small/summary/tables", - "small/summary/tables/happy", - "small/summary/tables/happy/happy.regions.csv", - "small/summary/tables/happy/happy.summary.csv", - "small/summary/tables/rtgtools", - "small/summary/tables/rtgtools/rtgtools.regions.csv", - "small/summary/tables/rtgtools/rtgtools.summary.csv", - "small/test1", - "small/test1/benchmarks", - "small/test1/benchmarks/happy", - 
"small/test1/benchmarks/happy/test1.HG002.strelka.extended.csv", - "small/test1/benchmarks/happy/test1.HG002.strelka.metrics.json.gz", - "small/test1/benchmarks/happy/test1.HG002.strelka.roc.Locations.INDEL.PASS.csv.gz", - "small/test1/benchmarks/happy/test1.HG002.strelka.roc.Locations.INDEL.csv.gz", - "small/test1/benchmarks/happy/test1.HG002.strelka.roc.Locations.SNP.PASS.csv.gz", - "small/test1/benchmarks/happy/test1.HG002.strelka.roc.Locations.SNP.csv.gz", - "small/test1/benchmarks/happy/test1.HG002.strelka.roc.all.csv.gz", - "small/test1/benchmarks/happy/test1.HG002.strelka.runinfo.json", - "small/test1/benchmarks/happy/test1.HG002.strelka.summary.csv", - "small/test1/benchmarks/happy/test1.HG002.strelka.vcf.gz", - "small/test1/benchmarks/happy/test1.HG002.strelka.vcf.gz.tbi", - "small/test1/benchmarks/rtgtools", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.fn.vcf.gz", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.fn.vcf.gz.tbi", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.fp.vcf.gz", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.fp.vcf.gz.tbi", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.non_snp_roc.tsv.gz", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.phasing.txt", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.snp_roc.tsv.gz", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.summary.txt", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.tp-baseline.vcf.gz", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.tp-baseline.vcf.gz.tbi", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.tp.vcf.gz", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.tp.vcf.gz.tbi", - "small/test1/benchmarks/rtgtools/test1.HG002.strelka.weighted_roc.tsv.gz", - "small/test1/preprocess", - "small/test1/preprocess/test1.dedup.sort.vcf.gz", - "small/test1/stats", - "small/test1/stats/bcftools", - "small/test1/stats/bcftools/test1.strelka.bcftools_stats.txt", - "small/test2", - 
"small/test2/benchmarks", - "small/test2/benchmarks/happy", - "small/test2/benchmarks/happy/test2.HG002.bcftools.extended.csv", - "small/test2/benchmarks/happy/test2.HG002.bcftools.metrics.json.gz", - "small/test2/benchmarks/happy/test2.HG002.bcftools.roc.Locations.INDEL.PASS.csv.gz", - "small/test2/benchmarks/happy/test2.HG002.bcftools.roc.Locations.INDEL.csv.gz", - "small/test2/benchmarks/happy/test2.HG002.bcftools.roc.Locations.SNP.PASS.csv.gz", - "small/test2/benchmarks/happy/test2.HG002.bcftools.roc.Locations.SNP.csv.gz", - "small/test2/benchmarks/happy/test2.HG002.bcftools.roc.all.csv.gz", - "small/test2/benchmarks/happy/test2.HG002.bcftools.runinfo.json", - "small/test2/benchmarks/happy/test2.HG002.bcftools.summary.csv", - "small/test2/benchmarks/happy/test2.HG002.bcftools.vcf.gz", - "small/test2/benchmarks/happy/test2.HG002.bcftools.vcf.gz.tbi", - "small/test2/benchmarks/rtgtools", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.fn.vcf.gz", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.fn.vcf.gz.tbi", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.fp.vcf.gz", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.fp.vcf.gz.tbi", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.non_snp_roc.tsv.gz", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.phasing.txt", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.snp_roc.tsv.gz", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.summary.txt", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.tp-baseline.vcf.gz", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.tp-baseline.vcf.gz.tbi", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.tp.vcf.gz", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.tp.vcf.gz.tbi", - "small/test2/benchmarks/rtgtools/test2.HG002.bcftools.weighted_roc.tsv.gz", - "small/test2/preprocess", - "small/test2/preprocess/test2.dedup.sort.vcf.gz", - "small/test2/stats", - "small/test2/stats/bcftools", - 
"small/test2/stats/bcftools/test2.bcftools.bcftools_stats.txt" - ], - [ - "genome.sdf:md5,d41d8cd98f00b204e9800998ecf8427e", - "HG002.sort.merged.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "HG002.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "multiqc_plots:md5,d41d8cd98f00b204e9800998ecf8427e", - "config.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "data_1.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "functions.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "heatmap.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "index_1.html:md5,d41d8cd98f00b204e9800998ecf8427e", - "plot_0.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "config.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "data_1.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "functions.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "heatmap.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "index_1.html:md5,d41d8cd98f00b204e9800998ecf8427e", - "plot_0.js:md5,d41d8cd98f00b204e9800998ecf8427e", - "test1.HG002.strelka.extended.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "test1.HG002.strelka.roc.Locations.INDEL.PASS.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test1.HG002.strelka.roc.Locations.INDEL.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test1.HG002.strelka.roc.Locations.SNP.PASS.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test1.HG002.strelka.roc.Locations.SNP.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test1.HG002.strelka.roc.all.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test1.HG002.strelka.summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "test1.HG002.strelka.phasing.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "test1.HG002.strelka.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "test1.strelka.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.HG002.bcftools.extended.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.HG002.bcftools.roc.Locations.INDEL.PASS.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - 
"test2.HG002.bcftools.roc.Locations.INDEL.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test2.HG002.bcftools.roc.Locations.SNP.PASS.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test2.HG002.bcftools.roc.Locations.SNP.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test2.HG002.bcftools.roc.all.csv.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test2.HG002.bcftools.summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.HG002.bcftools.phasing.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.HG002.bcftools.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.bcftools.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.0" - }, - "timestamp": "2024-11-12T15:39:05.089607077" + "timestamp": "2024-12-03T15:58:16.618582714" } } \ No newline at end of file diff --git a/workflows/variantbenchmarking.nf b/workflows/variantbenchmarking.nf index 0cd825e..2c78a31 100644 --- a/workflows/variantbenchmarking.nf +++ b/workflows/variantbenchmarking.nf @@ -67,20 +67,24 @@ workflow VARIANTBENCHMARKING { : Channel.empty() // read chain file, liftover genome and rename chr files if liftover is true - chain = Channel.empty() - rename_chr = Channel.empty() - dictionary = Channel.empty() - if (params.liftover){ - chain = params.chain ? Channel.fromPath(params.chain, checkIfExists: true).map{ bed -> tuple([id: bed.getSimpleName()], bed) }.collect() - : Channel.empty() - rename_chr = params.rename_chr ? 
Channel.fromPath(params.rename_chr, checkIfExists: true).map{ txt -> tuple([id: txt.getSimpleName()], txt) }.collect() - : Channel.empty() + if (params.chain && params.rename_chr){ + chain = Channel.fromPath(params.chain, checkIfExists: true).map{ bed -> tuple([id: bed.getSimpleName()], bed) }.collect() + rename_chr = Channel.fromPath(params.rename_chr, checkIfExists: true).map{ txt -> tuple([id: txt.getSimpleName()], txt) }.collect() + }else{ + log.error "Please specify params.chain and params.rename_chr to process liftover of the files" + exit 1 + } - dictionary = params.dictionary ? Channel.fromPath(params.dictionary, checkIfExists: true).map{ dict -> tuple([id: dict.getSimpleName()], dict) }.collect() - : Channel.empty() + // if the dictionary file is missing, PICARD_CREATESEQUENCEDICTIONARY will create one + dictionary = params.dictionary ? Channel.fromPath(params.dictionary, checkIfExists: true).map{ dict -> tuple([id: dict.getSimpleName()], dict) }.collect() : Channel.empty() + }else{ + chain = Channel.empty() + rename_chr = Channel.empty() + dictionary = Channel.empty() } + // PREPROCESSES // subsample multisample vcf if necessary @@ -115,7 +119,10 @@ workflow VARIANTBENCHMARKING { PREPARE_VCFS_TEST( vcf_ch, fasta, - fai + fai, + chain, + rename_chr, + dictionary ) ch_versions = ch_versions.mix(PREPARE_VCFS_TEST.out.versions) From c687f926c7e05408c0cf78bb59e595e747f32956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Tue, 3 Dec 2024 16:28:15 +0000 Subject: [PATCH 03/10] edit default --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 517cbcd..ed1d723 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -38,7 +38,7 @@ }, "analysis": { "type": "string", - "default": "", + "default": "germline", "description": "The analysis type used by the input files", "enum": ["germline", "somatic"], "pattern": "(germline|somatic)", @@ -47,7 +47,7 @@
}, "variant_type": { "type": "string", - "default": "", + "default": "small", "description": "Variant types to benchmark", "errorMessage": "Select a variant type to make the analysis: small,snv,indel,structural or copynumber. Select small when your vcf contains both snvs and indels", "enum": ["small", "snv", "indel", "structural", "copynumber"], From 4c82c9ace2c9ebf1722adbc943d47b616f0327ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Tue, 3 Dec 2024 16:39:42 +0000 Subject: [PATCH 04/10] error handling with truth --- conf/tests/liftover_test.config | 2 +- nextflow_schema.json | 3 --- workflows/variantbenchmarking.nf | 10 ++++++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/conf/tests/liftover_test.config b/conf/tests/liftover_test.config index f3878e2..937ea4f 100644 --- a/conf/tests/liftover_test.config +++ b/conf/tests/liftover_test.config @@ -32,7 +32,7 @@ params { min_sv_size = 30 truth_id = "HG002" - truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz" + //truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz" regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed" //liftover files diff --git a/nextflow_schema.json b/nextflow_schema.json index ed1d723..446b0f0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -31,14 +31,12 @@ }, "truth_id": { "type": "string", - "default": "", "description": "Truth id, sample name to define truth vcf", "fa_icon": "fas fa-folder-open", "errorMessage": "The sample name of the truth case. 
Examples: HG002, SEQC2, HG001, HG003, CHM13" }, "analysis": { "type": "string", - "default": "germline", "description": "The analysis type used by the input files", "enum": ["germline", "somatic"], "pattern": "(germline|somatic)", @@ -47,7 +45,6 @@ }, "variant_type": { "type": "string", - "default": "small", "description": "Variant types to benchmark", "errorMessage": "Select a variant type to make the analysis: small,snv,indel,structural or copynumber. Select small when your vcf contains both snvs and indels", "enum": ["small", "snv", "indel", "structural", "copynumber"], diff --git a/workflows/variantbenchmarking.nf b/workflows/variantbenchmarking.nf index 2c78a31..aa23d3a 100644 --- a/workflows/variantbenchmarking.nf +++ b/workflows/variantbenchmarking.nf @@ -56,8 +56,14 @@ workflow VARIANTBENCHMARKING { //// check Truth Files //// - truth_ch = Channel.fromPath(params.truth_vcf, checkIfExists: true) - .map{ vcf -> tuple([id: params.truth_id, vartype:params.variant_type], vcf) }.collect() + if (params.truth_id && params.truth_ch){ + truth_ch = Channel.fromPath(params.truth_vcf, checkIfExists: true) + .map{ vcf -> tuple([id: params.truth_id, vartype:params.variant_type], vcf) }.collect() + }else{ + log.error "Please specify params.truth_id and params.truth_vcf to perform benchmarking analysis" + exit 1 + } + regions_bed_ch = params.regions_bed ? 
Channel.fromPath(params.regions_bed, checkIfExists: true).collect() : Channel.empty() From 2f14200c08fa60fc845f91591f9ffafd40b268f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Tue, 3 Dec 2024 16:40:51 +0000 Subject: [PATCH 05/10] chande default for methods --- nextflow_schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 446b0f0..fc4b2f4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -53,7 +53,6 @@ }, "method": { "type": "string", - "default": "truvari,svanalyzer,happy,sompy,rtgtools,wittyer", "description": "The benchmarking methods to use. Should be a comma-separate list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer", "errorMessage": "A wrong input has been detected. Should be a comma-separated list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer", "pattern": "^((truvari|svanalyzer|happy|sompy|rtgtools|wittyer)?,?)*(? 
Date: Tue, 3 Dec 2024 16:48:43 +0000 Subject: [PATCH 06/10] wrong decleration of params.truth_vcf --- workflows/variantbenchmarking.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/variantbenchmarking.nf b/workflows/variantbenchmarking.nf index aa23d3a..455dbbd 100644 --- a/workflows/variantbenchmarking.nf +++ b/workflows/variantbenchmarking.nf @@ -56,7 +56,7 @@ workflow VARIANTBENCHMARKING { //// check Truth Files //// - if (params.truth_id && params.truth_ch){ + if (params.truth_id && params.truth_vcf){ truth_ch = Channel.fromPath(params.truth_vcf, checkIfExists: true) .map{ vcf -> tuple([id: params.truth_id, vartype:params.variant_type], vcf) }.collect() }else{ From 57fd7c7423a11ddb066982b73b0f5d1ea0f3d621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Tue, 3 Dec 2024 17:15:28 +0000 Subject: [PATCH 07/10] fix --- conf/tests/liftover_test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/tests/liftover_test.config b/conf/tests/liftover_test.config index 937ea4f..f3878e2 100644 --- a/conf/tests/liftover_test.config +++ b/conf/tests/liftover_test.config @@ -32,7 +32,7 @@ params { min_sv_size = 30 truth_id = "HG002" - //truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz" + truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz" regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed" //liftover files From 70214d23c361f05a6d284820538b5daac1bdd193 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Wed, 4 Dec 2024 10:24:48 +0000 Subject: [PATCH 08/10] renew snapshots --- tests/germline_small.nf.test.snap | 18 
++++++++++++++---- tests/germline_sv.nf.test.snap | 20 ++++++++++++++++---- tests/somatic_indel.nf.test.snap | 14 ++++++++++---- tests/somatic_snv.nf.test.snap | 12 +++++++++--- tests/somatic_sv.nf.test.snap | 10 ++++++++-- 5 files changed, 57 insertions(+), 17 deletions(-) diff --git a/tests/germline_small.nf.test.snap b/tests/germline_small.nf.test.snap index a99138b..e1a3bb6 100644 --- a/tests/germline_small.nf.test.snap +++ b/tests/germline_small.nf.test.snap @@ -1,7 +1,7 @@ { "-stub": { "content": [ - 76, + 77, { "BCFTOOLS_DEDUP": { "bcftools": 1.18 @@ -33,6 +33,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -62,6 +65,8 @@ "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", "references", + "references/dictionary", + "references/dictionary/genome.dict", "references/rtgtools", "references/rtgtools/genome.sdf", "small", @@ -231,11 +236,11 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-11-28T16:47:47.024615694" + "timestamp": "2024-12-04T10:12:17.779754389" }, "Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools'": { "content": [ - 76, + 77, { "BCFTOOLS_DEDUP": { "bcftools": 1.18 @@ -267,6 +272,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -296,6 +304,8 @@ "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", "references", + "references/dictionary", + "references/dictionary/genome.dict", "references/rtgtools", "references/rtgtools/genome.sdf", "references/rtgtools/genome.sdf/done", @@ -575,6 +585,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-11-28T16:45:26.990755841" + "timestamp": "2024-12-04T10:10:09.338722844" } } \ No newline at end of file diff --git a/tests/germline_sv.nf.test.snap b/tests/germline_sv.nf.test.snap index 
b566380..5181ca9 100644 --- a/tests/germline_sv.nf.test.snap +++ b/tests/germline_sv.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'germline' --variant_type 'structural' --method 'truvari,svbenchmark,wittyer'": { "content": [ - 144, + 145, { "BCFTOOLS_DEDUP": { "bcftools": 1.18 @@ -30,6 +30,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -79,6 +82,9 @@ [ "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/dictionary", + "references/dictionary/genome.dict", "structural", "structural/HG002", "structural/HG002/preprocess", @@ -357,11 +363,11 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-11-28T16:51:57.386421371" + "timestamp": "2024-12-04T10:22:09.959009433" }, "-stub": { "content": [ - 144, + 145, { "BCFTOOLS_DEDUP": { "bcftools": 1.18 @@ -390,6 +396,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -439,6 +448,9 @@ [ "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/dictionary", + "references/dictionary/genome.dict", "structural", "structural/HG002", "structural/HG002/preprocess", @@ -656,6 +668,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-11-28T16:54:34.483191183" + "timestamp": "2024-12-04T10:24:27.339868009" } } \ No newline at end of file diff --git a/tests/somatic_indel.nf.test.snap b/tests/somatic_indel.nf.test.snap index 6107271..2e1d10b 100644 --- a/tests/somatic_indel.nf.test.snap +++ b/tests/somatic_indel.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'somatic' --variant_type 'indel' --method 'sompy'": { "content": [ - 27, + 28, { "BCFTOOLS_REHEADER": { "bcftools": 1.18 @@ -30,6 +30,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": 
"3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -165,7 +168,10 @@ "indel/test2/stats/bcftools", "indel/test2/stats/bcftools/test2.strelka.bcftools_stats.txt", "pipeline_info", - "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/dictionary", + "references/dictionary/genome.dict" ], [ "SEQC2.bcftools_stats.txt:md5,e530daf4f6a4923f1cd85c51893d5747", @@ -175,8 +181,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.10.1" + "nextflow": "24.10.2" }, - "timestamp": "2024-11-22T12:53:42.07320983" + "timestamp": "2024-12-04T09:59:41.934881268" } } \ No newline at end of file diff --git a/tests/somatic_snv.nf.test.snap b/tests/somatic_snv.nf.test.snap index 4c4cccb..20188bb 100644 --- a/tests/somatic_snv.nf.test.snap +++ b/tests/somatic_snv.nf.test.snap @@ -1,7 +1,7 @@ { "-stub": { "content": [ - 38, + 39, { "BCFTOOLS_REHEADER": { "bcftools": 1.18 @@ -30,6 +30,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -46,6 +49,9 @@ [ "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/dictionary", + "references/dictionary/genome.dict", "snv", "snv/SEQC2", "snv/SEQC2/stats", @@ -125,8 +131,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.10.1" + "nextflow": "24.10.2" }, - "timestamp": "2024-11-22T12:57:55.883023408" + "timestamp": "2024-12-04T09:53:59.901867572" } } \ No newline at end of file diff --git a/tests/somatic_sv.nf.test.snap b/tests/somatic_sv.nf.test.snap index fabf609..539def8 100644 --- a/tests/somatic_sv.nf.test.snap +++ b/tests/somatic_sv.nf.test.snap @@ -1,7 +1,7 @@ { "Params: --analysis 'somatic' --variant_type 'structural' --method 'truvari,svbenchmark'": { "content": [ - 69, + 70, { "BCFTOOLS_REHEADER": { "bcftools": 1.18 @@ -30,6 +30,9 @@ "MERGE_REPORTS": { "python": "3.8.6" }, + 
"PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.2.0-1-g3948afb6b" + }, "PLOTS": { "r-base": "4.3.1" }, @@ -61,6 +64,9 @@ [ "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/dictionary", + "references/dictionary/genome.dict", "structural", "structural/SEQC2", "structural/SEQC2/stats", @@ -206,6 +212,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-11-28T16:58:12.671419675" + "timestamp": "2024-12-04T09:56:29.305188123" } } \ No newline at end of file From d7ed58ac4786b11e92e8d127e94d824878337271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Thu, 5 Dec 2024 09:17:46 +0000 Subject: [PATCH 09/10] update link to truth.md --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index e56db2c..8f4d274 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -41,7 +41,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p ## Truth samples -Please find the detailed information about truth samples [here](/workspace/variantbenchmarking/docs/truth.md). +Please find the detailed information about truth samples [here](../docs/truth.md). ## Lifting over truth sets From f7c496cd7def2e1b7f6b2d0a760f5111c83074e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Thu, 5 Dec 2024 12:35:46 +0000 Subject: [PATCH 10/10] update usage.md --- docs/usage.md | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 8f4d274..7471a45 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -45,10 +45,24 @@ Please find the detailed information about truth samples [here](../docs/truth.md ## Lifting over truth sets -This workflow comes with a liftover option for truth sets. In order to activate liftover use `--liftover true`. +This workflow comes with a liftover option for truth sets. 
In order to activate liftover use `--liftover "truth"`. - `--chain`: This workflow uses picard tools for lifting over and a chain file has to be provided specific to the input truth vcf. Some examples can be found [here](https://genome.ucsc.edu/goldenPath/help/chain.html) - `--rename_chr`: Renaming chromosomes is required after liftover process. Some examples can be found under `assets/rename_contigs` directory. +- `--dictionary`: A `.dict` file is required to run the liftover process. If a dictionary file is not provided, Picard CreateSequenceDictionary will create one and use it. + +## Lifting over test sets + +Lifting over test samples is also possible through this pipeline. If you want to lift over at least one of the samples, first use `--liftover "test"` and then add a `liftover` column to the samplesheet: + +```csv title="samplesheet.csv" +id,test_vcf,caller,liftover +test1,test1.vcf.gz,delly,true +test2,test2.vcf,gatk,false +test3,test3.vcf.gz,cnvkit,true +``` + +Please note that you should still provide the `--chain` and `--rename_chr` files, and that lifting over truth and test samples simultaneously is not possible. ## Standardization and normalization parameters @@ -234,10 +248,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `test_full` - A profile with a complete configuration for full size of sample testing - Includes links to test data so needs no other parameters -- `liftover_hg37` - - A profile with a complete configuration for using liftover of HG002 hg38 truth set to hg37 +- `liftover_test` + - A profile with a complete configuration for using liftover of HG002 hg38 test set to hg37 - Includes links to test data so needs no other parameters -- `liftover_hg38` +- `liftover_truth` - A profile with a complete configuration for using liftover of HG002 hg37 truth set to hg38 - Includes links to test data so needs no other parameters - `germline_small`