add liftover option for test vcfs
kubranarci committed Dec 3, 2024
1 parent 823cece commit 4c770b2
Showing 13 changed files with 341 additions and 307 deletions.
6 changes: 6 additions & 0 deletions assets/schema_input.json
@@ -154,6 +154,12 @@
"enum": ["sc", "cts", "d"],
"minLength": 1,
"default": null
},
"liftover": {
"type": "boolean",
"description": "Liftover option for test vcfs, to activate add params.liftover='test' ",
"meta": ["liftover"],
"default": false
}
},
"required": ["test_vcf", "caller", "id"]
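The new `liftover` column is a per-sample boolean that is carried into the meta map as `meta.liftover`. As the description notes, it only takes effect together with the pipeline-level liftover parameters. A minimal, hypothetical params sketch (the chain and rename_chr values mirror the test config shown below; the samplesheet path is a placeholder):

```groovy
// Hypothetical sketch, not part of this commit: pipeline-level parameters that
// accompany a samplesheet whose rows set liftover=true.
params {
    input      = 'samplesheet_with_liftover_column.csv'  // placeholder path
    liftover   = 'test'                                   // activates liftover of flagged test VCFs
    chain      = 'http://ftp.ensembl.org/pub/assembly_mapping/homo_sapiens/GRCh38_to_GRCh37.chain.gz'
    rename_chr = 'https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch38_grch37.txt'
}
```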
@@ -20,22 +20,23 @@ params {
max_time = '8.h'

// Input data
input = 'https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/samplesheet_sv_hg37.csv'
input = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/samplesheet_sv_liftover.csv"
outdir = 'results'
genome = 'GRCh37'

// Processes
analysis = 'germline'
variant_type = "structural"
method = 'truvari'
preprocess = "normalization,deduplication,filter_contigs"
preprocess = "filter_contigs"
min_sv_size = 30

truth_id = "HG002"
truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz"
regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed"

//liftover files
liftover = true
liftover = "test"
chain = "http://ftp.ensembl.org/pub/assembly_mapping/homo_sapiens/GRCh38_to_GRCh37.chain.gz"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch38_grch37.txt"
}
@@ -35,7 +35,7 @@ params {
regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.bed"

//liftover files
liftover = true
liftover = "truth"
chain = "https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/funcotator/data_sources/gnomAD/b37ToHg38.over.chain"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch37_grch38.txt"
}
6 changes: 3 additions & 3 deletions nextflow.config
@@ -48,7 +48,7 @@ params {
dictionary = null
rename_chr = null
chain = null
liftover = false
liftover = ""

// MultiQC options
multiqc_config = null
@@ -193,8 +193,8 @@ profiles {
somatic_snv { includeConfig 'conf/tests/somatic_snv.config' }
somatic_indel { includeConfig 'conf/tests/somatic_indel.config' }
somatic_sv { includeConfig 'conf/tests/somatic_sv.config' }
liftover_hg37 { includeConfig 'conf/tests/liftover_hg37.config' }
liftover_hg38 { includeConfig 'conf/tests/liftover_hg38.config' }
liftover_test { includeConfig 'conf/tests/liftover_test.config' }
liftover_truth { includeConfig 'conf/tests/liftover_truth.config' }

}

13 changes: 9 additions & 4 deletions nextflow_schema.json
@@ -31,12 +31,14 @@
},
"truth_id": {
"type": "string",
"default": "",
"description": "Truth id, sample name to define truth vcf",
"fa_icon": "fas fa-folder-open",
"errorMessage": "The sample name of the truth case. Examples: HG002, SEQC2, HG001, HG003, CHM13"
},
"analysis": {
"type": "string",
"default": "",
"description": "The analysis type used by the input files",
"enum": ["germline", "somatic"],
"pattern": "(germline|somatic)",
@@ -45,6 +47,7 @@
},
"variant_type": {
"type": "string",
"default": "",
"description": "Variant types to benchmark",
"errorMessage": "Select a variant type to make the analysis: small,snv,indel,structural or copynumber. Select small when your vcf contains both snvs and indels",
"enum": ["small", "snv", "indel", "structural", "copynumber"],
@@ -53,6 +56,7 @@
},
"method": {
"type": "string",
"default": "truvari,svanalyzer,happy,sompy,rtgtools,wittyer",
"description": "The benchmarking methods to use. Should be a comma-separate list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer",
"errorMessage": "A wrong input has been detected. Should be a comma-separated list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer",
"pattern": "^((truvari|svanalyzer|happy|sompy|rtgtools|wittyer)?,?)*(?<!,)$",
@@ -197,18 +201,19 @@
"default": "s3://ngi-igenomes/igenomes/"
},
"liftover": {
"type": "boolean",
"description": "Run liftover workflow",
"type": "string",
"description": "Run liftover workflow: test,truth",
"fa_icon": "fas fa-ban",
"hidden": true,
"help_text": "Makes the use of liftover subworkflow, hg37 truth sets will liftover to hg38 and visa versa. Has to be either combined with itruth.config or --chain and --rename_chr."
"pattern": "^((test|truth)?,?)*(?<!,)$",
"help_text": "Makes the use of liftover subworkflow, hg37 truth sets will liftover to hg38 and visa versa. Has to be either combined with --chain and --rename_chr."
},
"chain": {
"type": "string",
"format": "file-path",
"exists": true,
"pattern": "^\\S+\\.(chain|bed)?(\\.gz)?$",
"description": "Path to the chain file sey required for liftover.",
"description": "Path to the chain file required for liftover.",
"help_text": "This parameter is *mandatory* if `--liftover` is true",
"fa_icon": "fas fa-file-csv"
},
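Because `liftover` changes from a boolean to a comma-separated string, the new `pattern` is what actually constrains the accepted values. A small Groovy sketch of what that regex (copied verbatim from the schema entry above) accepts; the assertions are illustrative only:

```groovy
// Regex copied from the nextflow_schema.json entry above.
def pattern = '^((test|truth)?,?)*(?<!,)$'

assert 'test'       ==~ pattern   // lift over the flagged test VCFs
assert 'truth'      ==~ pattern   // lift over the truth VCF and BED
assert 'test,truth' ==~ pattern   // both at once
assert ''           ==~ pattern   // default in nextflow.config: liftover disabled
assert !('test,'    ==~ pattern)  // trailing comma is rejected by the lookbehind
```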
@@ -1,5 +1,5 @@
//
// LIFTOVER_VCFS_TRUTH: SUBWORKFLOW TO LIFTOVER TRUTH VCFS HG37 TO HG38 OR HG38 TO HG37
// LIFTOVER_VCFS: SUBWORKFLOW TO LIFTOVER VCFS HG37 TO HG38 OR HG38 TO HG37
//

include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary'
@@ -11,10 +11,10 @@ include { SORT_BED } from '../../modules/local/custom/sor
include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge'


workflow LIFTOVER_VCFS_TRUTH {
workflow LIFTOVER_VCFS {
take:
truth_ch // channel: [val(meta), vcf]
high_conf_ch // channel: [bed]
ch_vcf // channel: [val(meta), vcf]
ch_bed // channel: [bed]
fasta // reference channel [val(meta), ref.fa]
chain // chain channel [val(meta), chain.gz]
rename_chr // reference channel [val(meta), chrlist.txt]
@@ -35,7 +35,7 @@ workflow LIFTOVER_VCFS_TRUTH {

// Use picard liftovervcf tool to convert vcfs
PICARD_LIFTOVERVCF(
truth_ch,
ch_vcf,
dictionary,
fasta,
chain
@@ -56,9 +56,9 @@
)
vcf_ch = BCFTOOLS_RENAME_CHR.out.vcf

// liftover high confidence file if given
// liftover high confidence bed file if given
UCSC_LIFTOVER(
high_conf_ch.map{file -> tuple([id: params.truth_id], file)},
ch_bed.map{file -> tuple([id: params.truth_id], file)},
chain.map{_meta, file -> file}
)
versions = versions.mix(UCSC_LIFTOVER.out.versions.first())
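The subworkflow is renamed from LIFTOVER_VCFS_TRUTH to LIFTOVER_VCFS and its inputs are generalised from truth_ch/high_conf_ch to ch_vcf/ch_bed, so either the truth set or the test VCFs can be lifted over. A minimal calling sketch (not from this commit; all channel contents below are placeholders):

```groovy
// Hypothetical caller, for illustration only. The six inputs follow the order
// used by the calls in prepare_vcfs_test.nf and prepare_vcfs_truth.nf below.
include { LIFTOVER_VCFS } from './subworkflows/local/liftover_vcfs'

workflow {
    ch_vcf     = Channel.of([[id: 'sample1'], file('sample1.vcf.gz')])   // [meta, vcf]
    ch_bed     = Channel.empty()                                         // no BED to lift over
    fasta      = Channel.value([[id: 'ref'],   file('genome.fa')])
    chain      = Channel.value([[id: 'chain'], file('GRCh38_to_GRCh37.chain.gz')])
    rename_chr = Channel.value([[id: 'chr'],   file('grch38_grch37.txt')])
    dictionary = Channel.value([[id: 'dict'],  file('genome.dict')])

    LIFTOVER_VCFS(ch_vcf, ch_bed, fasta, chain, rename_chr, dictionary)
}
```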
38 changes: 30 additions & 8 deletions subworkflows/local/prepare_vcfs_test.nf
@@ -2,13 +2,14 @@
// PREPARE_VCFS: SUBWORKFLOW TO PREPARE INPUT VCFS
//

include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename'
include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication'
include { VCF_VARIANT_FILTERING } from '../local/vcf_variant_filtering'
include { SPLIT_SMALL_VARIANTS_TEST } from '../local/split_small_variants_test'
include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm'
include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix'
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'
include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename'
include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication'
include { VCF_VARIANT_FILTERING } from '../local/vcf_variant_filtering'
include { SPLIT_SMALL_VARIANTS_TEST } from '../local/split_small_variants_test'
include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm'
include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix'
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'
include { LIFTOVER_VCFS } from '../local/liftover_vcfs'
include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_CONTIGS } from '../../modules/nf-core/bcftools/view'


@@ -17,14 +18,35 @@ workflow PREPARE_VCFS_TEST {
test_ch // channel: [val(meta), vcf]
fasta // reference channel [val(meta), ref.fa]
fai // reference channel [val(meta), ref.fa.fai]
chain // reference channel [val(meta), chain.gz]
rename_chr // reference channel [val(meta), chrlist.txt]
dictionary // reference channel [val(meta), genome.dict]

main:

versions = Channel.empty()

test_ch.branch{
def meta = it[0]
liftover: meta.liftover
other: true}.set{vcf}

vcf_ch = Channel.empty()

LIFTOVER_VCFS(
vcf.liftover,
Channel.empty(),
fasta,
chain,
rename_chr,
dictionary
)
versions = versions.mix(LIFTOVER_VCFS.out.versions.first())
vcf_ch = vcf_ch.mix(LIFTOVER_VCFS.out.vcf_ch,vcf.other)

// Add "query" to test sample
VCF_REHEADER_SAMPLENAME(
test_ch,
vcf_ch,
fai
)
versions = versions.mix(VCF_REHEADER_SAMPLENAME.out.versions.first())
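PREPARE_VCFS_TEST now takes the liftover reference inputs, branches the incoming test VCFs on the per-sample `meta.liftover` flag, sends only the flagged ones through LIFTOVER_VCFS, and mixes the result back with the untouched VCFs before reheadering. A self-contained toy sketch of that branch/mix pattern (sample names and paths are invented):

```groovy
// Toy illustration of the branch used in PREPARE_VCFS_TEST above.
workflow BRANCH_DEMO {
    main:
    Channel.of(
            [[id: 's1', liftover: true ], 's1.vcf.gz'],
            [[id: 's2', liftover: false], 's2.vcf.gz'])
        .branch { meta, vcf ->
            liftover: meta.liftover      // would go through LIFTOVER_VCFS
            other: true                  // passes straight on to reheadering
        }
        .set { vcf }

    vcf.liftover.view { meta, path -> "lift over:    ${meta.id}" }
    vcf.other.view    { meta, path -> "pass through: ${meta.id}" }
}
```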
12 changes: 6 additions & 6 deletions subworkflows/local/prepare_vcfs_truth.nf
@@ -7,7 +7,7 @@ include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'
include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename'
include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication'
include { LIFTOVER_VCFS_TRUTH } from '../local/liftover_vcfs_truth'
include { LIFTOVER_VCFS } from '../local/liftover_vcfs'


workflow PREPARE_VCFS_TRUTH {
@@ -25,19 +25,19 @@ workflow PREPARE_VCFS_TRUTH {
versions = Channel.empty()

// if liftover option is set convert truth files
if (params.liftover){
if (params.liftover.contains("truth")){

LIFTOVER_VCFS_TRUTH(
LIFTOVER_VCFS(
truth_ch,
high_conf_ch,
fasta,
chain,
rename_chr,
dictionary
)
versions = versions.mix(LIFTOVER_VCFS_TRUTH.out.versions.first())
truth_ch = LIFTOVER_VCFS_TRUTH.out.vcf_ch
high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ _meta, bed -> [bed]}
versions = versions.mix(LIFTOVER_VCFS.out.versions.first())
truth_ch = LIFTOVER_VCFS.out.vcf_ch
high_conf_ch = LIFTOVER_VCFS.out.bed_ch.map{ _meta, bed -> [bed]}
}

// Reheader sample name for truth file - using meta.caller
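On the truth side, the gate changes from the old boolean check to a substring check, so a single comma-separated `params.liftover` can enable both the truth liftover here and, per the samplesheet schema description, the test-VCF liftover handled in the previous file. A short Groovy sketch of the gating (values taken from the schema and the test configs above):

```groovy
// Gating sketch only; 'test' and 'truth' are the two values allowed by the
// nextflow_schema.json pattern shown earlier.
['', 'test', 'truth', 'test,truth'].each { liftover ->
    def liftTruth = liftover.contains('truth')   // branch taken in PREPARE_VCFS_TRUTH
    println "params.liftover = '${liftover}' -> lift over truth set: ${liftTruth}"
}
```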
40 changes: 40 additions & 0 deletions tests/liftover_test.nf.test
@@ -0,0 +1,40 @@
nextflow_pipeline {

name "Test pipeline for liftover structural germline variants, tested benchmarking methods are truvari"
script "../main.nf"
tag "pipeline"
tag "structural"
tag "germline"
tag "liftover"
tag "hg37"
config "../conf/tests/liftover_test.config"

test("Params: --analysis 'germline' --variant_type 'structural' --method 'truvari' --liftover 'test'") {

when {
params {
outdir = "$outputDir"
}
}

then {
// stable_name: All files + folders in ${params.outdir}/ with a stable name
def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}'])
// stable_path: All files in ${params.outdir}/ with stable content
def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore')
assertAll(
{ assert workflow.success },
{ assert snapshot(
// Number of successful tasks
workflow.trace.succeeded().size(),
// pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions
removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"),
// All stable path name, with a relative path
stable_name,
// All files with stable contents
stable_path
).match() }
)
}
}
}