add liftover option for test vcfs
kubranarci committed Dec 3, 2024
1 parent 823cece commit 4c770b2
Showing 13 changed files with 341 additions and 307 deletions.
6 changes: 6 additions & 0 deletions assets/schema_input.json
@@ -154,6 +154,12 @@
"enum": ["sc", "cts", "d"],
"minLength": 1,
"default": null
},
"liftover": {
"type": "boolean",
"description": "Liftover option for test vcfs, to activate add params.liftover='test' ",
"meta": ["liftover"],
"default": false
}
},
"required": ["test_vcf", "caller", "id"]
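The new `liftover` column is a per-sample boolean that is carried into the meta map as `meta.liftover`. As the description notes, it only takes effect together with the pipeline-level liftover parameters. A minimal, hypothetical params sketch (the chain and rename_chr values mirror the test config shown below; the samplesheet path is a placeholder):

```groovy
// Hypothetical sketch, not part of this commit: pipeline-level parameters that
// accompany a samplesheet whose rows set liftover=true.
params {
    input      = 'samplesheet_with_liftover_column.csv'  // placeholder path
    liftover   = 'test'                                   // activates liftover of flagged test VCFs
    chain      = 'http://ftp.ensembl.org/pub/assembly_mapping/homo_sapiens/GRCh38_to_GRCh37.chain.gz'
    rename_chr = 'https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch38_grch37.txt'
}
```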
@@ -20,22 +20,23 @@ params {
max_time = '8.h'

// Input data
input = 'https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/samplesheet_sv_hg37.csv'
input = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/samplesheet_sv_liftover.csv"
outdir = 'results'
genome = 'GRCh37'

// Processes
analysis = 'germline'
variant_type = "structural"
method = 'truvari'
preprocess = "normalization,deduplication,filter_contigs"
preprocess = "filter_contigs"
min_sv_size = 30

truth_id = "HG002"
truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz"
regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed"

//liftover files
liftover = true
liftover = "test"
chain = "http://ftp.ensembl.org/pub/assembly_mapping/homo_sapiens/GRCh38_to_GRCh37.chain.gz"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch38_grch37.txt"
}
@@ -35,7 +35,7 @@ params {
regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.bed"

//liftover files
liftover = true
liftover = "truth"
chain = "https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/funcotator/data_sources/gnomAD/b37ToHg38.over.chain"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch37_grch38.txt"
}
6 changes: 3 additions & 3 deletions nextflow.config
@@ -48,7 +48,7 @@ params {
dictionary = null
rename_chr = null
chain = null
liftover = false
liftover = ""

// MultiQC options
multiqc_config = null
@@ -193,8 +193,8 @@ profiles {
somatic_snv { includeConfig 'conf/tests/somatic_snv.config' }
somatic_indel { includeConfig 'conf/tests/somatic_indel.config' }
somatic_sv { includeConfig 'conf/tests/somatic_sv.config' }
liftover_hg37 { includeConfig 'conf/tests/liftover_hg37.config' }
liftover_hg38 { includeConfig 'conf/tests/liftover_hg38.config' }
liftover_test { includeConfig 'conf/tests/liftover_test.config' }
liftover_truth { includeConfig 'conf/tests/liftover_truth.config' }

}

13 changes: 9 additions & 4 deletions nextflow_schema.json
@@ -31,12 +31,14 @@
},
"truth_id": {
"type": "string",
"default": "",
"description": "Truth id, sample name to define truth vcf",
"fa_icon": "fas fa-folder-open",
"errorMessage": "The sample name of the truth case. Examples: HG002, SEQC2, HG001, HG003, CHM13"
},
"analysis": {
"type": "string",
"default": "",
"description": "The analysis type used by the input files",
"enum": ["germline", "somatic"],
"pattern": "(germline|somatic)",
@@ -45,6 +47,7 @@
},
"variant_type": {
"type": "string",
"default": "",
"description": "Variant types to benchmark",
"errorMessage": "Select a variant type to make the analysis: small,snv,indel,structural or copynumber. Select small when your vcf contains both snvs and indels",
"enum": ["small", "snv", "indel", "structural", "copynumber"],
@@ -53,6 +56,7 @@
},
"method": {
"type": "string",
"default": "truvari,svanalyzer,happy,sompy,rtgtools,wittyer",
"description": "The benchmarking methods to use. Should be a comma-separate list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer",
"errorMessage": "A wrong input has been detected. Should be a comma-separated list of one or more of the following options: truvari, svanalyzer, happy, sompy, rtgtools, wittyer",
"pattern": "^((truvari|svanalyzer|happy|sompy|rtgtools|wittyer)?,?)*(?<!,)$",
@@ -197,18 +201,19 @@
"default": "s3://ngi-igenomes/igenomes/"
},
"liftover": {
"type": "boolean",
"description": "Run liftover workflow",
"type": "string",
"description": "Run liftover workflow: test,truth",
"fa_icon": "fas fa-ban",
"hidden": true,
"help_text": "Makes the use of liftover subworkflow, hg37 truth sets will liftover to hg38 and visa versa. Has to be either combined with itruth.config or --chain and --rename_chr."
"pattern": "^((test|truth)?,?)*(?<!,)$",
"help_text": "Makes the use of liftover subworkflow, hg37 truth sets will liftover to hg38 and visa versa. Has to be either combined with --chain and --rename_chr."
},
"chain": {
"type": "string",
"format": "file-path",
"exists": true,
"pattern": "^\\S+\\.(chain|bed)?(\\.gz)?$",
"description": "Path to the chain file sey required for liftover.",
"description": "Path to the chain file required for liftover.",
"help_text": "This parameter is *mandatory* if `--liftover` is true",
"fa_icon": "fas fa-file-csv"
},
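Because `liftover` changes from a boolean to a comma-separated string, the new `pattern` is what actually constrains the accepted values. A small Groovy sketch of what that regex (copied verbatim from the schema entry above) accepts; the assertions are illustrative only:

```groovy
// Regex copied from the nextflow_schema.json entry above.
def pattern = '^((test|truth)?,?)*(?<!,)$'

assert 'test'       ==~ pattern   // lift over the flagged test VCFs
assert 'truth'      ==~ pattern   // lift over the truth VCF and BED
assert 'test,truth' ==~ pattern   // both at once
assert ''           ==~ pattern   // default in nextflow.config: liftover disabled
assert !('test,'    ==~ pattern)  // trailing comma is rejected by the lookbehind
```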
@@ -1,5 +1,5 @@
//
// LIFTOVER_VCFS_TRUTH: SUBWORKFLOW TO LIFTOVER TRUTH VCFS HG37 TO HG38 OR HG38 TO HG37
// LIFTOVER_VCFS: SUBWORKFLOW TO LIFTOVER VCFS HG37 TO HG38 OR HG38 TO HG37
//

include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary'
@@ -11,10 +11,10 @@ include { SORT_BED } from '../../modules/local/custom/sor
include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge'


workflow LIFTOVER_VCFS_TRUTH {
workflow LIFTOVER_VCFS {
take:
truth_ch // channel: [val(meta), vcf]
high_conf_ch // channel: [bed]
ch_vcf // channel: [val(meta), vcf]
ch_bed // channel: [bed]
fasta // reference channel [val(meta), ref.fa]
chain // chain channel [val(meta), chain.gz]
rename_chr // reference channel [val(meta), chrlist.txt]
@@ -35,7 +35,7 @@ workflow LIFTOVER_VCFS_TRUTH {

// Use picard liftovervcf tool to convert vcfs
PICARD_LIFTOVERVCF(
truth_ch,
ch_vcf,
dictionary,
fasta,
chain
@@ -56,9 +56,9 @@
)
vcf_ch = BCFTOOLS_RENAME_CHR.out.vcf

// liftover high confidence file if given
// liftover high confidence bed file if given
UCSC_LIFTOVER(
high_conf_ch.map{file -> tuple([id: params.truth_id], file)},
ch_bed.map{file -> tuple([id: params.truth_id], file)},
chain.map{_meta, file -> file}
)
versions = versions.mix(UCSC_LIFTOVER.out.versions.first())
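The subworkflow is renamed from LIFTOVER_VCFS_TRUTH to LIFTOVER_VCFS and its inputs are generalised from truth_ch/high_conf_ch to ch_vcf/ch_bed, so either the truth set or the test VCFs can be lifted over. A minimal calling sketch (not from this commit; all channel contents below are placeholders):

```groovy
// Hypothetical caller, for illustration only. The six inputs follow the order
// used by the calls in prepare_vcfs_test.nf and prepare_vcfs_truth.nf below.
include { LIFTOVER_VCFS } from './subworkflows/local/liftover_vcfs'

workflow {
    ch_vcf     = Channel.of([[id: 'sample1'], file('sample1.vcf.gz')])   // [meta, vcf]
    ch_bed     = Channel.empty()                                         // no BED to lift over
    fasta      = Channel.value([[id: 'ref'],   file('genome.fa')])
    chain      = Channel.value([[id: 'chain'], file('GRCh38_to_GRCh37.chain.gz')])
    rename_chr = Channel.value([[id: 'chr'],   file('grch38_grch37.txt')])
    dictionary = Channel.value([[id: 'dict'],  file('genome.dict')])

    LIFTOVER_VCFS(ch_vcf, ch_bed, fasta, chain, rename_chr, dictionary)
}
```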
38 changes: 30 additions & 8 deletions subworkflows/local/prepare_vcfs_test.nf
@@ -2,13 +2,14 @@
// PREPARE_VCFS: SUBWORKFLOW TO PREPARE INPUT VCFS
//

include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename'
include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication'
include { VCF_VARIANT_FILTERING } from '../local/vcf_variant_filtering'
include { SPLIT_SMALL_VARIANTS_TEST } from '../local/split_small_variants_test'
include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm'
include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix'
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'
include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename'
include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication'
include { VCF_VARIANT_FILTERING } from '../local/vcf_variant_filtering'
include { SPLIT_SMALL_VARIANTS_TEST } from '../local/split_small_variants_test'
include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm'
include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix'
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'
include { LIFTOVER_VCFS } from '../local/liftover_vcfs'
include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_CONTIGS } from '../../modules/nf-core/bcftools/view'


@@ -17,14 +18,35 @@ workflow PREPARE_VCFS_TEST {
test_ch // channel: [val(meta), vcf]
fasta // reference channel [val(meta), ref.fa]
fai // reference channel [val(meta), ref.fa.fai]
chain // reference channel [val(meta), chain.gz]
rename_chr // reference channel [val(meta), chrlist.txt]
dictionary // reference channel [val(meta), genome.dict]

main:

versions = Channel.empty()

test_ch.branch{
def meta = it[0]
liftover: meta.liftover
other: true}.set{vcf}

vcf_ch = Channel.empty()

LIFTOVER_VCFS(
vcf.liftover,
Channel.empty(),
fasta,
chain,
rename_chr,
dictionary
)
versions = versions.mix(LIFTOVER_VCFS.out.versions.first())
vcf_ch = vcf_ch.mix(LIFTOVER_VCFS.out.vcf_ch,vcf.other)

// Add "query" to test sample
VCF_REHEADER_SAMPLENAME(
test_ch,
vcf_ch,
fai
)
versions = versions.mix(VCF_REHEADER_SAMPLENAME.out.versions.first())
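PREPARE_VCFS_TEST now takes the liftover reference inputs, branches the incoming test VCFs on the per-sample `meta.liftover` flag, sends only the flagged ones through LIFTOVER_VCFS, and mixes the result back with the untouched VCFs before reheadering. A self-contained toy sketch of that branch/mix pattern (sample names and paths are invented):

```groovy
// Toy illustration of the branch used in PREPARE_VCFS_TEST above.
workflow BRANCH_DEMO {
    main:
    Channel.of(
            [[id: 's1', liftover: true ], 's1.vcf.gz'],
            [[id: 's2', liftover: false], 's2.vcf.gz'])
        .branch { meta, vcf ->
            liftover: meta.liftover      // would go through LIFTOVER_VCFS
            other: true                  // passes straight on to reheadering
        }
        .set { vcf }

    vcf.liftover.view { meta, path -> "lift over:    ${meta.id}" }
    vcf.other.view    { meta, path -> "pass through: ${meta.id}" }
}
```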
12 changes: 6 additions & 6 deletions subworkflows/local/prepare_vcfs_truth.nf
@@ -7,7 +7,7 @@ include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm
include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'
include { VCF_REHEADER_SAMPLENAME } from '../local/vcf_reheader_samplename'
include { VCF_VARIANT_DEDUPLICATION } from '../local/vcf_variant_deduplication'
include { LIFTOVER_VCFS_TRUTH } from '../local/liftover_vcfs_truth'
include { LIFTOVER_VCFS } from '../local/liftover_vcfs'


workflow PREPARE_VCFS_TRUTH {
@@ -25,19 +25,19 @@ workflow PREPARE_VCFS_TRUTH {
versions = Channel.empty()

// if liftover option is set convert truth files
if (params.liftover){
if (params.liftover.contains("truth")){

LIFTOVER_VCFS_TRUTH(
LIFTOVER_VCFS(
truth_ch,
high_conf_ch,
fasta,
chain,
rename_chr,
dictionary
)
versions = versions.mix(LIFTOVER_VCFS_TRUTH.out.versions.first())
truth_ch = LIFTOVER_VCFS_TRUTH.out.vcf_ch
high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ _meta, bed -> [bed]}
versions = versions.mix(LIFTOVER_VCFS.out.versions.first())
truth_ch = LIFTOVER_VCFS.out.vcf_ch
high_conf_ch = LIFTOVER_VCFS.out.bed_ch.map{ _meta, bed -> [bed]}
}

// Reheader sample name for truth file - using meta.caller
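On the truth side, the gate changes from the old boolean check to a substring check, so a single comma-separated `params.liftover` can enable both the truth liftover here and, per the samplesheet schema description, the test-VCF liftover handled in the previous file. A short Groovy sketch of the gating (values taken from the schema and the test configs above):

```groovy
// Gating sketch only; 'test' and 'truth' are the two values allowed by the
// nextflow_schema.json pattern shown earlier.
['', 'test', 'truth', 'test,truth'].each { liftover ->
    def liftTruth = liftover.contains('truth')   // branch taken in PREPARE_VCFS_TRUTH
    println "params.liftover = '${liftover}' -> lift over truth set: ${liftTruth}"
}
```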
40 changes: 40 additions & 0 deletions tests/liftover_test.nf.test
@@ -0,0 +1,40 @@
nextflow_pipeline {

name "Test pipeline for liftover structural germline variants, tested benchmarking methods are truvari"
script "../main.nf"
tag "pipeline"
tag "structural"
tag "germline"
tag "liftover"
tag "hg37"
config "../conf/tests/liftover_test.config"

test("Params: --analysis 'germline' --variant_type 'structural' --method 'truvari' --liftover 'test'") {

when {
params {
outdir = "$outputDir"
}
}

then {
// stable_name: All files + folders in ${params.outdir}/ with a stable name
def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}'])
// stable_path: All files in ${params.outdir}/ with stable content
def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore')
assertAll(
{ assert workflow.success },
{ assert snapshot(
// Number of successful tasks
workflow.trace.succeeded().size(),
// pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions
removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"),
// All stable path name, with a relative path
stable_name,
// All files with stable contents
stable_path
).match() }
)
}
}
}