diff --git a/CHANGELOG.md b/CHANGELOG.md index 8282efd2..95a68158 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.1.2 - Groovy Ghent - [Mar 21 2023] + +### New features + +1. Added a parameter for setting the splitting depth threshold `--split_threshold FLOAT` + +### Changes + +1. Changed the default splitting threshold to 0.2 instead of 0.3 + ## v1.1.1 - Golden Ghent - [Mar 20 2023] ### Changes diff --git a/modules/local/split_beds/main.nf b/modules/local/split_beds/main.nf index 9cd1411c..9938a6e6 100644 --- a/modules/local/split_beds/main.nf +++ b/modules/local/split_beds/main.nf @@ -9,6 +9,7 @@ process SPLIT_BEDS { input: tuple val(meta), path(bed), val(sample_count) + val(single_threshold) output: tuple val(meta), path("*.bed") , emit: beds @@ -19,15 +20,15 @@ process SPLIT_BEDS { script: def prefix = task.ext.prefix ?: meta.id - def threshold = sample_count * 0.3 + def threshold = sample_count * single_threshold // This module will split a BED file created with goleft/indexsplit into - // multiple BED files. All regions that have a scaled data size lower than 0.3 * amount of samples + // multiple BED files. All regions that have a scaled data size lower than or equal to single_threshold * amount of samples // will be merged into one BED file. All regions that have a higher scaled data size // will be put into their own BED file. Also all regions with no reads are removed. 
""" awk -vFS="\t" '{ if (\$0 ~ /^[^#].*\$/ && \$5 >= 1) { - if (\$4 >= ${threshold}) { + if (\$4 > ${threshold}) { print \$0 > sprintf("${prefix}_%s_%s_%s.bed", \$1, \$2, \$3) } else { print \$0 > "${prefix}_others.bed" diff --git a/nextflow.config b/nextflow.config index db7358fd..cd8c8745 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { // Pipeline specific parameters scatter_count = 40 - // bed_merge_distance = 3000000 + split_threshold = 0.2 filter = false annotate = false gemini = false @@ -244,7 +244,7 @@ manifest { description = 'A nextflow pipeline for calling and annotating variants' mainScript = 'main.nf' nextflowVersion = '!>=22.10.5' - version = '1.1.1' + version = '1.1.2' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 65b8ea31..694cd428 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -137,6 +137,11 @@ "default": 40, "description": "The amount of scattering that should happen per sample. Increase this number to increase the pipeline run speed, but at the tradeoff of using more IO and disk space. The actual scatter value can differ a bit (usually not a bigger difference than 1) when using alt contigs in the reference." }, + "split_threshold": { + "type": "number", + "default": 0.2, + "description": "The exclusive threshold to use when defining whether a region has very low depth. 
This value is multiplied by the sample count, and all small, low depth regions for WGS that do not exceed the result are concatenated into one BED file" + }, "species": { "type": "string", "default": "homo_sapiens", diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index a5d44c6f..54194e81 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -32,7 +32,8 @@ workflow GERMLINE_VARIANT_CALLING { // SPLIT_BEDS( - beds.map { it + [1] } + beds.map { it + [1] }, + params.split_threshold.toFloat() ) ch_versions = ch_versions.mix(SPLIT_BEDS.out.versions.first()) diff --git a/subworkflows/local/joint_genotyping.nf b/subworkflows/local/joint_genotyping.nf index abe0e9b8..69edba73 100644 --- a/subworkflows/local/joint_genotyping.nf +++ b/subworkflows/local/joint_genotyping.nf @@ -53,7 +53,8 @@ workflow JOINT_GENOTYPING { } } [ meta, bed, meta.family_count ] - } + }, + params.split_threshold.toFloat() ) ch_versions = ch_versions.mix(SPLIT_BEDS.out.versions.first())