From 37d46d206d254f15cb254f47a114efc1d558abc1 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Thu, 11 Jul 2024 11:53:50 +0200
Subject: [PATCH 01/18] dysgu_added

---
 modules.json                                  |  5 +
 modules/nf-core/dysgu/environment.yml         |  7 ++
 modules/nf-core/dysgu/main.nf                 | 55 +++++++++++
 modules/nf-core/dysgu/meta.yml                | 68 +++++++++++++
 modules/nf-core/dysgu/tests/main.nf.test      | 96 +++++++++++++++++++
 modules/nf-core/dysgu/tests/main.nf.test.snap | 75 +++++++++++++++
 modules/nf-core/dysgu/tests/nextflow.config   |  5 +
 modules/nf-core/dysgu/tests/tags.yml          |  2 +
 8 files changed, 313 insertions(+)
 create mode 100644 modules/nf-core/dysgu/environment.yml
 create mode 100644 modules/nf-core/dysgu/main.nf
 create mode 100644 modules/nf-core/dysgu/meta.yml
 create mode 100644 modules/nf-core/dysgu/tests/main.nf.test
 create mode 100644 modules/nf-core/dysgu/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/dysgu/tests/nextflow.config
 create mode 100644 modules/nf-core/dysgu/tests/tags.yml

diff --git a/modules.json b/modules.json
index a64bdb2c7d..d64d140459 100644
--- a/modules.json
+++ b/modules.json
@@ -129,6 +129,11 @@
                         "installed_by": ["modules"],
                         "patch": "modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff"
                     },
+                    "dysgu": {
+                        "branch": "master",
+                        "git_sha": "c27498285a0beca2239b395cf88129c586a837fc",
+                        "installed_by": ["modules"]
+                    },
                     "ensemblvep/download": {
                         "branch": "master",
                         "git_sha": "3db4f8488315cd7d7cf3fcb64251f6603210e831",
diff --git a/modules/nf-core/dysgu/environment.yml b/modules/nf-core/dysgu/environment.yml
new file mode 100644
index 0000000000..5efb6db61a
--- /dev/null
+++ b/modules/nf-core/dysgu/environment.yml
@@ -0,0 +1,7 @@
+name: dysgu
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - dysgu
diff --git a/modules/nf-core/dysgu/main.nf b/modules/nf-core/dysgu/main.nf
new file mode 100644
index 0000000000..6aa1deea95
--- /dev/null
+++ b/modules/nf-core/dysgu/main.nf
@@ -0,0 +1,55 @@
+process DYSGU {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/dysgu:48830f55112c399e':
+        'community.wave.seqera.io/library/dysgu:faf71ac972284412' }"
+
+    input:
+    tuple val(meta), path(input), path(index)
+    tuple val(meta2), path(fasta), path(fai)
+
+    output:
+    tuple val(meta), path('*.vcf.gz')       , emit: vcf
+    tuple val(meta), path('*.vcf.gz.tbi')   , emit: tbi
+    path 'versions.yml'                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def args3 = task.ext.args3 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    dysgu run \\
+        -p ${task.cpus} \\
+        -x \\
+        $fasta \\
+        . \\
+        $input \\
+        | bgzip ${args2} --threads ${task.cpus} --stdout > ${prefix}.vcf.gz
+    tabix ${args3} ${prefix}.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        dysgu: \$(dysgu --version 2>&1)
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > ${prefix}.vcf.gz
+    touch ${prefix}.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        dysgu: \$(dysgu --version 2>&1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/dysgu/meta.yml b/modules/nf-core/dysgu/meta.yml
new file mode 100644
index 0000000000..67c6c70abf
--- /dev/null
+++ b/modules/nf-core/dysgu/meta.yml
@@ -0,0 +1,68 @@
+name: dysgu
+
+description: Dysgu calls structural variants (SVs) from mapped sequencing reads. It is designed for accurate and efficient detection of structural variations.
+keywords:
+  - structural variants
+  - sv
+  - vcf
+tools:
+  - dysgu:
+      description: Structural variant caller for mapped sequencing data
+      homepage: https://github.com/kcleal/dysgu
+      documentation: https://github.com/kcleal/dysgu/blob/master/README.rst
+      tool_dev_url: https://github.com/kcleal/dysgu
+      doi: "10.1093/nar/gkac039"
+      licence: ["GPL-3.0-or-later"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+  - input:
+      type: file
+      description: Input BAM file
+      pattern: "*.bam"
+  - index:
+      type: file
+      description: BAM index file
+      pattern: "*.bai"
+  - fasta:
+      type: file
+      description: Genome reference FASTA file
+      pattern: "*.{fa,fasta}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fai:
+      type: file
+      description: Genome reference FASTA index file
+      pattern: "*.{fa.fai,fasta.fai}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'sample1' ]
+  - vcf:
+      type: file
+      description: VCF file with identified structural variants
+      pattern: "*.{vcf.gz}"
+  - tbi:
+      type: file
+      description: The index of the BCF/VCF file
+      pattern: "*.{vcf.gz.tbi}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@famosab"
+  - "@poddarharsh15"
+maintainers:
+  - "@poddarharsh15"
diff --git a/modules/nf-core/dysgu/tests/main.nf.test b/modules/nf-core/dysgu/tests/main.nf.test
new file mode 100644
index 0000000000..1714b5b7ab
--- /dev/null
+++ b/modules/nf-core/dysgu/tests/main.nf.test
@@ -0,0 +1,96 @@
+nextflow_process {
+
+    name "Test Process DYSGU"
+    script "../main.nf"
+    process "DYSGU"
+    config "./nextflow.config"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "dysgu"
+
+
+    test("human - bam") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test'], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [ [ id:'reference'], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match() },
+                { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") }
+            )
+        }
+
+    }
+
+
+    test("human - cram") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test'], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)
+                ]
+                input[1] = [ [ id:'reference'], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match() },
+                { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") }
+            )
+        }
+
+    }
+
+
+    test("human - bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test'], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [ [ id:'reference'], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/dysgu/tests/main.nf.test.snap b/modules/nf-core/dysgu/tests/main.nf.test.snap
new file mode 100644
index 0000000000..ecf725f5d3
--- /dev/null
+++ b/modules/nf-core/dysgu/tests/main.nf.test.snap
@@ -0,0 +1,75 @@
+{
+    "human - bam - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
+                ],
+                "tbi": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-08T13:18:45.660262"
+    },
+    "human - bam": {
+        "content": [
+            [
+                "versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-08T13:19:56.62312"
+    },
+    "human - cram": {
+        "content": [
+            [
+                "versions.yml:md5,cf1e0487502108690603dd16f034bf5e"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-08T13:20:04.494134"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/dysgu/tests/nextflow.config b/modules/nf-core/dysgu/tests/nextflow.config
new file mode 100644
index 0000000000..5336ab55cf
--- /dev/null
+++ b/modules/nf-core/dysgu/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: DYSGU {
+         ext.args = '--exome '
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/dysgu/tests/tags.yml b/modules/nf-core/dysgu/tests/tags.yml
new file mode 100644
index 0000000000..6bffc95e91
--- /dev/null
+++ b/modules/nf-core/dysgu/tests/tags.yml
@@ -0,0 +1,2 @@
+dysgu:
+  - "modules/nf-core/dysgu/**"

From c6cdd2dc4516e2a8342cd09f063d2d16aa708761 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Thu, 11 Jul 2024 13:49:11 +0200
Subject: [PATCH 02/18] dysgu_main.nf_update

---
 .../local/bam_variant_calling_dysgu/main.nf   | 43 +++++++++++++++++++
 .../bam_variant_calling_germline_all/main.nf  |  1 +
 2 files changed, 44 insertions(+)
 create mode 100644 subworkflows/local/bam_variant_calling_dysgu/main.nf

diff --git a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf
new file mode 100644
index 0000000000..60ed61eb69
--- /dev/null
+++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf
@@ -0,0 +1,43 @@
+//
+// dysgu variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
+include { DYSGU } from '../../../modules/nf-core/dysgu/main'
+
+// Seems to be the consensus on upstream modules implementation too
+workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU {
+    take:
+    cram          // channel: [mandatory] [ meta, cram, crai ]
+    fasta         // channel: [mandatory] [ meta, fasta ]
+    fasta_fai     // channel: [mandatory] [ meta, fasta_fai ]
+    intervals     // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi] or [ [], []] if no intervals; intervals file contains all intervals
+
+    main:
+    versions = Channel.empty()
+
+    // Combine cram and intervals, account for 0 intervals
+    cram_intervals = cram.combine(intervals).map{ it ->
+        bed_gz = it.size() > 3 ? it[3] : []
+        bed_tbi = it.size() > 3 ? it[4] : []
+
+        [it[0], it[1], it[2], bed_gz, bed_tbi] // [ meta, cram, crai, bed_gz, bed_tbi ]
+    } // NOTE(review): DYSGU is called below with 4 arguments and this 5-element tuple, while the process declares two inputs, each a 3-element tuple - confirm the call matches the module
+
+    DYSGU(cram_intervals, fasta, fasta_fai, [])
+
+    
+    dysgu_vcf = DYSGU.out.vcf
+
+    // Only dysgu SV should get annotated
+    // add variantcaller to meta map
+    vcf = dysgu_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'dysgu' ], vcf ] }
+
+    versions = versions.mix(DYSGU.out.versions)
+
+    emit:
+    vcf      // channel: [ meta + [ variantcaller:'dysgu' ], vcf ]
+
+    versions // channel: versions.yml files mixed in from DYSGU
+}
diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf
index 79efd8bf94..2733ea1bd9 100644
--- a/subworkflows/local/bam_variant_calling_germline_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf
@@ -17,6 +17,7 @@ include { BAM_VARIANT_CALLING_SINGLE_TIDDIT
 include { SENTIEON_DNAMODELAPPLY                                                       } from '../../../modules/nf-core/sentieon/dnamodelapply/main'
 include { VCF_VARIANT_FILTERING_GATK                                                   } from '../vcf_variant_filtering_gatk/main'
 include { VCF_VARIANT_FILTERING_GATK as SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main'
+include { BAM_VARIANT_CALLING_DYSGU                                                    } from '../bam_variant_calling_dysgu/main'
 
 
 

From ab5af46cec4c76fb452ed87579375d288b4f88a2 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Thu, 11 Jul 2024 13:57:32 +0200
Subject: [PATCH 03/18] adding_dysgu_geramline_main

---
 .../bam_variant_calling_germline_all/main.nf    | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf
index 2733ea1bd9..5e03c492db 100644
--- a/subworkflows/local/bam_variant_calling_germline_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf
@@ -67,6 +67,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
     vcf_sentieon_haplotyper  = Channel.empty()
     vcf_strelka              = Channel.empty()
     vcf_tiddit               = Channel.empty()
+    vcf_dysgu                = Channel.empty()
 
     // BCFTOOLS MPILEUP
     if (tools.split(',').contains('mpileup')) {
@@ -191,6 +192,18 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
         versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions)
     }
 
+    // DYSGU
+    if (tools.split(',').contains('dysgu')) { // NOTE(review): the subworkflow is included and invoked as BAM_VARIANT_CALLING_DYSGU, but the two .out accesses below name BAM_VARIANT_CALLING_GERMLINE_DYSGU - confirm which name is intended
+        BAM_VARIANT_CALLING_DYSGU (
+            cram,
+            fasta,
+            fasta_fai
+        )
+
+        vcf_dysgu = BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.vcf
+        versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.versions)
+    }
+
     // SENTIEON DNASCOPE
     if (tools.split(',').contains('sentieon_dnascope')) {
         BAM_VARIANT_CALLING_SENTIEON_DNASCOPE(
@@ -350,7 +363,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
         vcf_mpileup,
         vcf_sentieon_haplotyper,
         vcf_strelka,
-        vcf_tiddit
+        vcf_tiddit,
+        vcf_dysgu
     )
 
     emit:
@@ -366,6 +380,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
     vcf_sentieon_dnascope
     vcf_sentieon_haplotyper
     vcf_tiddit
+    vcf_dysgu
 
     versions
 }

From 327d99b780e70e293059d6e939ac8f7b35829604 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Thu, 11 Jul 2024 14:00:20 +0200
Subject: [PATCH 04/18] dysgu.config_update

---
 conf/modules/dysgu.config | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 conf/modules/dysgu.config

diff --git a/conf/modules/dysgu.config b/conf/modules/dysgu.config
new file mode 100644
index 0000000000..8af87c5a70
--- /dev/null
+++ b/conf/modules/dysgu.config
@@ -0,0 +1,28 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args   = Additional arguments appended to command in module.
+        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix = File name prefix for output files.
+        ext.when   = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+
+// DYSGU
+
+process {
+    if (params.tools && params.tools.split(',').contains('dysgu')) {
+        withName: 'DYSGU_GERMLINE' {
+            ext.args   = { params.wes ? "--exome" : '' }
+            ext.prefix = { "${meta.id}.dysgu" }
+            publishDir = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/variant_calling/dysgu/${meta.id}" },
+                pattern: "*{dysgu}.{vcf.gz,vcf.gz.tbi}"
+            ]
+        }
+    }
+}

From 40b31094794ee47cdb9fe370ba1e0f561d5613ce Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Thu, 11 Jul 2024 14:45:36 +0200
Subject: [PATCH 05/18] dysgu_main.nf_update

---
 subworkflows/local/bam_variant_calling_dysgu/main.nf | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf
index 60ed61eb69..d783e4102e 100644
--- a/subworkflows/local/bam_variant_calling_dysgu/main.nf
+++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf
@@ -18,7 +18,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU {
     versions = Channel.empty()
 
     // Combine cram and intervals, account for 0 intervals
-    cram_intervals = cram.combine(intervals).map{ it ->
+    cram_intervals = cram.combine(intervals).map { it ->
         bed_gz = it.size() > 3 ? it[3] : []
         bed_tbi = it.size() > 3 ? it[4] : []
 
@@ -27,12 +27,11 @@ workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU {
 
     DYSGU(cram_intervals, fasta, fasta_fai, [])
 
-    
     dysgu_vcf = DYSGU.out.vcf
 
     // Only dysgu SV should get annotated
     // add variantcaller to meta map
-    vcf = dysgu_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'dysgu' ], vcf ] }
+    vcf = dysgu_vcf.map { meta, vcf -> [ meta + [ variantcaller:'dysgu' ], vcf ] }
 
     versions = versions.mix(DYSGU.out.versions)
 

From 99412bf76545c30e72afdffb23622ca471e818c3 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Thu, 11 Jul 2024 16:23:59 +0200
Subject: [PATCH 06/18] schema_update

---
 nextflow_schema.json | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 7545b9930e..1b4df230f6 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -356,7 +356,7 @@
                 },
                 "cf_ploidy": {
                     "type": "string",
-                    "default": "2",
+                    "default": 2,
                     "fa_icon": "fas fa-bacon",
                     "help_text": "In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs Example: ploidy=2 , ploidy=2,3,4. For more details, see the [manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html).",
                     "description": "Genome ploidy used by ControlFREEC",
@@ -1061,7 +1061,8 @@
                     "fa_icon": "far fa-check-circle",
                     "description": "Validation of parameters in lenient more.",
                     "hidden": true,
-                    "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+                    "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).",
+                    "default": true
                 },
                 "hook_url": {
                     "type": "string",

From 67354fd912742403916dd999d6db59f8ed2d88eb Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Thu, 11 Jul 2024 16:26:23 +0200
Subject: [PATCH 07/18] update_main.nf

---
 subworkflows/local/bam_variant_calling_dysgu/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf
index d783e4102e..661e3d057a 100644
--- a/subworkflows/local/bam_variant_calling_dysgu/main.nf
+++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf
@@ -7,7 +7,7 @@
 include { DYSGU } from '../../../modules/nf-core/dysgu/main'
 
 // Seems to be the consensus on upstream modules implementation too
-workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU {
+workflow BAM_VARIANT_CALLING_DYSGU {
     take:
     cram          // channel: [mandatory] [ meta, cram, crai ]
     fasta         // channel: [mandatory] [ meta, fasta ]

From 21f316fe83e3fe46b264e1d13e36ebb92807ae3d Mon Sep 17 00:00:00 2001
From: poddarharsh15 <45700858+poddarharsh15@users.noreply.github.com>
Date: Thu, 11 Jul 2024 17:06:12 +0200
Subject: [PATCH 08/18] Update dysgu.config

Co-authored-by: Maxime U Garcia <maxime.garcia@seqera.io>
---
 conf/modules/dysgu.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules/dysgu.config b/conf/modules/dysgu.config
index 8af87c5a70..9aa44e572c 100644
--- a/conf/modules/dysgu.config
+++ b/conf/modules/dysgu.config
@@ -15,7 +15,7 @@
 
 process {
     if (params.tools && params.tools.split(',').contains('dysgu')) {
-        withName: 'DYSGU_GERMLINE' {
+        withName: 'DYSGU' {
             ext.args   = { params.wes ? "--exome" : '' }
             ext.prefix = { "${meta.id}.dysgu" }
             publishDir = [

From 98f70ca676d6d46a740b519d294b1af5dbcae6b6 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Fri, 12 Jul 2024 11:18:52 +0200
Subject: [PATCH 09/18] tool_dysgu_add

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1b4df230f6..6d34a46f3b 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -112,7 +112,7 @@
                     "fa_icon": "fas fa-toolbox",
                     "description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
                     "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
-                    "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(?<!,)$"
+                    "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|dysgu|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(?<!,)$"
                 },
                 "skip_tools": {
                     "type": "string",

From a63bddb76f16ec3bacf8f0806b9751b69ce213da Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Fri, 12 Jul 2024 11:57:13 +0200
Subject: [PATCH 10/18] help_text_dysguaddded_schema

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 6d34a46f3b..2f28fe7bef 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -111,7 +111,7 @@
                     "type": "string",
                     "fa_icon": "fas fa-toolbox",
                     "description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
-                    "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
+                    "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: DYSGU, Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT \n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
                     "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|dysgu|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(?<!,)$"
                 },
                 "skip_tools": {

From a57e4f1a1c067f39e799a12ff5fbb7e951e28422 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Fri, 12 Jul 2024 13:53:46 +0200
Subject: [PATCH 11/18] docs_updated_dysgu

---
 docs/output.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/output.md b/docs/output.md
index 7f8455f95d..45562b60c1 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -45,6 +45,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
   - [Structural Variants](#structural-variants)
     - [Manta](#manta)
     - [TIDDIT](#tiddit)
+    - [DYSGU](#dysgu)
   - [Sample heterogeneity, ploidy and CNVs](#sample-heterogeneity-ploidy-and-cnvs)
     - [ASCAT](#ascat)
     - [CNVKit](#cnvkit)
@@ -639,6 +640,20 @@ It is optimized for analysis of germline variation in small sets of individuals
 
 </details>
 
+#### DYSGU
+
+[DYSGU](https://github.com/kcleal/dysgu) (pronounced duss-key) is a set of command line tools and a Python API for calling structural variants from paired-end or long-read sequencing data. For further reading and documentation see the [DYSGU manual](https://github.com/kcleal/dysgu/blob/master/README.rst).
+
+<details markdown="1">
+<summary>Output files for normal samples</summary>
+
+**Output directory: `{outdir}/variant_calling/dysgu/<sample>/`**
+
+- `<sample>.dysgu.vcf.gz` and `<sample>.dysgu.vcf.gz.tbi`
+  - VCF with tabix index containing SV calls
+
+</details>
+
 ### Sample heterogeneity, ploidy and CNVs
 
 #### ASCAT

From 787728844eb6320c7eacdf20ec02a73d56f3a530 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Mon, 9 Sep 2024 10:04:35 +0200
Subject: [PATCH 12/18] interval_added

---
 subworkflows/local/bam_variant_calling_germline_all/main.nf | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf
index fa2e44c571..b03fa3dd12 100644
--- a/subworkflows/local/bam_variant_calling_germline_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf
@@ -198,7 +198,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
         BAM_VARIANT_CALLING_DYSGU (
             cram,
             fasta,
-            fasta_fai
+            fasta_fai,
+            intervals
         )
 
         vcf_dysgu = BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.vcf

From cf40cb450fc1fa06e945af9081c32e675dcbd742 Mon Sep 17 00:00:00 2001
From: asp8200 <anders@mynucleus.com>
Date: Tue, 10 Sep 2024 07:50:14 +0000
Subject: [PATCH 13/18] Simplify the config of DYSGU and include from
 nextflow.config

---
 conf/modules/dysgu.config | 18 ++++++++----------
 nextflow.config           |  1 +
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/conf/modules/dysgu.config b/conf/modules/dysgu.config
index 9aa44e572c..f637ae4cc3 100644
--- a/conf/modules/dysgu.config
+++ b/conf/modules/dysgu.config
@@ -14,15 +14,13 @@
 // DYSGU
 
 process {
-    if (params.tools && params.tools.split(',').contains('dysgu')) {
-        withName: 'DYSGU' {
-            ext.args   = { params.wes ? "--exome" : '' }
-            ext.prefix = { "${meta.id}.dysgu" }
-            publishDir = [
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/variant_calling/dysgu/${meta.id}" },
-                pattern: "*{dysgu}.{vcf.gz,vcf.gz.tbi}"
-            ]
-        }
+    withName: 'DYSGU' {
+        ext.args   = { params.wes ? "--exome" : '' }
+        ext.prefix = { "${meta.id}.dysgu" }
+        publishDir = [
+            mode: params.publish_dir_mode,
+            path: { "${params.outdir}/variant_calling/dysgu/${meta.id}" },
+            pattern: "*.{vcf.gz,vcf.gz.tbi}"
+        ]
     }
 }
diff --git a/nextflow.config b/nextflow.config
index 84c8a75297..6c92634357 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -428,6 +428,7 @@ includeConfig 'conf/modules/ascat.config'
 includeConfig 'conf/modules/cnvkit.config'
 includeConfig 'conf/modules/controlfreec.config'
 includeConfig 'conf/modules/deepvariant.config'
+includeConfig 'conf/modules/dysgu.config'
 includeConfig 'conf/modules/freebayes.config'
 includeConfig 'conf/modules/haplotypecaller.config'
 includeConfig 'conf/modules/joint_germline.config'

From c49561ee4eaf63cf037e50abd4a3595fbc5e2c45 Mon Sep 17 00:00:00 2001
From: asp8200 <anders@mynucleus.com>
Date: Tue, 10 Sep 2024 07:51:31 +0000
Subject: [PATCH 14/18] Using separate input channels for fasta and fasta_fai
 in DYSGU module

---
 modules/nf-core/dysgu/main.nf                        |  3 ++-
 subworkflows/local/bam_variant_calling_dysgu/main.nf | 12 +-----------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/modules/nf-core/dysgu/main.nf b/modules/nf-core/dysgu/main.nf
index 6aa1deea95..17c4d381c3 100644
--- a/modules/nf-core/dysgu/main.nf
+++ b/modules/nf-core/dysgu/main.nf
@@ -9,7 +9,8 @@ process DYSGU {
 
     input:
     tuple val(meta), path(input), path(index)
-    tuple val(meta2), path(fasta), path(fai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
 
     output:
     tuple val(meta), path('*.vcf.gz')       , emit: vcf
diff --git a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf
index 661e3d057a..68a186c41e 100644
--- a/subworkflows/local/bam_variant_calling_dysgu/main.nf
+++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf
@@ -12,20 +12,11 @@ workflow BAM_VARIANT_CALLING_DYSGU {
     cram          // channel: [mandatory] [ meta, cram, crai ]
     fasta         // channel: [mandatory] [ meta, fasta ]
     fasta_fai     // channel: [mandatory] [ meta, fasta_fai ]
-    intervals     // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi] or [ [], []] if no intervals; intervals file contains all intervals
 
     main:
     versions = Channel.empty()
 
-    // Combine cram and intervals, account for 0 intervals
-    cram_intervals = cram.combine(intervals).map { it ->
-        bed_gz = it.size() > 3 ? it[3] : []
-        bed_tbi = it.size() > 3 ? it[4] : []
-
-        [it[0], it[1], it[2], bed_gz, bed_tbi]
-    }
-
-    DYSGU(cram_intervals, fasta, fasta_fai, [])
+    DYSGU(cram, fasta, fasta_fai)
 
     dysgu_vcf = DYSGU.out.vcf
 
@@ -37,6 +28,5 @@ workflow BAM_VARIANT_CALLING_DYSGU {
 
     emit:
     vcf
-
     versions
 }

From bc3a3f99691e2c9b1769e236d5a07d228623d1e3 Mon Sep 17 00:00:00 2001
From: asp8200 <anders@mynucleus.com>
Date: Tue, 10 Sep 2024 07:52:56 +0000
Subject: [PATCH 15/18] Removing intervals from input to
 BAM_VARIANT_CALLING_DYSGU

---
 .../local/bam_variant_calling_germline_all/main.nf         | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf
index b03fa3dd12..3c47fe16ca 100644
--- a/subworkflows/local/bam_variant_calling_germline_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf
@@ -198,12 +198,11 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
         BAM_VARIANT_CALLING_DYSGU (
             cram,
             fasta,
-            fasta_fai,
-            intervals
+            fasta_fai
         )
 
-        vcf_dysgu = BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.vcf
-        versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.versions)
+        vcf_dysgu = BAM_VARIANT_CALLING_DYSGU.out.vcf
+        versions = versions.mix(BAM_VARIANT_CALLING_DYSGU.out.versions)
     }
 
     // SENTIEON DNASCOPE

From 4ec3ece9dd403a54cd1efa46f2738b8815658a83 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Tue, 10 Sep 2024 10:42:01 +0200
Subject: [PATCH 16/18] version_update_dv

---
 modules/nf-core/deepvariant/main.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf
index 507b6c1174..e392fc2021 100644
--- a/modules/nf-core/deepvariant/main.nf
+++ b/modules/nf-core/deepvariant/main.nf
@@ -3,7 +3,7 @@ process DEEPVARIANT {
     label 'process_high'
 
     //Conda is not supported at the moment
-    container "nf-core/deepvariant:1.5.0"
+    container "nf-core/deepvariant:1.6.1"
 
     input:
     tuple val(meta), path(input), path(index), path(intervals)
@@ -38,8 +38,8 @@ process DEEPVARIANT {
         --output_gvcf=${prefix}.g.vcf.gz \\
         ${args} \\
         ${regions} \\
-        --intermediate_results_dir=. \\
-        --num_shards=${task.cpus}
+        --intermediate_results_dir=tmp \\
+        --num_shards=1
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From ed8f8bac34d028af2ab500400da3eb2fc303ae32 Mon Sep 17 00:00:00 2001
From: Harsh Poddar <poddarharah15@gmail.com>
Date: Tue, 10 Sep 2024 10:42:39 +0200
Subject: [PATCH 17/18] removed_spaces

---
 modules/nf-core/dragmap/align/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/dragmap/align/main.nf b/modules/nf-core/dragmap/align/main.nf
index 30e47992f3..7c831063c2 100644
--- a/modules/nf-core/dragmap/align/main.nf
+++ b/modules/nf-core/dragmap/align/main.nf
@@ -3,7 +3,7 @@ process DRAGMAP_ALIGN {
     label 'process_high'
 
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:df80ed8d23d0a2c43181a2b3dd1b39f2d00fab5c-0':
         'biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:df80ed8d23d0a2c43181a2b3dd1b39f2d00fab5c-0' }"
 

From c5e3d656254da43c51e3ae557c190970553958fb Mon Sep 17 00:00:00 2001
From: poddarharsh15 <45700858+poddarharsh15@users.noreply.github.com>
Date: Tue, 10 Sep 2024 13:01:38 +0200
Subject: [PATCH 18/18] Update main.nf

---
 modules/nf-core/deepvariant/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf
index e392fc2021..ed95f066be 100644
--- a/modules/nf-core/deepvariant/main.nf
+++ b/modules/nf-core/deepvariant/main.nf
@@ -3,7 +3,7 @@ process DEEPVARIANT {
     label 'process_high'
 
     //Conda is not supported at the moment
-    container "nf-core/deepvariant:1.6.1"
+    container "nf-core/deepvariant:1.5.0"
 
     input:
     tuple val(meta), path(input), path(index), path(intervals)