diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 741538bc..be927121 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
matrix:
parameters:
- ""
- - "--preset ONT_R10 --input https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/testdata/samplesheet_multisample_bam_ont.csv --split_fastq 2 --parallel_snv 1"
+ - "--preset ONT_R10 --input https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam_ont.csv --split_fastq 2 --parallel_snv 1"
NXF_VER:
- "23.04.0"
- "latest-everything"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1eaa0a6d..b54e6a6d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#252](https://github.com/genomic-medicine-sweden/nallo/pull/252) - Added a new `SCATTER_GENOME` subworkflow
- [#255](https://github.com/genomic-medicine-sweden/nallo/pull/255) - Added a new `RANK_VARIANTS` subworkflow to rank SNVs using genmod
- [#261](https://github.com/genomic-medicine-sweden/nallo/pull/261) - Added a `--skip_rank_variants` parameter to skip the rank_variants subworkflow
+- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Added a `project` column to the samplesheet
- [#266](https://github.com/genomic-medicine-sweden/nallo/pull/266) - Added CADD to dynamically calculate indel CADD-scores
- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Added SNV phasing stats to MultiQC
- [#271](https://github.com/genomic-medicine-sweden/nallo/pull/271) - Added a `--skip_aligned_read_qc` parameter to skip the qc aligned reads subworkflow
@@ -43,6 +44,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#261](https://github.com/genomic-medicine-sweden/nallo/pull/261) - Changed SNV annotation to run in parallel
- [#261](https://github.com/genomic-medicine-sweden/nallo/pull/261) - Changed SNV output file names and directory structure
- [#262](https://github.com/genomic-medicine-sweden/nallo/pull/262) - Updated README
+- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Changed PED file creation from groovy script to process
+- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Changed all `multisample` filenames to `{project}` from samplesheet
- [#268](https://github.com/genomic-medicine-sweden/nallo/pull/268) - Only output unphased alignments when phasing is off
- [#268](https://github.com/genomic-medicine-sweden/nallo/pull/268) - Changed alignment output file names and directory structure
- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Changed whatshap stats to always run, regardless of phasing software, and changed the output from `*.stats.tsv.gz` to `*.stats.tsv` to allow being picked up by MultiQC
@@ -57,6 +60,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#243](https://github.com/genomic-medicine-sweden/nallo/pull/243) - Removed VEP report from output files
- [#257](https://github.com/genomic-medicine-sweden/nallo/pull/257) - Removed obsolete TODO statements
- [#258](https://github.com/genomic-medicine-sweden/nallo/pull/258) - Removed VCF report from DeepVariant output
+- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Removed the option to provide extra SNF files to Sniffles with `--extra_snfs`
### `Fixed`
@@ -69,14 +73,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| Old parameter | New parameter |
| ------------------ | -------------------------- |
-| | `--deepvariant_model_type` |
-| `--extra_gvcfs` | |
| `--skip_repeat_wf` | `--skip_repeat_calling` |
| `--skip_repeat_wf` | `--skip_repeat_annotation` |
+| | `--deepvariant_model_type` |
| | `--skip_rank_variants` |
| | `--skip_aligned_read_qc` |
| | `--cadd_resources` |
| | `--cadd_prescored` |
+| `--extra_gvcfs` | |
+| `--extra_snfs` | |
> [!NOTE]
> Parameter has been updated if both old and new parameter information is present.
diff --git a/README.md b/README.md
index 3c4b2acb..a557844f 100644
--- a/README.md
+++ b/README.md
@@ -57,9 +57,9 @@ Prepare a samplesheet with input data:
`samplesheet.csv`
```
-sample,file,family_id,paternal_id,maternal_id,sex,phenotype
-HG002,/path/to/HG002.fastq.gz,FAM1,HG003,HG004,1,2
-HG005,/path/to/HG005.bam,FAM1,HG003,HG004,2,1
+project,sample,file,family_id,paternal_id,maternal_id,sex,phenotype
+testrun,HG002,/path/to/HG002.fastq.gz,FAM1,HG003,HG004,1,2
+testrun,HG005,/path/to/HG005.bam,FAM1,HG003,HG004,2,1
```
Now, you can run the pipeline using:
diff --git a/assets/schema_gvcfs.json b/assets/schema_gvcfs.json
deleted file mode 100644
index 0ae2d250..00000000
--- a/assets/schema_gvcfs.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
- "$schema": "http://json-schema.org/draft-07/schema",
- "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_gvcfs.json",
- "title": "genomic-medicine-sweden/nallo pipeline - params.extra_gvcfs schema",
- "description": "Schema for the file provided with params.extra_gvcfs",
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "sample": {
- "type": "string",
- "pattern": "^\\S+$",
- "errorMessage": "Sample name must be provided and cannot contain spaces",
- "meta": ["id"]
- },
- "file": {
- "format": "file-path",
- "type": "string",
- "pattern": "^\\S+\\.(g\\.)?(g)?vcf\\.gz$",
- "errorMessage": "gVCF file must be provided, cannot contain spaces and must have extension 'g.vcf.gz' or 'gvcf.gz'"
- }
- },
- "required": ["sample", "file"]
- }
-}
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 63c61f99..46c00ba6 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -48,8 +48,14 @@
"enum": [0, 1, 2],
"errorMessage": "Phenoype must be provided as 0 (missing), 1 (unaffected) or 2 (affected)",
"meta": ["phenotype"]
+ },
+ "project": {
+ "type": "string",
+ "pattern": "^\\S+$",
+ "errorMessage": "Project name must be provided and cannot contain spaces, needs to be the same for all samples",
+ "meta": ["project"]
}
},
- "required": ["sample", "file", "family_id", "paternal_id", "maternal_id", "sex", "phenotype"]
+ "required": ["sample", "file", "family_id", "paternal_id", "maternal_id", "sex", "phenotype", "project"]
}
}
diff --git a/assets/schema_snfs.json b/assets/schema_snfs.json
deleted file mode 100644
index 59d45232..00000000
--- a/assets/schema_snfs.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
- "$schema": "http://json-schema.org/draft-07/schema",
- "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_snfs.json",
- "title": "genomic-medicine-sweden/nallo pipeline - params.extra_snfs schema",
- "description": "Schema for the file provided with params.extra_snfs",
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "sample": {
- "type": "string",
- "pattern": "^\\S+$",
- "errorMessage": "Sample name must be provided and cannot contain spaces",
- "meta": ["id"]
- },
- "file": {
- "format": "file-path",
- "type": "string",
- "pattern": "^\\S+\\.snf$",
- "errorMessage": "SNF file must be provided, cannot contain spaces and must have extension '.snf"
- }
- },
- "required": ["sample", "file"]
- }
-}
diff --git a/conf/modules/general.config b/conf/modules/general.config
index ea2e3e13..fe3718e5 100644
--- a/conf/modules/general.config
+++ b/conf/modules/general.config
@@ -94,6 +94,13 @@ process {
]
}
+ withName: '.*:NALLO:CREATE_PEDIGREE_FILE' {
+ publishDir = [
+ path: { "${params.outdir}/pedigree" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ ]
+ }
+
withName: '.*:NALLO:SPLIT_BED_CHUNKS' {
publishDir = [
enabled: false
diff --git a/conf/modules/structural_variant_calling.config b/conf/modules/structural_variant_calling.config
index b5b73eed..8e1e5a28 100644
--- a/conf/modules/structural_variant_calling.config
+++ b/conf/modules/structural_variant_calling.config
@@ -33,10 +33,10 @@ process {
withName: '.*:STRUCTURAL_VARIANT_CALLING:SNIFFLES_MULTISAMPLE' {
- ext.prefix = 'multisample_sniffles'
+ ext.prefix = { "${meta.id}_sniffles" }
publishDir = [
- path: { "${params.outdir}/sv_calling/sniffles/multi_sample" },
+ path: { "${params.outdir}/sv_calling/sniffles/multi_sample/${meta.id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
diff --git a/conf/test.config b/conf/test.config
index ea54eaea..7c96ecd8 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -23,7 +23,7 @@ params {
// Genome references
fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz'
- input = params.pipelines_testdata_base_path + 'nallo/testdata/samplesheet.csv'
+ input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/2948776ddf24ea131f527aa1f2dc23a43bb7b952/testdata/samplesheet.csv'
bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed'
diff --git a/docs/output.md b/docs/output.md
index a4a0302f..d288ad65 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -24,7 +24,7 @@
- [Repeat annotation](#repeat-annotation)
- [SNV Annotation](#snv-annotation)
- [Ranked Variants](#ranked-variants)
- - [SNV Calling](#snv-calling)
+ - [SV Calling](#sv-calling)
## Pipeline overview
@@ -245,10 +245,10 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files from Somalier
-- `{outputdir}/qc_aligned_reads/somalier/relate/mutlisample/`
- - `*.html`: HTML report
- - `*.pairs.tsv`: Output information in sample pairs
- - `*.samples.tsv`: Output information per sample
+- `{outputdir}/qc_aligned_reads/somalier/relate/{project}/`
+ - `{project}.html`: HTML report
+ - `{project}.pairs.tsv`: Output information in sample pairs
+ - `{project}.samples.tsv`: Output information per sample
### Raw read QC
@@ -274,7 +274,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files from TRGT
-- `{outputdir}/repeat_calling/trgt/multi_sample/multisample/`
+- `{outputdir}/repeat_calling/trgt/multi_sample/{project}/`
- `*.vcf.gz`: Merged VCF for all samples
- `*.vcf.gz.tbi`: Index of the corresponding VCF file
- `{outputdir}/repeat_calling/trgt/single_sample/{sample}/`
@@ -305,9 +305,9 @@ In case of affected samples, [echtvar](https://github.com/brentp/echtvar) and [V
Output files from SNV Annotation
-- `{outputdir}/databases/echtvar/encode/multisample/`
+- `{outputdir}/databases/echtvar/encode/{project}/`
- `*.zip`: Database with AF and AC for all samples run
-- `{outputdir}/snvs/{single_sample,multi_sample/multisample}/`
+- `{outputdir}/snvs/{single_sample,multi_sample/{project}}/`
- `*_snvs_annotated*.vcf.gz`: VCF with annotated variants
- `*_snvs_annotated*.vcf.gz.tbi`: Index of the corresponding VCF file
- `{outputdir}/snvs/stats/single_sample/`
@@ -327,29 +327,26 @@ In case of affected samples, [echtvar](https://github.com/brentp/echtvar) and [V
Output files
-- `{outputdir}/snvs/{single_sample,multi_sample/multisample}/`
- - `*_snvs_annotated_ranked.vcf.gz`: VCF with annotated and ranked variants
- - `*_snvs_annotated_ranked.vcf.gz.tbi`: Index of the corresponding VCF file
+- `{outputdir}/snvs/single_sample/{sample}/`
+ - `{sample}_snv_annotated_ranked.vcf.gz`: VCF with annotated and ranked variants
+ - `{sample}_snv_annotated_ranked.vcf.gz.tbi`: Index of the corresponding VCF file
+- `{outputdir}/snvs/multi_sample/{project}/`
+ - `{project}_snv_annotated_ranked.vcf.gz`: VCF with annotated and ranked variants
+ - `{project}_snv_annotated_ranked.vcf.gz.tbi`: Index of the corresponding VCF file
-### SNV Calling
+### SV Calling
[Sniffles](https://github.com/fritzsedlazeck/Sniffles) is used to call and merge structural variants.
Output files from SNV Calling
-- `{outputdir}/sv_calling/multi_sample/`
+- `{outputdir}/sv_calling/sniffles/multi_sample/{project}`
- `*.vcf.gz`: VCF with variants
- `*.vcf.gz.tbi`: Index of the corresponding VCF file
- `{outputdir}/sv_calling/single_sample/{sample}`
- `*.snf`: Sniffles SNF file
- `*.vcf.gz`: VCF with variants
- `*.vcf.gz.tbi`: Index of the corresponding VCF file
-- `{outputdir}/snv_calling/single_sample/deepvariant/gvcf/{sample}/`
- - `*.g.vcf.gz`: gVCF with variants
- - `*.g.vcf.gz.tbi`: Index of the corresponding gVCF file
-- `{outputdir}/snv_calling/single_sample/deepvariant/vcf/{sample}/`
- - `*.vcf.gz`: VCF with variants
- - `*.vcf.gz.tbi`: Index of the corresponding VCF file
diff --git a/docs/usage.md b/docs/usage.md
index 7dc2f7ad..7eb15085 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -57,18 +57,20 @@ You will need to create a samplesheet with information about the samples you wou
--input '[path to samplesheet file]'
```
-It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below.
+It has to be a comma-separated file with 8 columns, and a header row as shown in the examples below.
`file` can either be a gzipped-fastq file or an aligned or unalinged BAM file (BAM files will be converted to FASTQ and aligned again).
+`project` needs to be the same for all samples in a run.
If you don't have related samples, `family_id` could be set to sample name, and `paternal_id` and `maternal_id` should be set to 0.
```console
-sample,file,family_id,paternal_id,maternal_id,sex,phenotype
-HG002,/path/to/HG002.fastq.gz,FAM,HG003,0,1,2
-HG003,/path/to/HG003.bam,FAM,0,0,2,1
+project,sample,file,family_id,paternal_id,maternal_id,sex,phenotype
+testrun,HG002,/path/to/HG002.fastq.gz,FAM,HG003,0,1,2
+testrun,HG003,/path/to/HG003.bam,FAM,0,0,2,1
```
| Fields | Description |
| ------------- | ------------------------------------------------------------------------------------------------------------------------- |
+| `project`     | Project name must be provided and cannot contain spaces, needs to be the same for all samples.                            |
| `sample` | Custom sample name, cannot contain spaces. |
| `file` | Absolute path to gzipped FASTQ or BAM file. File has to have the extension ".fastq.gz", .fq.gz" or ".bam". |
| `family_id` | "Family ID must be provided and cannot contain spaces. If no family ID is available you can use the same ID as the sample |
@@ -127,16 +129,6 @@ cadd,/path/to/cadd.v1.6.hg38.zip
- If running without `--skip_cnv_calling`, expected CN regions for your reference genome can be downloaded from [HiFiCNV GitHub](https://github.com/PacificBiosciences/HiFiCNV/tree/main/data) to supply with `--hificnv_xy`, `--hificnv_xx` (expected_cn) and `--hificnv_exclude` (excluded_regions).
-- If you want to include extra samples for mili-sample calling of SVs - prepare a samplesheet with .snf files from Sniffles to supply with `--extra_snfs`:
-
-`extra_snfs.csv`
-
-```
-sample,file
-HG01123,/path/to/HG01123_sniffles.snf
-HG01124,/path/to/HG01124_sniffles.snf
-```
-
- If running without `--skip_call_paralogs`, the reference genome needs to be hg38
- If running without `--skip_mapping_wf`, a VCF of known polymorphic sites (e.g. [sites.hg38.vcg.gz](https://github.com/brentp/somalier/files/3412456/sites.hg38.vcf.gz)) needs to be supplied with `--somalier_sites`, from which sex will be inferred if possible.
@@ -255,7 +247,6 @@ Different processes may need extra input files
| Parameter | Description | Type | Default | Required | Hidden |
| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
| `dipcall_par` | Provide a bed file of chrX PAR regions for dipcall | `string` | | | |
-| `extra_snfs` | Extra input files for Sniffles | `string` | | | |
| `tandem_repeats` | Tandem repeat BED-file for sniffles | `string` | | | |
| `trgt_repeats` | BED-file for repeats to be genotyped | `string` | | | |
| `snp_db` | Extra echtvar-databases to annotate SNVs with | `string` | | | |
diff --git a/lib/CustomFunctions.groovy b/lib/CustomFunctions.groovy
deleted file mode 100644
index 4f9979b4..00000000
--- a/lib/CustomFunctions.groovy
+++ /dev/null
@@ -1,21 +0,0 @@
-import nextflow.Nextflow
-
-class CustomFunctions {
-
- // Function to generate a pedigree file
- public static File makePed(samples, outdir) {
- def case_name = "multisample"
- def outfile = new File(outdir +"/pipeline_info/${case_name}" + '.ped')
- outfile.text = ['#family_id', 'sample_id', 'father', 'mother', 'sex', 'phenotype'].join('\t')
- def samples_list = []
- for(int i = 0; i${project}.ped
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ create_pedigree_file: v1.0
+ python: \$(python --version | sed 's/Python //g')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${project}.ped
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ create_pedigree_file: v1.0
+ python: \$(python --version | sed 's/Python //g')
+ END_VERSIONS
+ """
+}
diff --git a/nextflow.config b/nextflow.config
index 2f2dd9f5..592e5f28 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -15,7 +15,6 @@ params {
cadd_resources = null
cadd_prescored = null
dipcall_par = null
- extra_snfs = null
tandem_repeats = null
trgt_repeats = null
variant_catalog = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index ef0e0217..4b9c4470 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -422,14 +422,6 @@
"format": "file-path",
"exists": true
},
- "extra_snfs": {
- "type": "string",
- "description": "Extra input files for Sniffles",
- "pattern": "^\\S+\\.csv$",
- "format": "file-path",
- "schema": "assets/schema_snfs.json",
- "exists": true
- },
"tandem_repeats": {
"type": "string",
"format": "file-path",
diff --git a/subworkflows/local/bam_infer_sex.nf b/subworkflows/local/bam_infer_sex.nf
index 3f49e5e5..a394d7ff 100644
--- a/subworkflows/local/bam_infer_sex.nf
+++ b/subworkflows/local/bam_infer_sex.nf
@@ -11,7 +11,6 @@ workflow BAM_INFER_SEX {
ch_ped // channel: [ val(meta), path(ped) ]
main:
-
ch_versions = Channel.empty()
// Extract sites
@@ -24,9 +23,9 @@ workflow BAM_INFER_SEX {
ch_versions = ch_versions.mix(SOMALIER_EXTRACT.out.versions)
SOMALIER_EXTRACT.out.extract
- .map { meta, extract -> [ [ id: 'multisample' ], extract ] }
+ .map { meta, extract -> [ [ id: meta.project ], extract ] }
.groupTuple()
- .join( ch_ped.map { ped -> [ [ id:'multisample'], ped ] } )
+ .join( ch_ped )
.set { ch_somalier_relate_in }
// Infer sex
@@ -59,7 +58,8 @@ workflow BAM_INFER_SEX {
maternal_id : meta.maternal_id,
sex : meta.sex == 0 ? somalier.sex.toInteger() : meta.sex,
phenotype : meta.phenotype,
- single_end : meta.single_end
+ single_end : meta.single_end,
+ project : meta.project
]
[ new_meta, bam, bai ]
}
diff --git a/subworkflows/local/call_repeat_expansions/main.nf b/subworkflows/local/call_repeat_expansions/main.nf
index 5b6a43d1..4999e0bf 100644
--- a/subworkflows/local/call_repeat_expansions/main.nf
+++ b/subworkflows/local/call_repeat_expansions/main.nf
@@ -33,7 +33,7 @@ workflow CALL_REPEAT_EXPANSIONS {
BCFTOOLS_SORT_TRGT.out.vcf
.join( BCFTOOLS_SORT_TRGT.out.tbi )
- .map { meta, bcf, csi -> [ [ id : 'multisample' ], bcf, csi ] }
+ .map { meta, bcf, csi -> [ [ id : meta.project ], bcf, csi ] }
.groupTuple()
.set{ ch_bcftools_merge_in }
diff --git a/subworkflows/local/short_variant_calling/main.nf b/subworkflows/local/short_variant_calling/main.nf
index dacaf4ac..002ab504 100644
--- a/subworkflows/local/short_variant_calling/main.nf
+++ b/subworkflows/local/short_variant_calling/main.nf
@@ -60,13 +60,16 @@ workflow SHORT_VARIANT_CALLING {
// This creates a multisample VCF, with regions from ONE bed file
DEEPVARIANT.out.gvcf
- .map { meta, gvcf -> [ meta.region.name, meta.phenotype == 2, gvcf ] }
+ .map { meta, gvcf ->
+ [ meta.region.name, meta.project, meta.phenotype == 2, gvcf ]
+ }
.groupTuple() // Group all files together per region
// If any of the samples in the VCF have an affected phenotype (2)
// add this to the meta of the multisample VCF to know if we should run RANK_VARIANTS or not
- .map { region, affected, gvcfs ->
+ .map { meta, project, affected, gvcfs ->
new_meta = [
- 'id': region,
+ 'id': meta,
+ 'project': project.first(), // Works only because only one project per run is allowed
'contains_affected': affected.any(),
]
[ new_meta, gvcfs ]
diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
index 589e60ef..8212abbf 100644
--- a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
+++ b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
@@ -75,10 +75,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:07:01.955811332"
+ "timestamp": "2024-08-09T12:33:33.642550865"
},
"2 samples - 2 bed, fasta, fai, bed": {
"content": [
@@ -214,10 +214,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:08:56.682263691"
+ "timestamp": "2024-08-09T12:34:36.087668576"
},
"2 samples - 2 bed, fasta, fai, bed - stub": {
"content": [
@@ -353,10 +353,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:09:52.997168986"
+ "timestamp": "2024-08-09T12:35:27.910838148"
},
"1 sample - no bed, fasta, fai, []": {
"content": [
@@ -442,10 +442,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:06:05.618203586"
+ "timestamp": "2024-08-09T12:32:38.562683632"
},
"1 sample - 1 bed, fasta, fai, []": {
"content": [
@@ -523,10 +523,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:06:33.975664464"
+ "timestamp": "2024-08-09T12:33:06.128266568"
},
"1 sample - 1 bed, fasta, fai, [] - stub": {
"content": [
@@ -604,10 +604,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:09:16.393719634"
+ "timestamp": "2024-08-09T12:34:56.051878451"
},
"1 sample - 1 bed, fasta, fai, bed - stub": {
"content": [
@@ -685,10 +685,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:09:26.530459549"
+ "timestamp": "2024-08-09T12:35:05.660557092"
},
"1 sample - no bed, fasta, fai, [] - stub": {
"content": [
@@ -774,10 +774,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:09:06.541104295"
+ "timestamp": "2024-08-09T12:34:45.861028555"
},
"1 sample - 2 bed, fasta, fai, bed": {
"content": [
@@ -891,10 +891,10 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:07:43.362545869"
+ "timestamp": "2024-08-09T12:34:03.458045229"
},
"1 sample - 2 bed, fasta, fai, bed - stub": {
"content": [
@@ -1008,9 +1008,9 @@
}
],
"meta": {
- "nf-test": "0.9.0",
+ "nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-24T11:09:38.575037214"
+ "timestamp": "2024-08-09T12:35:16.395171025"
}
}
\ No newline at end of file
diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test.snap b/subworkflows/local/snv_annotation/tests/main.nf.test.snap
index bc3feffa..c8c3f045 100644
--- a/subworkflows/local/snv_annotation/tests/main.nf.test.snap
+++ b/subworkflows/local/snv_annotation/tests/main.nf.test.snap
@@ -153,6 +153,39 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-05T11:12:17.324211719"
+ "timestamp": "2024-08-09T12:31:54.375357945"
+ },
+ "bcf, db, vep_cache, '110', -stub": {
+ "content": [
+ [
+ "versions.yml:md5,797275193dd19766e99030e63c23bd5f",
+ "versions.yml:md5,992301857689684643c42695c032a7f2",
+ "versions.yml:md5,a07924ee4ebc2d4de5bb7ef897ddc30c",
+ "versions.yml:md5,c0e55e36a31ed71acf25702b7d059533"
+ ],
+ [
+ [
+ {
+ "id": "test_data.bed",
+ "contains_affected": false
+ },
+ "test_data.bed.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test_data.bed",
+ "contains_affected": false
+ },
+ "test_data.bed.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-09T12:32:10.118218292"
}
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/structural_variant_calling.nf b/subworkflows/local/structural_variant_calling.nf
index ace51636..be738c63 100644
--- a/subworkflows/local/structural_variant_calling.nf
+++ b/subworkflows/local/structural_variant_calling.nf
@@ -5,7 +5,6 @@ workflow STRUCTURAL_VARIANT_CALLING {
take:
ch_bam_bai // channel: [ val(meta), [[ bam ], [bai]] ]
- ch_snfs
ch_fasta
ch_fai
ch_tandem_repeats
@@ -15,13 +14,10 @@ workflow STRUCTURAL_VARIANT_CALLING {
SNIFFLES (ch_bam_bai, ch_fasta, ch_tandem_repeats, true, true)
- // Combine sniffles output with supplied extra snfs
SNIFFLES.out.snf
- .map{ it [1] }
- .concat(ch_snfs.map{ it[1] })
- .collect()
- .sort{ it.name }
- .map { snfs -> [ [id:'multisample'], snfs, [] ] }
+ .map { meta, snf -> [ [ 'id': meta.project ], snf ] }
+ .groupTuple()
+ .map { meta, snfs -> [ meta, snfs, [] ] }
.set{ ch_multisample_input }
SNIFFLES_MULTISAMPLE( ch_multisample_input, ch_fasta, ch_tandem_repeats, true, false )
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index ff0ca7b7..c25f7f3f 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -200,7 +200,16 @@ workflow PIPELINE_INITIALISATION {
}
}
-
+ // Check that there's no more than one project
+ // TODO: Try to do this in nf-schema
+ ch_samplesheet
+ .map { meta, reads -> meta.project }
+ .unique()
+ .collect()
+ .filter{ it.size() == 1 }
+ .ifEmpty {
+ error("Only one project may be specified per run")
+ }
emit:
samplesheet = ch_samplesheet
versions = ch_versions
diff --git a/tests/main.nf.test b/tests/main.nf.test
index d2ba3580..04bb42d9 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -14,7 +14,7 @@ nextflow_pipeline {
pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/'
// Test files
fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz'
- input = params.pipelines_testdata_base_path + 'nallo/testdata/samplesheet.csv'
+ input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet.csv'
bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed'
hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed'
hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed'
@@ -41,10 +41,10 @@ nextflow_pipeline {
{ assert workflow.success },
// Assert with snapshot multisample
{ assert snapshot(
- file("$outputDir/pipeline_info/multisample.ped"),
- file("$outputDir/qc_aligned_reads/somalier/relate/multisample/multisample.pairs.tsv"),
- file("$outputDir/qc_aligned_reads/somalier/relate/multisample/multisample.samples.tsv"),
- file("$outputDir/qc_aligned_reads/somalier/relate/multisample/multisample.html"),
+ file("$outputDir/pedigree/test.ped"),
+ file("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv"),
+ file("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv"),
+ file("$outputDir/qc_aligned_reads/somalier/relate/test/test.html"),
file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"),
file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
file("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt"),
@@ -84,22 +84,22 @@ nextflow_pipeline {
file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz"),
file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz.tbi"),
bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam", stringency: 'silent').getReadsMD5(),
- file("$outputDir/snvs/stats/single_sample/HG002_Revio.vcf.gz.bcftools_stats.txt"),
+ file("$outputDir/snvs/stats/single_sample/HG002_Revio.vcf.gz.bcftools_stats.txt").readLines()[0..2],
).match() },
// Assert exists multisample
- { assert new File("$outputDir/databases/echtvar/encode/multisample/multisample.zip").exists() },
+ { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc.log").exists() },
{ assert new File("$outputDir/multiqc/multiqc_report.html").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() },
- { assert new File("$outputDir/repeat_calling/trgt/multi_sample/multisample/multisample.vcf.gz").exists() },
- { assert new File("$outputDir/repeat_calling/trgt/multi_sample/multisample/multisample.vcf.gz.tbi").exists() },
- { assert new File("$outputDir/snvs/multi_sample/multisample/multisample_snv_annotated_ranked.vcf.gz").exists() },
- { assert new File("$outputDir/snvs/multi_sample/multisample/multisample_snv_annotated_ranked.vcf.gz.tbi").exists() },
- { assert new File("$outputDir/sv_calling/sniffles/multi_sample/multisample_sniffles.vcf.gz").exists() },
- { assert new File("$outputDir/sv_calling/sniffles/multi_sample/multisample_sniffles.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() },
// Assert exists HG002_Revio
{ assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam.bai").exists() },
{ assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam.bai").exists() },
@@ -146,7 +146,7 @@ nextflow_pipeline {
pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/'
// Test files
fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz'
- input = params.pipelines_testdata_base_path + 'nallo/testdata/samplesheet_multisample_bam.csv'
+ input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam.csv'
bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed'
hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed'
hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed'
@@ -173,7 +173,7 @@ nextflow_pipeline {
{ assert workflow.success },
// Assert with snapshot multisample
{ assert snapshot(
- file("$outputDir/pipeline_info/multisample.ped"),
+ file("$outputDir/pedigree/test.ped"),
file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"),
file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
// Assert with snapshot HG002_Revio_A
@@ -212,7 +212,7 @@ nextflow_pipeline {
file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz"),
file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz.tbi"),
bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam", stringency: 'silent').getReadsMD5(),
- file("$outputDir/snvs/stats/single_sample/HG002_Revio_A.vcf.gz.bcftools_stats.txt"),
+ file("$outputDir/snvs/stats/single_sample/HG002_Revio_A.vcf.gz.bcftools_stats.txt").readLines()[0..2],
// Assert with snapshot HG002_Revio_B
bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam", stringency: 'silent').getReadsMD5(),
file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary"),
@@ -249,13 +249,13 @@ nextflow_pipeline {
file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz"),
file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz.tbi"),
bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam", stringency: 'silent').getReadsMD5(),
- file("$outputDir/snvs/stats/single_sample/HG002_Revio_B.vcf.gz.bcftools_stats.txt"),
+ file("$outputDir/snvs/stats/single_sample/HG002_Revio_B.vcf.gz.bcftools_stats.txt").readLines()[0..2],
).match() },
// Assert exists multisample - note the trgt multisample that doesn't exist in singlesample
- { assert new File("$outputDir/databases/echtvar/encode/multisample/multisample.zip").exists() },
- { assert new File("$outputDir/qc_aligned_reads/somalier/relate/multisample/multisample.pairs.tsv").exists() },
- { assert new File("$outputDir/qc_aligned_reads/somalier/relate/multisample/multisample.samples.tsv").exists() },
- { assert new File("$outputDir/qc_aligned_reads/somalier/relate/multisample/multisample.html").exists() },
+ { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.html").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() },
@@ -263,12 +263,12 @@ nextflow_pipeline {
{ assert new File("$outputDir/multiqc/multiqc_report.html").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() },
{ assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() },
- { assert new File("$outputDir/snvs/multi_sample/multisample/multisample_snv_annotated_ranked.vcf.gz").exists() },
- { assert new File("$outputDir/snvs/multi_sample/multisample/multisample_snv_annotated_ranked.vcf.gz.tbi").exists() },
- { assert new File("$outputDir/sv_calling/sniffles/multi_sample/multisample_sniffles.vcf.gz").exists() },
- { assert new File("$outputDir/sv_calling/sniffles/multi_sample/multisample_sniffles.vcf.gz.tbi").exists() },
- { assert new File("$outputDir/repeat_calling/trgt/multi_sample/multisample/multisample.vcf.gz").exists() },
- { assert new File("$outputDir/repeat_calling/trgt/multi_sample/multisample/multisample.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() },
// Assert exists HG002_Revio_A
{ assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() },
{ assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam.bai").exists() },
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index 942dcf0c..31a1d818 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -1,10 +1,10 @@
{
"test profile": {
"content": [
- "multisample.ped:md5,f52b1fb9647cb255313b9602841481fd",
- "multisample.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595",
- "multisample.samples.tsv:md5,1685dc6cb8c6b9806ca636662980d686",
- "multisample.html:md5,d05e0eceb70ada3a0c25f99a16ad1889",
+ "test.ped:md5,bd5cec27ba7337a85cf98e787131e2b5",
+ "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595",
+ "test.samples.tsv:md5,1685dc6cb8c6b9806ca636662980d686",
+ "test.html:md5,d05e0eceb70ada3a0c25f99a16ad1889",
"multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350",
"multiqc_fastqc.txt:md5,055c2c156136798feeb1658adf905e95",
"multiqc_somalier.txt:md5,20b4c5b2d5b94b77fb800548e07a874e",
@@ -43,17 +43,21 @@
"HG002_Revio_sorted.vcf.gz:md5,bc06de08b8e36b3b48e0d7b9e21df389",
"HG002_Revio_sorted.vcf.gz.tbi:md5,08a5c82838264c558eb30726906f47e0",
"110181f29066158df34abbad9e3becc8",
- "HG002_Revio.vcf.gz.bcftools_stats.txt:md5,ba63db6ea639dc7080fabdedf19779b4"
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats HG002_Revio.vcf.gz",
+ "#"
+ ]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-26T10:41:39.74321081"
+ "timestamp": "2024-08-09T13:23:09.224424657"
},
"test profile - multisample": {
"content": [
- "multisample.ped:md5,2d69697ac006715f975502a6578c9d1f",
+ "test.ped:md5,a1e82af069bce823564e204c316d5500",
"multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350",
"multiqc_fastqc.txt:md5,234f2958710c30f62446a9406cbfcaae",
"74b4822241bd8d1bc42f494f1f3e326c",
@@ -91,7 +95,11 @@
"HG002_Revio_A_sorted.vcf.gz:md5,b95e709a27fe1df9ee1487b99f396bf4",
"HG002_Revio_A_sorted.vcf.gz.tbi:md5,b1eb1f21f36782089b8e0bb0a54105ed",
"110181f29066158df34abbad9e3becc8",
- "HG002_Revio_A.vcf.gz.bcftools_stats.txt:md5,735742ed6775d3c3a22966ce9080b1ce",
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats HG002_Revio_A.vcf.gz",
+ "#"
+ ],
"fe7bb70701d1100b2874c10a512a2144",
"HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary:md5,4941730ceacb4012e771208be7a6673a",
"HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary:md5,be7dcb093d25922b72ef0f7bc1bf0706",
@@ -127,12 +135,16 @@
"HG002_Revio_B_sorted.vcf.gz:md5,05ae66b46d2f87a2133fcdf93d30f38c",
"HG002_Revio_B_sorted.vcf.gz.tbi:md5,244a3f966e3434220cd69fcb04b08d01",
"18e3bd1fe43fc17ace2f57db5861498c",
- "HG002_Revio_B.vcf.gz.bcftools_stats.txt:md5,999ab680e9f012d0f1cb6f7aaafdc772"
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats HG002_Revio_B.vcf.gz",
+ "#"
+ ]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-07-26T10:46:15.4519051"
+ "timestamp": "2024-08-09T13:26:52.566315569"
}
}
\ No newline at end of file
diff --git a/workflows/nallo.nf b/workflows/nallo.nf
index aead21cf..9cc76c2d 100644
--- a/workflows/nallo.nf
+++ b/workflows/nallo.nf
@@ -32,6 +32,7 @@ include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/stru
*/
// local
+include { CREATE_PEDIGREE_FILE } from '../modules/local/create_pedigree_file'
include { ECHTVAR_ENCODE } from '../modules/local/echtvar/encode/main'
include { FQCRS } from '../modules/local/fqcrs'
include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main'
@@ -110,7 +111,17 @@ workflow NALLO {
if (params.phaser.matches('hiphase_sv|hiphase_snv') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" }
// Create PED from samplesheet
- ch_pedfile = ch_input.toList().map { file(CustomFunctions.makePed(it, params.outdir)) }
+ ch_input
+ .map { meta, files -> [ meta.project, meta ] } // key each sample's meta by its project
+ .groupTuple() // collect all metas sharing a project into one item
+ .set { ch_ped_in }
+
+ CREATE_PEDIGREE_FILE ( ch_ped_in )
+ ch_versions = ch_versions.mix(CREATE_PEDIGREE_FILE.out.versions)
+
+ CREATE_PEDIGREE_FILE.out.ped
+ .map { project, ped -> [ [ 'id': project ], ped ] } // wrap the project name in a meta map
+ .set { ch_pedfile }
//
// Convert BAM files to FASTQ
@@ -284,8 +295,7 @@ workflow NALLO {
//
// Call SVs with Sniffles2
- //
- STRUCTURAL_VARIANT_CALLING( bam_bai , ch_extra_snfs, fasta, fai, ch_tandem_repeats )
+ STRUCTURAL_VARIANT_CALLING( bam_bai, fasta, fai, ch_tandem_repeats )
ch_versions = ch_versions.mix(STRUCTURAL_VARIANT_CALLING.out.versions)
//
@@ -356,7 +366,7 @@ workflow NALLO {
// Only run if we have affected individuals
RANK_VARIANTS_SNV (
ANN_CSQ_PLI_SNV.out.vcf_ann.filter { meta, vcf -> meta.contains_affected },
- ch_pedfile,
+ ch_pedfile.map { meta, ped -> ped },
ch_reduced_penetrance,
ch_score_config_snv
)
@@ -382,7 +392,7 @@ workflow NALLO {
}
ch_vcf_tbi_per_region
- .map { meta, vcf, tbi -> [ [ id: 'multisample' ], vcf, tbi ] }
+ .map { meta, vcf, tbi -> [ [ id: meta.project ], vcf, tbi ] }
.groupTuple()
.set { ch_bcftools_concat_in }