Skip to content

Commit

Permalink
replace dots with underscores
Browse files Browse the repository at this point in the history
  • Loading branch information
nvnieuwk committed Nov 20, 2024
1 parent deac9d5 commit ecf67ab
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1. Validation of all samples now uses an intersection of the golden truth BED files with the BED file used to call the variants. This should fix the WES validation, which was broken until this point.
2. A couple of small fixes to the vardict flow.
3. Only use the standard chromosomes for UPDio analysis.
4. Dots in sample and family names are now automatically converted to underscores.

## v1.8.2 - Outstanding Oostkamp - [September 30 2024]

Expand Down
8 changes: 4 additions & 4 deletions assets/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample,family,cram,crai,roi,ped,truth_vcf,truth_tbi,truth_bed,vardict_min_af
NA24143,Proband_12345,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram,,,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/test.ped,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz.tbi,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/regions/roi.bed,0.01
NA24149,Proband_12345,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24149.cram,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24149.cram.crai,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/regions/roi.bed,,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24149.vcf.gz,,,
NA24385,Proband_12345,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram.crai,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/regions/roi.bed,,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24385.vcf.gz,,,
sample,family,cram,crai,roi,truth_vcf,truth_tbi,truth_bed,vardict_min_af
NA24143,Proband_12.345,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24143.cram,,,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz.tbi,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/regions/roi.bed,0.01
NA24149,Proband_12.345,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24149.cram,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24149.cram.crai,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/regions/roi.bed,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24149.vcf.gz,,,
NA24385,Proband_12.345,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/crams/NA24385.cram.crai,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/regions/roi.bed,https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/illumina/vcfs/NA24385.vcf.gz,,,
8 changes: 4 additions & 4 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
"properties": {
"sample": {
"type": "string",
"pattern": "^[^\\. ]+$",
"pattern": "^[a-zA-Z0-9_\\.]+$",
"meta": ["id", "sample"],
"errorMessage": "Sample name should be a string that does not contain dots or spaces"
"errorMessage": "Sample name should be a string that only contains letters, digits, underscores (_) and dots (.)"
},
"family": {
"type": "string",
"pattern": "^[^\\. ]+$",
"pattern": "^[a-zA-Z0-9_\\.]+$",
"meta": ["family"],
"errorMessage": "Family name should be a string that does not contain dots or spaces"
"errorMessage": "Family name should be a string that only contains letters, digits, underscores (_) and dots (.)"
},
"cram": {
"oneOf": [
Expand Down
1 change: 1 addition & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ params {
elfasta = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.elfasta"
sdf = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000_sdf.tar.gz"
strtablefile = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.strtable.zip"
ped = "https://github.com/nf-cmgg/test-datasets/raw/germline/data/genomics/homo_sapiens/genome/test_dots.ped"

// Pipeline specific parameters
filter = true
Expand Down
8 changes: 4 additions & 4 deletions lib/Pedigree.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,15 @@ class PedigreeEntry {
// Family ID
id = lineSplit[0]
if (id ==~ idRegex) {
familyId = id
familyId = id.replace(".", "_") // Replace dots with underscores to prevent breaking the multiqc report
} else {
exceptions.add("Invalid family ID (${id}). It should only contain these characters: a-z, A-Z, 0-9, _ and ." as String)
}

// Individual ID
id = lineSplit[1]
if (id ==~ idRegex) {
individualId = id
individualId = id.replace(".", "_") // Replace dots with underscores to prevent breaking the multiqc report
} else {
exceptions.add("Invalid individual ID (${id}). It should only contain these characters: a-z, A-Z, 0-9, _ and ." as String)
}
Expand All @@ -102,15 +102,15 @@ class PedigreeEntry {
// Paternal ID
id = lineSplit[2]
if (id ==~ idRegex) {
paternalId = id
paternalId = id.replace(".", "_") // Replace dots with underscores to prevent breaking the multiqc report
} else if (!validMissingIDs.contains(id)) {
exceptions.add("Invalid paternal ID (${id}). It should only contain these characters: a-z, A-Z, 0-9, _ and .; Use 0 if the paternal ID is missing" as String)
}

// Maternal ID
id = lineSplit[3]
if (id ==~ idRegex) {
maternalId = id
maternalId = id.replace(".", "_") // Replace dots with underscores to prevent breaking the multiqc report
} else if (!validMissingIDs.contains(id)) {
exceptions.add("Invalid maternal ID (${id}). It should only contain these characters: a-z, A-Z, 0-9, _ and .; Use 0 if the maternal ID is missing" as String)
}
Expand Down
5 changes: 5 additions & 0 deletions subworkflows/local/watchpath_handling/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ workflow WATCHPATH_HANDLING {
def samplesheet_list = samplesheetToList(input_samplesheet, samplesheet_schema)
// Do some calculations and manipulations here
.collect { row ->
// Replace dots with underscores in sample and family names to prevent breaking the multiqc report
row[0].id = row[0].id.replace(".", "_")
row[0].sample = row[0].sample.replace(".", "_")
row[0].family = row[0].family.replace(".", "_")

// Watchpath logic
def is_watch = false
row = row.collect { input ->
Expand Down

0 comments on commit ecf67ab

Please sign in to comment.