Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to version 0.2.0 #21

Merged
merged 11 commits into from
Sep 5, 2024
Merged
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.0] - 2024/09/05

### `Changed`

- Upgraded `locidex/merge` to version `0.2.3` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format [PR20](https://github.com/phac-nml/gasnomenclature/pull/20)
- Removed `quay.io` docker repository tags from modules [PR19](https://github.com/phac-nml/gasnomenclature/pull/19)

This pipeline is now compatible only with output generated by [Locidex v0.2.3+](https://github.com/phac-nml/locidex) and [Mikrokondo v0.4.0+](https://github.com/phac-nml/mikrokondo/releases/tag/v0.4.0).

## [0.1.0] - 2024/06/28

Initial release of the Genomic Address Nomenclature pipeline to be used to assign cluster addresses to samples based on existing cluster designations.
Expand All @@ -13,3 +22,4 @@ Initial release of the Genomic Address Nomenclature pipeline to be used to assig
- Output of assigned cluster addresses for any **query** samples using [profile_dists](https://github.com/phac-nml/profile_dists) and [gas call](https://github.com/phac-nml/genomic_address_service).

[0.1.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.1.0
[0.2.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.2.0
27 changes: 16 additions & 11 deletions bin/input_assure.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,43 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f
with open_file(json_file, "rt") as f:
json_data = json.load(f)

# Extract the profile from the json_data
profile = json_data.get("data", {}).get("profile", {})
# Check for multiple keys in the JSON file and define error message
keys = sorted(profile.keys())
original_key = keys[0] if keys else None

# Define a variable to store the match_status (True or False)
match_status = sample_id in json_data
match_status = sample_id in profile

# Initialize the error message
error_message = None

# Check for multiple keys in the JSON file and define error message
keys = list(json_data.keys())
original_key = keys[0] if keys else None

if len(keys) == 0:
error_message = f"{json_file} is completely empty!"
if not keys:
error_message = (
f"{json_file} is missing the 'profile' section or is completely empty!"
)
print(error_message)
sys.exit(1)
elif len(keys) > 1:
# Check if sample_id matches any key
if not match_status:
error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed."
# Retain only the specified sample ID
json_data = {sample_id: json_data.pop(original_key)}
json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
else:
error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry"
# Remove all keys except the one matching sample_id
json_data = {sample_id: json_data[sample_id]}
# Retain only the specified sample_id in the profile
json_data["data"]["profile"] = {sample_id: profile[sample_id]}
elif not match_status:
# Define error message based on meta.address (query or reference)
if address == "null":
error_message = f"Query {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
else:
error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness."
# Update the JSON file with the new sample ID
json_data[sample_id] = json_data.pop(original_key)
json_data["data"]["profile"] = {sample_id: profile.pop(original_key)}
json_data["data"]["sample_name"] = sample_id

# Write file containing relevant error messages
if error_message:
Expand Down
2 changes: 1 addition & 1 deletion modules/local/gas/call/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ process GAS_CALL{

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"
'biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"


input:
Expand Down
5 changes: 3 additions & 2 deletions modules/local/locidex/merge/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ process LOCIDEX_MERGE {
label 'process_medium'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/locidex:0.1.1--pyhdfd78af_0' :
'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"
"docker.io/mwells14/locidex:0.2.3" :
task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/locidex:0.2.3' :
'mwells14/locidex:0.2.3' }"

input:
path input_values // [file(sample1), file(sample2), file(sample3), etc...]
Expand Down
2 changes: 1 addition & 1 deletion modules/local/profile_dists/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ process PROFILE_DISTS{

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' :
'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"
'biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"

input:
path query
Expand Down
5 changes: 4 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ docker.registry = 'quay.io'
podman.registry = 'quay.io'
singularity.registry = 'quay.io'

// Override the default Docker registry when required
process.ext.override_configured_container_registry = true

// Nextflow plugins
plugins {
id '[email protected]' // Validation of pipeline parameters and creation of an input channel from a sample sheet
Expand Down Expand Up @@ -219,7 +222,7 @@ manifest {
description = """Gas Nomenclature assignment pipeline"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '0.1.0'
version = '0.2.0'
doi = ''
defaultBranch = 'main'
}
Expand Down
22 changes: 18 additions & 4 deletions tests/data/reports/sample1.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample1": {
"l1": "1",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample1",
"profile": {
"sample1": {
"l1": "1",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
Binary file modified tests/data/reports/sample1.mlst.json.gz
Binary file not shown.
22 changes: 18 additions & 4 deletions tests/data/reports/sample2.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample2": {
"l1": "1",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample2",
"profile": {
"sample2": {
"l1": "1",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample2_missing.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample2": {
"l1": "-",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample2",
"profile": {
"sample2": {
"l1": "-",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample3.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample3",
"profile": {
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample3_missing.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample3": {
"l1": "-",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample3",
"profile": {
"sample3": {
"l1": "-",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
30 changes: 22 additions & 8 deletions tests/data/reports/sample3_multiplekeys.mlst.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
{
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
"data": {
"sample_name": "sample3",
"profile": {
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
},
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
30 changes: 22 additions & 8 deletions tests/data/reports/sample3_multiplekeys_nomatch.mlst.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
{
"sample4": {
"l1": "1",
"l2": "1",
"l3": "2"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
"data": {
"sample_name": "sample4",
"profile": {
"sample4": {
"l1": "1",
"l2": "1",
"l3": "2"
},
"extra_key": {
"l1": "1",
"l2": "1",
"l3": "2"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sample7.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sample7": {
"l1": "1",
"l2": "1",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sample7",
"profile": {
"sample7": {
"l1": "1",
"l2": "1",
"l3": "1"
}
},
"seq_data": {}
}
}
22 changes: 18 additions & 4 deletions tests/data/reports/sampleF.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
{
"sampleF": {
"l1": "1",
"l2": "2",
"l3": "1"
"db_info": {},
"parameters": {
"mode": "normal",
"min_match_ident": 100,
"min_match_cov": 100,
"max_ambiguous": 0,
"max_internal_stops": 0
},
"data": {
"sample_name": "sampleF",
"profile": {
"sampleF": {
"l1": "1",
"l2": "2",
"l3": "1"
}
},
"seq_data": {}
}
}
Loading
Loading