diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b7a138..67ef724 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.0] - 2024/09/05 + +### `Changed` + +- Upgraded `locidex/merge` to version `0.2.3` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format [PR20](https://github.com/phac-nml/gasnomenclature/pull/20) +- Removed `quay.io` docker repository tags from modules [PR19](https://github.com/phac-nml/gasnomenclature/pull/19) + +This pipeline is now compatible only with output generated by [Locidex v0.2.3+](https://github.com/phac-nml/locidex) and [Mikrokondo v0.4.0+](https://github.com/phac-nml/mikrokondo/releases/tag/v0.4.0). + ## [0.1.0] - 2024/06/28 Initial release of the Genomic Address Nomenclature pipeline to be used to assign cluster addresses to samples based on an existing cluster designations. @@ -13,3 +22,4 @@ Initial release of the Genomic Address Nomenclature pipeline to be used to assig - Output of assigned cluster addresses for any **query** samples using [profile_dists](https://github.com/phac-nml/profile_dists) and [gas call](https://github.com/phac-nml/genomic_address_service). [0.1.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.1.0 +[0.2.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.2.0 diff --git a/bin/input_assure.py b/bin/input_assure.py index d99bf2a..e2b7ac1 100755 --- a/bin/input_assure.py +++ b/bin/input_assure.py @@ -19,18 +19,22 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f with open_file(json_file, "rt") as f: json_data = json.load(f) + # Extract the profile from the json_data + profile = json_data.get("data", {}).get("profile", {}) + # Check for multiple keys in the JSON file and define error message + keys = sorted(profile.keys()) + original_key = keys[0] if keys else None + # Define a variable to store the match_status (True or False) - match_status = sample_id in json_data + match_status = sample_id in profile # Initialize the error message error_message = None - # Check for multiple keys in the JSON file and define error message - keys = list(json_data.keys()) - original_key = keys[0] if keys else None - - if len(keys) == 0: - error_message = f"{json_file} is completely empty!" + if not keys: + error_message = ( + f"{json_file} is missing the 'profile' section or is completely empty!" + ) print(error_message) sys.exit(1) elif len(keys) > 1: @@ -38,11 +42,11 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f if not match_status: error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed." # Retain only the specified sample ID - json_data = {sample_id: json_data.pop(original_key)} + json_data["data"]["profile"] = {sample_id: profile.pop(original_key)} else: error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry" - # Remove all keys expect the one matching sample_id - json_data = {sample_id: json_data[sample_id]} + # Retain only the specified sample_id in the profile + json_data["data"]["profile"] = {sample_id: profile[sample_id]} elif not match_status: # Define error message based on meta.address (query or reference) if address == "null": @@ -50,7 +54,8 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f else: error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness." # Update the JSON file with the new sample ID - json_data[sample_id] = json_data.pop(original_key) + json_data["data"]["profile"] = {sample_id: profile.pop(original_key)} + json_data["data"]["sample_name"] = sample_id # Write file containing relevant error messages if error_message: diff --git a/modules/local/gas/call/main.nf b/modules/local/gas/call/main.nf index 3216c9e..4832e35 100644 --- a/modules/local/gas/call/main.nf +++ b/modules/local/gas/call/main.nf @@ -6,7 +6,7 @@ process GAS_CALL{ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' : - 'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }" + 'biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }" input: diff --git a/modules/local/locidex/merge/main.nf b/modules/local/locidex/merge/main.nf index 7721625..170cb88 100644 --- a/modules/local/locidex/merge/main.nf +++ b/modules/local/locidex/merge/main.nf @@ -5,8 +5,9 @@ process LOCIDEX_MERGE { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/locidex:0.1.1--pyhdfd78af_0' : - 'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }" + "docker.io/mwells14/locidex:0.2.3" : + task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/locidex:0.2.3' : + 'mwells14/locidex:0.2.3' }" input: path input_values // [file(sample1), file(sample2), file(sample3), etc...] diff --git a/modules/local/profile_dists/main.nf b/modules/local/profile_dists/main.nf index f43d63b..b6b92de 100644 --- a/modules/local/profile_dists/main.nf +++ b/modules/local/profile_dists/main.nf @@ -4,7 +4,7 @@ process PROFILE_DISTS{ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' : - 'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }" + 'biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }" input: path query diff --git a/nextflow.config b/nextflow.config index 423d59c..1882e6f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -168,6 +168,9 @@ docker.registry = 'quay.io' podman.registry = 'quay.io' singularity.registry = 'quay.io' +// Override the default Docker registry when required +process.ext.override_configured_container_registry = true + // Nextflow plugins plugins { id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet @@ -219,7 +222,7 @@ manifest { description = """Gas Nomenclature assignment pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.1.0' + version = '0.2.0' doi = '' defaultBranch = 'main' } diff --git a/tests/data/reports/sample1.mlst.json b/tests/data/reports/sample1.mlst.json index 01bc774..63a71b4 100644 --- a/tests/data/reports/sample1.mlst.json +++ b/tests/data/reports/sample1.mlst.json @@ -1,7 +1,21 @@ { - "sample1": { - "l1": "1", - "l2": "1", - "l3": "1" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sample1", + "profile": { + "sample1": { + "l1": "1", + "l2": "1", + "l3": "1" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sample1.mlst.json.gz b/tests/data/reports/sample1.mlst.json.gz index 735e108..512cf98 100644 Binary files a/tests/data/reports/sample1.mlst.json.gz and b/tests/data/reports/sample1.mlst.json.gz differ diff --git a/tests/data/reports/sample2.mlst.json b/tests/data/reports/sample2.mlst.json index 7c0426c..3d9ee23 100644 --- a/tests/data/reports/sample2.mlst.json +++ b/tests/data/reports/sample2.mlst.json @@ -1,7 +1,21 @@ { - "sample2": { - "l1": "1", - "l2": "1", - "l3": "1" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sample2", + "profile": { + "sample2": { + "l1": "1", + "l2": "1", + "l3": "1" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sample2_missing.mlst.json b/tests/data/reports/sample2_missing.mlst.json index 113e15b..58c2d70 100644 --- a/tests/data/reports/sample2_missing.mlst.json +++ b/tests/data/reports/sample2_missing.mlst.json @@ -1,7 +1,21 @@ { - "sample2": { - "l1": "-", - "l2": "1", - "l3": "1" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sample2", + "profile": { + "sample2": { + "l1": "-", + "l2": "1", + "l3": "1" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sample3.mlst.json b/tests/data/reports/sample3.mlst.json index 43ea3c7..d57ee75 100644 --- a/tests/data/reports/sample3.mlst.json +++ b/tests/data/reports/sample3.mlst.json @@ -1,7 +1,21 @@ { - "sample3": { - "l1": "1", - "l2": "1", - "l3": "2" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sample3", + "profile": { + "sample3": { + "l1": "1", + "l2": "1", + "l3": "2" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sample3_missing.mlst.json b/tests/data/reports/sample3_missing.mlst.json index 49942f8..ac6eab5 100644 --- a/tests/data/reports/sample3_missing.mlst.json +++ b/tests/data/reports/sample3_missing.mlst.json @@ -1,7 +1,21 @@ { - "sample3": { - "l1": "-", - "l2": "1", - "l3": "2" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sample3", + "profile": { + "sample3": { + "l1": "-", + "l2": "1", + "l3": "2" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sample3_multiplekeys.mlst.json b/tests/data/reports/sample3_multiplekeys.mlst.json index 5d85e65..8b1fc52 100644 --- a/tests/data/reports/sample3_multiplekeys.mlst.json +++ b/tests/data/reports/sample3_multiplekeys.mlst.json @@ -1,12 +1,26 @@ { - "extra_key": { - "l1": "1", - "l2": "1", - "l3": "2" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 }, - "sample3": { - "l1": "1", - "l2": "1", - "l3": "2" + "data": { + "sample_name": "sample3", + "profile": { + "extra_key": { + "l1": "1", + "l2": "1", + "l3": "2" + }, + "sample3": { + "l1": "1", + "l2": "1", + "l3": "2" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sample3_multiplekeys_nomatch.mlst.json b/tests/data/reports/sample3_multiplekeys_nomatch.mlst.json index 6d7878d..42f8bd8 100644 --- a/tests/data/reports/sample3_multiplekeys_nomatch.mlst.json +++ b/tests/data/reports/sample3_multiplekeys_nomatch.mlst.json @@ -1,12 +1,26 @@ { - "sample4": { - "l1": "1", - "l2": "1", - "l3": "2" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 }, - "extra_key": { - "l1": "1", - "l2": "1", - "l3": "2" + "data": { + "sample_name": "sample4", + "profile": { + "sample4": { + "l1": "1", + "l2": "1", + "l3": "2" + }, + "extra_key": { + "l1": "1", + "l2": "1", + "l3": "2" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sample7.mlst.json b/tests/data/reports/sample7.mlst.json index 41d6312..2e33871 100644 --- a/tests/data/reports/sample7.mlst.json +++ b/tests/data/reports/sample7.mlst.json @@ -1,7 +1,21 @@ { - "sample7": { - "l1": "1", - "l2": "1", - "l3": "1" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sample7", + "profile": { + "sample7": { + "l1": "1", + "l2": "1", + "l3": "1" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sampleF.mlst.json b/tests/data/reports/sampleF.mlst.json index 8c09d39..4792244 100644 --- a/tests/data/reports/sampleF.mlst.json +++ b/tests/data/reports/sampleF.mlst.json @@ -1,7 +1,21 @@ { - "sampleF": { - "l1": "1", - "l2": "2", - "l3": "1" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sampleF", + "profile": { + "sampleF": { + "l1": "1", + "l2": "2", + "l3": "1" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sampleN.mlst.json b/tests/data/reports/sampleN.mlst.json index 178b6db..fd60cec 100644 --- a/tests/data/reports/sampleN.mlst.json +++ b/tests/data/reports/sampleN.mlst.json @@ -1,7 +1,21 @@ { - "sampleN": { - "l1": "1", - "l2": "2", - "l3": "1" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sampleN", + "profile": { + "sampleN": { + "l1": "1", + "l2": "2", + "l3": "1" + } + }, + "seq_data": {} } } diff --git a/tests/data/reports/sampleQ.mlst.json b/tests/data/reports/sampleQ.mlst.json index c6cca43..a8f60ba 100644 --- a/tests/data/reports/sampleQ.mlst.json +++ b/tests/data/reports/sampleQ.mlst.json @@ -1,7 +1,21 @@ { - "sampleQ": { - "l1": "1", - "l2": "2", - "l3": "1" + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "sampleQ", + "profile": { + "sampleQ": { + "l1": "1", + "l2": "2", + "l3": "1" + } + }, + "seq_data": {} } } diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index 223b7de..97a935e 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -317,7 +317,7 @@ nextflow_pipeline { // Ensure that the error_reports are generated for query and reference samples lines = path("$launchDir/results/input/sample3_error_report.csv").readLines() - assert lines.contains("sample3,\"[\'sample4\', \'extra_key\']\",No key in the MLST JSON file (sample3_multiplekeys_nomatch.mlst.json) matches the specified sample ID \'sample3\'. The first key \'sample4\' has been forcefully changed to \'sample3\' and all other keys have been removed.") + assert lines.contains('sample3,"[\'extra_key\', \'sample4\']",No key in the MLST JSON file (sample3_multiplekeys_nomatch.mlst.json) matches the specified sample ID \'sample3\'. The first key \'extra_key\' has been forcefully changed to \'sample3\' and all other keys have been removed.') // Check filtered query csv results lines = path("$launchDir/results/filter/new_addresses.csv").readLines() @@ -351,7 +351,7 @@ nextflow_pipeline { then { assert workflow.failed - assert (workflow.stdout =~ /sample2_empty.mlst.json is completely empty!/).find() + assert (workflow.stdout =~ /sample2_empty.mlst.json is missing the 'profile' section or is completely empty!/).find() } } }