From 27a65285f385f5e4c83ca5877904dbf482f455c2 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Wed, 11 Sep 2024 14:19:04 -0400 Subject: [PATCH 1/5] Update csvtk parameters in FILTER_QUERY to support long command-line strings --- modules/local/filter_query/main.nf | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/modules/local/filter_query/main.nf b/modules/local/filter_query/main.nf index 9912ee5..2647fbe 100644 --- a/modules/local/filter_query/main.nf +++ b/modules/local/filter_query/main.nf @@ -22,24 +22,27 @@ process FILTER_QUERY { def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format) def out_extension = out_format == "tsv" ? 'tsv' : 'csv' - // Join the query IDs in the correct csvtk filter2 required format - def queryID = query_ids.collect { id -> "\$id == \"${id}\"" }.join(" || ") + // Write the query IDs to a temporary file + def queryFile = file("query_ids.txt") + queryFile.text = query_ids.join("\n") """ # Filter the query samples only; keep only the 'id' and 'address' columns - csvtk filter2 \\ + csvtk grep \\ ${addresses} \\ - --filter '$queryID' \\ + -f 1 \\ + -P ${queryFile} \\ --delimiter "${delimiter}" \\ --out-delimiter "${out_delimiter}" | \\ csvtk cut -f id,address > ${outputFile}.${out_extension} + # Remove the query_ids file after the command runs + rm -f ${queryFile} + cat <<-END_VERSIONS > versions.yml "${task.process}": csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) END_VERSIONS """ - - } From dd42d61069252a803cf8595753bb0878ddb4e5ab Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 12 Sep 2024 10:56:48 -0400 Subject: [PATCH 2/5] Collect query_ids into a file for FILTER_QUERY --- modules/local/filter_query/main.nf | 10 +--------- workflows/gas_nomenclature.nf | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/modules/local/filter_query/main.nf b/modules/local/filter_query/main.nf index 2647fbe..1635296 100644 --- a/modules/local/filter_query/main.nf +++ b/modules/local/filter_query/main.nf @@ -22,27 +22,19 @@ process FILTER_QUERY { def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format) def out_extension = out_format == "tsv" ? 'tsv' : 'csv' - // Write the query IDs to a temporary file - def queryFile = file("query_ids.txt") - queryFile.text = query_ids.join("\n") - """ # Filter the query samples only; keep only the 'id' and 'address' columns csvtk grep \\ ${addresses} \\ -f 1 \\ - -P ${queryFile} \\ + -P ${query_ids} \\ --delimiter "${delimiter}" \\ --out-delimiter "${out_delimiter}" | \\ csvtk cut -f id,address > ${outputFile}.${out_extension} - # Remove the query_ids file after the command runs - rm -f ${queryFile} - cat <<-END_VERSIONS > versions.yml "${task.process}": csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" )) END_VERSIONS """ } - diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf index 8972669..4daaf71 100644 --- a/workflows/gas_nomenclature.nf +++ b/workflows/gas_nomenclature.nf @@ -144,7 +144,7 @@ workflow GAS_NOMENCLATURE { ch_versions = ch_versions.mix(called_data.versions) // Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in - query_ids = profiles.query.collect { it[0].id } + query_ids = profiles.query.collectFile { it[0].id + '\n' } new_addresses = FILTER_QUERY(query_ids, called_data.distances, "tsv", "csv") ch_versions = ch_versions.mix(new_addresses.versions) From 3d7a03471ed2823885bb449075e677ca4eccf7bc Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 12 Sep 2024 16:33:58 -0400 Subject: [PATCH 3/5] Updated CHANGELOG.md --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fab4ffb..421db40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.2] - 2024/09/.. + +### `Changed` + +- Updated FILTER_QUERY to process query IDs from a file rather than passing them as a string, preventing errors caused by long argument strings [PR24](https://github.com/phac-nml/gasnomenclature/pull/24) + ## [0.2.1] - 2024/09/10 ### `Changed` @@ -29,3 +35,5 @@ Initial release of the Genomic Address Nomenclature pipeline to be used to assig [0.1.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.1.0 [0.2.0]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.2.0 +[0.2.1]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.2.1 +[0.2.2]: https://github.com/phac-nml/gasnomenclature/releases/tag/0.2.2 From 72dff81cd881e6b93fba26a4bee42cda07eeaa48 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 12 Sep 2024 16:40:37 -0400 Subject: [PATCH 4/5] Update versioning in config files --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index dbc1487..fe19697 100644 --- a/nextflow.config +++ b/nextflow.config @@ -222,7 +222,7 @@ manifest { description = """Gas Nomenclature assignment pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.2.1' + version = '0.2.2' doi = '' defaultBranch = 'main' } From 6cfe9ba7113edc44988c275117893f54e1ce2f55 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Fri, 13 Sep 2024 15:23:52 -0400 Subject: [PATCH 5/5] Update release date in CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 421db40..fe7b8fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.2.2] - 2024/09/.. +## [0.2.2] - 2024/09/13 ### `Changed`