diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d23f0f..2015c47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# Unreleased + +- Added documentation for running test cases. +- Added test cases to verify that missing values in CSV will be encoded as empty strings in IRIDA Next JSON file in the sample metadata section. +- Added test cases for passing missing values in a JSON file. + # 0.2.0 - 2024/01/22 - Added support for writing JSON output file when using `-resume` in a pipeline. diff --git a/README.md b/README.md index 7982f5e..b5cf8f0 100644 --- a/README.md +++ b/README.md @@ -387,8 +387,81 @@ iridanext { } ``` +### Missing values in metadata + +There are two different scenarios where metadata key/value pairs could be missing for a sample, which result in different behaviours in IRIDA Next. + +1. **Ignore key**: If the `key` is left out of the samples metadata in the IRIDA Next JSON, then nothing is written for that `key` for the sample. Any existing metadata under that `key` will remain in IRIDA Next. + +2. **Delete key**: If a metadata value is an empty string (`"key": ""`) or null (`"key": null`), then IRIDA Next will remove that particular metadata key/value pair from the sample metadata if it exists. This is the expected scenario if pipeline results contain missing (or N/A) values (deleting older metadata keys prevents mixing up old and new pipeline analysis results in the metadata table). + +The following are the expectations for writing missing values in the final IRIDA Next JSON file (in order to delete the key/value pairs in IRIDA Next). 
+ +#### Encoding missing metadata values using JSON + +If the metadata key `b` for **SAMPLE1** is encoded as an empty string `""` or `null` in the JSON file, as in the example below: + +**output.json** +```json +{ + "SAMPLE1": { + "a": "value1", + "b": "" + } +} +``` + +Then the final IRIDA Next JSON file will preserve the empty string/null value in the samples metadata section: + +**iridanext.output.json.gz** +```json +"metadata": { + "samples": { + "SAMPLE1": { "a": "value1", "b": "" } + } +} +``` + +#### Encoding missing metadata values using CSV + +If the metadata key `b` for **SAMPLE1** is left empty in the CSV file, as in the two examples below: + +**output.csv** as table +| column1 | b | c | +|--|--|--| +| SAMPLE1 | | 3 | +| SAMPLE2 | 4 | 5 | +| SAMPLE3 | 6 | 7 | + +**output.csv** as CSV +``` +column1,b,c +SAMPLE1,,3 +SAMPLE2,4,5 +SAMPLE3,6,7 +``` + +Then the value for `b` for **SAMPLE1** will be written as an empty string in the IRIDA Next JSON file: + +**iridanext.output.json.gz** +```json +"metadata": { + "samples": { + "SAMPLE1": { "b": "", "c": "3" }, + "SAMPLE2": { "b": "4", "c": "5" }, + "SAMPLE3": { "b": "6", "c": "7" } + } +} +``` + # Development +In order to build this plugin you will need a Java Development Kit (such as [OpenJDK](https://openjdk.org/)) and [Groovy](https://groovy-lang.org/index.html). 
For Ubuntu, this can be installed with: + +```bash +sudo apt install default-jdk groovy +``` + ## Build and install from source In order to build and install the plugin from source, please do the following: @@ -421,6 +494,20 @@ plugins { } ``` +## Run unit/integration tests + +In order to run the test cases, please clone this repository and run the following command: + +```bash +./gradlew check +``` + +To get more information for any failed tests, please run: + +```bash +./gradlew check --info +``` + # Example: nf-core/fetchngs One use case of this plugin is to structure reads and metadata downloaded from NCBI/ENA for storage in IRIDA Next by making use of the [nf-core/fetchngs][nf-core/fetchngs] pipeline. The example configuration [fetchngs.conf][] can be used for this purpose. To test, please run the following (using [ids.csv][fetchngs-ids.csv] as example data accessions): diff --git a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy index 5b79c7c..d538f88 100644 --- a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy +++ b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy @@ -5,6 +5,7 @@ import java.nio.file.FileSystems import nextflow.iridanext.MetadataParser import nextflow.iridanext.MetadataParserCSV import spock.lang.Specification +import groovy.lang.MissingPropertyException import nextflow.iridanext.TestHelper @@ -70,4 +71,36 @@ class MetadataParserCSVTest extends Specification { ] csvMapUnmatch == [:] } + + def 'Test parse CSV file with missing values' () { + when: + def csvContent = """a,b,c + |1,2, + |4,,""".stripMargin() + def csvFile = TestHelper.createInMemTempFile("temp.csv", csvContent) + def parser = new MetadataParserCSV("a", ",") + def csvMapColA = parser.parseMetadata(csvFile) + + then: + csvMapColA == [ + "1": ["b": "2", "c": ""], + "4": ["b": "", "c": ""] + ] + } + + def 'Test 
parse CSV file with missing ids' () { + when: + def csvContent = """a,b,c + |1,2,3 + |4,,6""".stripMargin() + def csvFile = TestHelper.createInMemTempFile("temp.csv", csvContent) + + parser = new MetadataParserCSV("b", ",") + def csvMapColB = parser.parseMetadata(csvFile) + + then: + // the column of identifiers is column "b", which has a missing value + // and so should trigger an exception. NOTE(review): parser is assigned above without def; unless the spec declares a parser field outside this hunk, that assignment itself raises MissingPropertyException before parseMetadata runs, so this check may pass for the wrong reason - confirm + thrown(MissingPropertyException) + } } \ No newline at end of file diff --git a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy index 5fac93f..f8cf87a 100644 --- a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy +++ b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy @@ -46,4 +46,22 @@ class MetadataParserJSONTest extends Specification { "2": ["coords": ["x": 0, "y": 1], "coords.x": 4] ] } + + def 'Test parse JSON file missing values' () { + when: + def jsonContent = '''{ + "1": {"b": "", "c": "3"}, + "2": {"b": "3", "c": null} + }'''.stripMargin() + + def jsonFile = TestHelper.createInMemTempFile("temp.json", jsonContent) + def parser = new MetadataParserJSON() + def outputData = parser.parseMetadata(jsonFile) + + then: + outputData == [ + "1": ["b": "", "c": "3"], + "2": ["b": "3", "c": null] + ] + } } \ No newline at end of file