From 10828a04466d9199ed4c0a8bbe795bf5503ee6ad Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 23 Jan 2025 12:59:40 -0600 Subject: [PATCH 01/16] Adding locking parameter. --- nextflow.config | 4 +--- workflows/metadatatransformation.nf | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index bfe382b..c2ca704 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,9 +11,7 @@ params { // Input options input = null - project_name = 'assembly' - assembler = 'stub' - random_seed = 1 + transformation = null // Boilerplate options outdir = null diff --git a/workflows/metadatatransformation.nf b/workflows/metadatatransformation.nf index da14227..6eb4b4e 100644 --- a/workflows/metadatatransformation.nf +++ b/workflows/metadatatransformation.nf @@ -64,6 +64,16 @@ workflow METADATATRANSFORMATION { input.view() + if(params.transformation == 'lock') { + + } + else if (params.transformation == null) { + exit 1, "Unspecified transformation '--transformation'. Exiting now." + } + else { + exit 1, "Unrecognized transformation '--transformation ${params.transformation}'. Exiting now." + } + //CUSTOM_DUMPSOFTWAREVERSIONS ( // ch_versions.unique().collectFile(name: 'collated_versions.yml') //) From 6805867d6580bd922f1795027e0cbc83de765d5c Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 23 Jan 2025 15:05:24 -0600 Subject: [PATCH 02/16] Basic CSV writing, IRIDA plugin not working. 
--- assets/samplesheet.csv | 2 +- nextflow.config | 18 ++++++++++++++++++ nextflow_schema.json | 21 --------------------- workflows/metadatatransformation.nf | 6 +++--- 4 files changed, 22 insertions(+), 25 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 814a27d..594c685 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,4 @@ sample,fastq_1,fastq_2 SAMPLE1,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R2.fastq.gz SAMPLE2,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample2_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample2_R2.fastq.gz -SAMPLE3,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R1.fastq.gz, +SAMPLE3,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R2.fastq.gz diff --git a/nextflow.config b/nextflow.config index c2ca704..56c2035 100644 --- a/nextflow.config +++ b/nextflow.config @@ -158,8 +158,26 @@ process.ext.override_configured_container_registry = true // Nextflow plugins plugins { id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-iridanext' } +iridanext { + enabled = true + output { + path = "${params.outdir}/iridanext.output.json.gz" + overwrite = true + metadata { + samples { + csv { + path = "**/lock/locked.csv" + idcol = "column1" + } + } + } + } +} + + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the 
container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. diff --git a/nextflow_schema.json b/nextflow_schema.json index 6e40139..a379fbb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -29,27 +29,6 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, - "project_name": { - "type": "string", - "default": "assembly", - "pattern": "^\\S+$", - "description": "The name of the project.", - "fa_icon": "fas fa-tag" - }, - "assembler": { - "type": "string", - "default": "stub", - "fa_icon": "fas fa-desktop", - "description": "The sequence assembler to use for sequence assembly.", - "enum": ["default", "stub", "experimental"] - }, - "random_seed": { - "type": "integer", - "default": 1, - "fa_icon": "fas fa-dice-six", - "description": "The random seed to use for sequence assembly.", - "minimum": 1 - }, "email": { "type": "string", "description": "Email address for completion summary.", diff --git a/workflows/metadatatransformation.nf b/workflows/metadatatransformation.nf index 6eb4b4e..821edc3 100644 --- a/workflows/metadatatransformation.nf +++ b/workflows/metadatatransformation.nf @@ -37,6 +37,7 @@ WorkflowMetadatatransformation.initialise(params, log) IMPORT NF-CORE MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { LOCK_METADATA } from '../modules/local/lock/main' // // MODULE: Installed directly from nf-core/modules @@ -62,10 +63,9 @@ workflow METADATATRANSFORMATION { fastq_2 ? 
tuple(meta, [ file(fastq_1), file(fastq_2) ]) : tuple(meta, [ file(fastq_1) ])} - input.view() - + // LOCK METADATA if(params.transformation == 'lock') { - + LOCK_METADATA (input.collect{ [it] }) } else if (params.transformation == null) { exit 1, "Unspecified transformation '--transformation'. Exiting now." From e09d346e6ac5adba4bb69c87d65b4f072d8a2360 Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 23 Jan 2025 15:05:47 -0600 Subject: [PATCH 03/16] Adding lock module. --- modules/local/lock/main.nf | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 modules/local/lock/main.nf diff --git a/modules/local/lock/main.nf b/modules/local/lock/main.nf new file mode 100644 index 0000000..cf28b80 --- /dev/null +++ b/modules/local/lock/main.nf @@ -0,0 +1,24 @@ +process LOCK_METADATA { + tag "Locks metadata" + label 'process_single' + + input: + val input + + output: + path("locked.csv"), emit: locked + + exec: + task.workDir.resolve("locked.csv").withWriter { writer -> + // Header: + writer.writeLine("column1,column2,column3") + + // Contents: + input.each { + name = it[0].id + metadata = it[1] + line = ([name] + metadata).join(",") + writer.writeLine(line) + } + } +} \ No newline at end of file From 32f44d00a1b937e27fc241c06cdb7aff6333ca87 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 28 Jan 2025 15:05:36 -0600 Subject: [PATCH 04/16] Channel operator for manually adding IDs to valid IDs. 
--- nextflow.config | 2 +- workflows/metadatatransformation.nf | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 56c2035..3d323fa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -158,7 +158,7 @@ process.ext.override_configured_container_registry = true // Nextflow plugins plugins { id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet - id 'nf-iridanext' + id 'nf-iridanext@0.3.0' } iridanext { diff --git a/workflows/metadatatransformation.nf b/workflows/metadatatransformation.nf index 821edc3..56dc76a 100644 --- a/workflows/metadatatransformation.nf +++ b/workflows/metadatatransformation.nf @@ -5,6 +5,7 @@ */ include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' +include { parseSamplesheet } from 'plugin/nf-iridanext' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' @@ -63,6 +64,8 @@ workflow METADATATRANSFORMATION { fastq_2 ? tuple(meta, [ file(fastq_1), file(fastq_2) ]) : tuple(meta, [ file(fastq_1) ])} + input.parseSamplesheet() + // LOCK METADATA if(params.transformation == 'lock') { LOCK_METADATA (input.collect{ [it] }) From f2fd8621cd451c0381078856dc1e2b9ea49ace3a Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 29 Jan 2025 14:13:55 -0600 Subject: [PATCH 05/16] Adding metadata. 
--- assets/samplesheet.csv | 8 +-- assets/schema_input.json | 76 +++++++++++++++++++++-------- modules/local/lock/main.nf | 12 ++--- nextflow.config | 12 ++++- nextflow_schema.json | 52 +++++++++++++++++++- workflows/metadatatransformation.nf | 30 +++++++++--- 6 files changed, 150 insertions(+), 40 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 594c685..e398fa9 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,4 @@ -sample,fastq_1,fastq_2 -SAMPLE1,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R2.fastq.gz -SAMPLE2,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample2_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample2_R2.fastq.gz -SAMPLE3,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/amplicon/sample1_R2.fastq.gz +sample,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 +sample2,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sample3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 \ No newline at end of file diff --git a/assets/schema_input.json b/assets/schema_input.json index 80fecd8..59abfc8 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -14,25 +14,63 @@ "unique": true, "errorMessage": "Sample name must be provided and cannot contain spaces" }, - "fastq_1": { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q(\\.gz)?$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have the extension: '.fq', '.fastq', '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must 
have the extension: '.fq', '.fastq', '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q(\\.gz)?$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "metadata_1": { + "type": "string", + "meta": ["metadata_1"], + "errorMessage": "Metadata associated with the sample (metadata_1).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" + }, + "metadata_2": { + "type": "string", + "meta": ["metadata_2"], + "errorMessage": "Metadata associated with the sample (metadata_2).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" + }, + "metadata_3": { + "type": "string", + "meta": ["metadata_3"], + "errorMessage": "Metadata associated with the sample (metadata_3).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" + }, + "metadata_4": { + "type": "string", + "meta": ["metadata_4"], + "errorMessage": "Metadata associated with the sample (metadata_4).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" + }, + "metadata_5": { + "type": "string", + "meta": ["metadata_5"], + "errorMessage": "Metadata associated with the sample (metadata_5).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" + }, + "metadata_6": { + "type": "string", + "meta": ["metadata_6"], + "errorMessage": "Metadata associated with the sample (metadata_6).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" + }, + "metadata_7": { + "type": "string", + "meta": ["metadata_7"], + "errorMessage": "Metadata associated with the sample (metadata_7).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" + }, + "metadata_8": { + "type": "string", + "meta": ["metadata_8"], + "errorMessage": "Metadata associated with the sample (metadata_8).", + "default": "", + "pattern": "^[^\\n\\t\"]+$" } }, - "required": ["sample", "fastq_1"] + "required": ["sample"] } -} +} \ No newline at end of file diff --git a/modules/local/lock/main.nf b/modules/local/lock/main.nf index cf28b80..1356ab7 100644 --- a/modules/local/lock/main.nf +++ b/modules/local/lock/main.nf @@ -3,7 +3,8 @@ process LOCK_METADATA { label 
'process_single' input: - val input + val metadata_headers + val metadata_rows output: path("locked.csv"), emit: locked @@ -11,14 +12,11 @@ process LOCK_METADATA { exec: task.workDir.resolve("locked.csv").withWriter { writer -> // Header: - writer.writeLine("column1,column2,column3") + writer.writeLine(metadata_headers.join(",")) // Contents: - input.each { - name = it[0].id - metadata = it[1] - line = ([name] + metadata).join(",") - writer.writeLine(line) + metadata_rows.each { + writer.writeLine(it.join(",")) } } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 3d323fa..8b3ca6b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,16 @@ params { validationSchemaIgnoreParams = 'genomes,igenomes_base' validationShowHiddenParams = false validate_params = true + + // Metadata + metadata_1_header = "metadata_1" + metadata_2_header = "metadata_2" + metadata_3_header = "metadata_3" + metadata_4_header = "metadata_4" + metadata_5_header = "metadata_5" + metadata_6_header = "metadata_6" + metadata_7_header = "metadata_7" + metadata_8_header = "metadata_8" } // Load base.config by default for all pipelines @@ -170,7 +180,7 @@ iridanext { samples { csv { path = "**/lock/locked.csv" - idcol = "column1" + idcol = "sample" } } } diff --git a/nextflow_schema.json b/nextflow_schema.json index a379fbb..635aa17 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -190,6 +190,46 @@ "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} } + }, + "metadata": { + "title": "Metadata", + "type": "object", + "description": "", + "default": "", + "properties": { + "metadata_1_header": { + "type": "string", + "default": "metadata_1" + }, + "metadata_2_header": { + "type": "string", + "default": "metadata_2" + }, + "metadata_3_header": { + "type": "string", + "default": "metadata_3" + }, + "metadata_4_header": { + "type": "string", + "default": "metadata_4" + }, + "metadata_5_header": { + "type": "string", + "default": "metadata_5" + }, + "metadata_6_header": { + "type": "string", + "default": "metadata_6" + }, + "metadata_7_header": { + "type": "string", + "default": "metadata_7" + }, + "metadata_8_header": { + "type": "string", + "default": "metadata_8" + } + } } }, "allOf": [ @@ -204,6 +244,16 @@ }, { "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/metadata" + } + ], + "properties": { + "transformation": { + "type": "string", + "description": "The type of transformation to perform.", + "default": "lock" } - ] + } } diff --git a/workflows/metadatatransformation.nf b/workflows/metadatatransformation.nf index 56dc76a..9f3f86d 100644 --- a/workflows/metadatatransformation.nf +++ b/workflows/metadatatransformation.nf @@ -52,23 +52,36 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft */ workflow METADATATRANSFORMATION { - + SAMPLE_HEADER = "sample" ch_versions = Channel.empty() // Create a new channel of metadata from a sample sheet // NB: `input` corresponds to `params.input` and associated sample sheet schema input = Channel.fromSamplesheet("input") - // Map the inputs so that they conform to the nf-core-expected "reads" format. - // Either [meta, [fastq_1]] or [meta, [fastq_1, fastq_2]] if fastq_2 exists - .map { meta, fastq_1, fastq_2 -> - fastq_2 ? 
tuple(meta, [ file(fastq_1), file(fastq_2) ]) : - tuple(meta, [ file(fastq_1) ])} - input.parseSamplesheet() + metadata_headers = Channel.of( + tuple( + SAMPLE_HEADER, + params.metadata_1_header, params.metadata_2_header, + params.metadata_3_header, params.metadata_4_header, + params.metadata_5_header, params.metadata_6_header, + params.metadata_7_header, params.metadata_8_header) + ) + + input.view() + metadata_rows = input.map{ + meta = it[0] + tuple(meta.id, + meta.metadata_1, meta.metadata_2, meta.metadata_3, meta.metadata_4, + meta.metadata_5, meta.metadata_6, meta.metadata_7, meta.metadata_8) + }.toList() + metadata_rows.view() + // LOCK METADATA + //* if(params.transformation == 'lock') { - LOCK_METADATA (input.collect{ [it] }) + LOCK_METADATA (metadata_headers, metadata_rows) } else if (params.transformation == null) { exit 1, "Unspecified transformation '--transformation'. Exiting now." @@ -76,6 +89,7 @@ workflow METADATATRANSFORMATION { else { exit 1, "Unrecognized transformation '--transformation ${params.transformation}'. Exiting now." } + //*/ //CUSTOM_DUMPSOFTWAREVERSIONS ( // ch_versions.unique().collectFile(name: 'collated_versions.yml') From 26e35e150b62dfc0d5a7c9829a88805873f06644 Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 30 Jan 2025 12:22:22 -0600 Subject: [PATCH 06/16] Adding support for sample IDs and Irida IDs. 
--- assets/samplesheet.csv | 8 ++++---- assets/schema_input.json | 9 +++++++-- workflows/metadatatransformation.nf | 22 ++++++++++++++++++++-- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index e398fa9..e615341 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,4 @@ -sample,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -sample1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 -sample2,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 -sample3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 \ No newline at end of file +sample,sample_name,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,"ABC",1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 +sample2,"DEF",2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sample3,"GHI",3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 \ No newline at end of file diff --git a/assets/schema_input.json b/assets/schema_input.json index 59abfc8..fa21337 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,9 +10,14 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "meta": ["id"], + "meta": ["irida_id"], "unique": true, - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces." 
+ }, + "sample_name": { + "type": "string", + "meta": ["id"], + "errorMessage": "Sample name is optional, if provided will replace sample for filenames and outputs" }, "metadata_1": { "type": "string", diff --git a/workflows/metadatatransformation.nf b/workflows/metadatatransformation.nf index 9f3f86d..ee4c526 100644 --- a/workflows/metadatatransformation.nf +++ b/workflows/metadatatransformation.nf @@ -55,9 +55,29 @@ workflow METADATATRANSFORMATION { SAMPLE_HEADER = "sample" ch_versions = Channel.empty() + // Track processed IDs + def processedIDs = [] as Set + // Create a new channel of metadata from a sample sheet // NB: `input` corresponds to `params.input` and associated sample sheet schema input = Channel.fromSamplesheet("input") + input = input.map { + meta = it[0] + if (!meta.id) { + meta.id = meta.irida_id + } else { + // Non-alphanumeric characters (excluding _,-,.) will be replaced with "_" + meta.id = meta.id.replaceAll(/[^A-Za-z0-9_.\-]/, '_') + } + // Ensure ID is unique by appending meta.irida_id if needed + while (processedIDs.contains(meta.id)) { + meta.id = "${meta.id}_${meta.irida_id}" + } + // Add the ID to the set of processed IDs + processedIDs << meta.id + + tuple(meta) + } input.parseSamplesheet() metadata_headers = Channel.of( @@ -69,14 +89,12 @@ workflow METADATATRANSFORMATION { params.metadata_7_header, params.metadata_8_header) ) - input.view() metadata_rows = input.map{ meta = it[0] tuple(meta.id, meta.metadata_1, meta.metadata_2, meta.metadata_3, meta.metadata_4, meta.metadata_5, meta.metadata_6, meta.metadata_7, meta.metadata_8) }.toList() - metadata_rows.view() // LOCK METADATA //* From b8c31125ebc81f4da97fa40cdb3bc363a924eb82 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 4 Feb 2025 15:15:36 -0600 Subject: [PATCH 07/16] Updating to match latest plugin code, outputting both IDs. 
--- workflows/metadatatransformation.nf | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/workflows/metadatatransformation.nf b/workflows/metadatatransformation.nf index ee4c526..70631bb 100644 --- a/workflows/metadatatransformation.nf +++ b/workflows/metadatatransformation.nf @@ -5,7 +5,7 @@ */ include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' -include { parseSamplesheet } from 'plugin/nf-iridanext' +include { loadIridaSampleIds } from 'plugin/nf-iridanext' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' @@ -52,7 +52,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft */ workflow METADATATRANSFORMATION { - SAMPLE_HEADER = "sample" + SAMPLE_ID_HEADER = "sample_id" + IRIDA_ID_HEADER = "irida_id" ch_versions = Channel.empty() // Track processed IDs @@ -77,12 +78,11 @@ workflow METADATATRANSFORMATION { processedIDs << meta.id tuple(meta) - } - input.parseSamplesheet() + }.loadIridaSampleIds() metadata_headers = Channel.of( tuple( - SAMPLE_HEADER, + SAMPLE_ID_HEADER, IRIDA_ID_HEADER, params.metadata_1_header, params.metadata_2_header, params.metadata_3_header, params.metadata_4_header, params.metadata_5_header, params.metadata_6_header, @@ -91,13 +91,12 @@ workflow METADATATRANSFORMATION { metadata_rows = input.map{ meta = it[0] - tuple(meta.id, + tuple(meta.id, meta.irida_id, meta.metadata_1, meta.metadata_2, meta.metadata_3, meta.metadata_4, meta.metadata_5, meta.metadata_6, meta.metadata_7, meta.metadata_8) }.toList() // LOCK METADATA - //* if(params.transformation == 'lock') { LOCK_METADATA (metadata_headers, metadata_rows) } @@ -107,11 +106,10 @@ workflow METADATATRANSFORMATION { else { exit 1, "Unrecognized transformation '--transformation ${params.transformation}'. Exiting now." 
} - //*/ - //CUSTOM_DUMPSOFTWAREVERSIONS ( - // ch_versions.unique().collectFile(name: 'collated_versions.yml') - //) + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) } /* From 3a182e4c9735f204be5b9c4fa54e7d8ec74e9b6a Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 4 Feb 2025 15:23:08 -0600 Subject: [PATCH 08/16] Linting fixes. --- nextflow.config | 2 +- nextflow_schema.json | 120 +++++++++++++++++++++++++------------------ 2 files changed, 70 insertions(+), 52 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8b3ca6b..cd3255d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,7 @@ params { // Input options input = null - transformation = null + transformation = 'lock' // Boilerplate options outdir = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 635aa17..b15730a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -38,6 +38,68 @@ } } }, + "transformation_options": { + "title": "Transformation options", + "type": "object", + "description": "", + "default": "", + "properties": { + "transformation": { + "type": "string", + "description": "The type of transformation to perform.", + "default": "lock", + "enum": ["lock"] + } + } + }, + "metadata": { + "title": "Metadata", + "type": "object", + "description": "", + "default": "", + "properties": { + "metadata_1_header": { + "type": "string", + "default": "metadata_1", + "description": "The column header name for the 1st metadata column." + }, + "metadata_2_header": { + "type": "string", + "default": "metadata_2", + "description": "The column header name for the 2nd metadata column." + }, + "metadata_3_header": { + "type": "string", + "default": "metadata_3", + "description": "The column header name for the 3rd metadata column." + }, + "metadata_4_header": { + "type": "string", + "default": "metadata_4", + "description": "The column header name for the 4th metadata column." 
+ }, + "metadata_5_header": { + "type": "string", + "default": "metadata_5", + "description": "The column header name for the 5th metadata column." + }, + "metadata_6_header": { + "type": "string", + "default": "metadata_6", + "description": "The column header name for the 6th metadata column." + }, + "metadata_7_header": { + "type": "string", + "default": "metadata_7", + "description": "The column header name for the 7th metadata column." + }, + "metadata_8_header": { + "type": "string", + "default": "metadata_8", + "description": "The column header name for the 8th metadata column." + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -190,52 +252,18 @@ "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } - }, - "metadata": { - "title": "Metadata", - "type": "object", - "description": "", - "default": "", - "properties": { - "metadata_1_header": { - "type": "string", - "default": "metadata_1" - }, - "metadata_2_header": { - "type": "string", - "default": "metadata_2" - }, - "metadata_3_header": { - "type": "string", - "default": "metadata_3" - }, - "metadata_4_header": { - "type": "string", - "default": "metadata_4" - }, - "metadata_5_header": { - "type": "string", - "default": "metadata_5" - }, - "metadata_6_header": { - "type": "string", - "default": "metadata_6" - }, - "metadata_7_header": { - "type": "string", - "default": "metadata_7" - }, - "metadata_8_header": { - "type": "string", - "default": "metadata_8" - } - } } }, "allOf": [ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/transformation_options" + }, + { + "$ref": "#/definitions/metadata" + }, { "$ref": "#/definitions/institutional_config_options" }, @@ -244,16 +272,6 @@ }, { "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/metadata" - } - ], - "properties": { 
- "transformation": { - "type": "string", - "description": "The type of transformation to perform.", - "default": "lock" } - } + ] } From 0e7860e67c5f7319a63115c3113c90699a124d67 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 4 Feb 2025 15:27:25 -0600 Subject: [PATCH 09/16] Missing newlines. --- assets/samplesheet.csv | 2 +- modules/local/lock/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index e615341..c34a2fc 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,4 @@ sample,sample_name,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 sample1,"ABC",1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 sample2,"DEF",2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 -sample3,"GHI",3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 \ No newline at end of file +sample3,"GHI",3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 diff --git a/modules/local/lock/main.nf b/modules/local/lock/main.nf index 1356ab7..4a8cbc0 100644 --- a/modules/local/lock/main.nf +++ b/modules/local/lock/main.nf @@ -19,4 +19,4 @@ process LOCK_METADATA { writer.writeLine(it.join(",")) } } -} \ No newline at end of file +} From 20c1f9c584b674a2d53357cdf24abfb0e51d5d62 Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 5 Feb 2025 11:01:23 -0600 Subject: [PATCH 10/16] Setting up basic tests. 
--- nextflow.config | 2 +- tests/data/configs/irida_id.config | 4 + tests/data/samplesheets/basic.csv | 4 + tests/nextflow.config | 2 + tests/pipelines/integration.nf.test | 140 ++++++++++++++++++++++++++++ 5 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 tests/data/configs/irida_id.config create mode 100644 tests/data/samplesheets/basic.csv create mode 100644 tests/pipelines/integration.nf.test diff --git a/nextflow.config b/nextflow.config index cd3255d..b6b1f0f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -180,7 +180,7 @@ iridanext { samples { csv { path = "**/lock/locked.csv" - idcol = "sample" + idcol = "sample_id" } } } diff --git a/tests/data/configs/irida_id.config b/tests/data/configs/irida_id.config new file mode 100644 index 0000000..3e5d289 --- /dev/null +++ b/tests/data/configs/irida_id.config @@ -0,0 +1,4 @@ +iridanext.output.path = "${params.outdir}/iridanext.output.json" +iridanext.output.metadata.samples.csv.path = "**/lock/locked.csv" +iridanext.output.metadata.samples.csv.idcol = "irida_id" +iridanext.output.files.idkey = "irida_id" \ No newline at end of file diff --git a/tests/data/samplesheets/basic.csv b/tests/data/samplesheets/basic.csv new file mode 100644 index 0000000..c34a2fc --- /dev/null +++ b/tests/data/samplesheets/basic.csv @@ -0,0 +1,4 @@ +sample,sample_name,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,"ABC",1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 +sample2,"DEF",2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sample3,"GHI",3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 diff --git a/tests/nextflow.config b/tests/nextflow.config index c19b1ad..29c45b6 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -3,3 +3,5 @@ Nextflow config file for running tests ======================================================================================== */ + +iridanext.output.path = "${params.outdir}/iridanext.output.json" \ No newline at end of file diff --git 
a/tests/pipelines/integration.nf.test b/tests/pipelines/integration.nf.test new file mode 100644 index 0000000..bb9dead --- /dev/null +++ b/tests/pipelines/integration.nf.test @@ -0,0 +1,140 @@ +nextflow_pipeline { + + name "Metadata Transformation Testing" + script "main.nf" + + test("Basic lock - sample ID") { + tag "pipeline_basic_sample_id" + + when { + params { + input = "$baseDir/tests/data/samplesheets/basic.csv" + outdir = "results" + + transformation = "lock" + } + } + + then { + assert workflow.success + assert path("$launchDir/results").exists() + + // Check Locked Results + def locked = path("$launchDir/results/lock/locked.csv") + assert locked.exists() + + assert locked.text.contains("sample_id,irida_id,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8") + assert locked.text.contains("ABC,sample1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8") + assert locked.text.contains("DEF,sample2,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8") + assert locked.text.contains("GHI,sample3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8") + + // Check IRIDA Next JSON Output + def iridanext_json = path("$launchDir/results/iridanext.output.json").json + def iridanext_metadata = iridanext_json.metadata.samples + + assert iridanext_metadata.size() == 3 + + assert iridanext_metadata.containsKey("ABC") + assert iridanext_metadata.ABC.irida_id == "sample1" + assert iridanext_metadata.ABC.metadata_1 == "1.1" + assert iridanext_metadata.ABC.metadata_2 == "1.2" + assert iridanext_metadata.ABC.metadata_3 == "1.3" + assert iridanext_metadata.ABC.metadata_4 == "1.4" + assert iridanext_metadata.ABC.metadata_5 == "1.5" + assert iridanext_metadata.ABC.metadata_6 == "1.6" + assert iridanext_metadata.ABC.metadata_7 == "1.7" + assert iridanext_metadata.ABC.metadata_8 == "1.8" + + assert iridanext_metadata.containsKey("DEF") + assert iridanext_metadata.DEF.irida_id == "sample2" + assert iridanext_metadata.DEF.metadata_1 == "2.1" + assert iridanext_metadata.DEF.metadata_2 == "2.2" + assert 
iridanext_metadata.DEF.metadata_3 == "2.3" + assert iridanext_metadata.DEF.metadata_4 == "2.4" + assert iridanext_metadata.DEF.metadata_5 == "2.5" + assert iridanext_metadata.DEF.metadata_6 == "2.6" + assert iridanext_metadata.DEF.metadata_7 == "2.7" + assert iridanext_metadata.DEF.metadata_8 == "2.8" + + assert iridanext_metadata.containsKey("GHI") + assert iridanext_metadata.GHI.irida_id == "sample3" + assert iridanext_metadata.GHI.metadata_1 == "3.1" + assert iridanext_metadata.GHI.metadata_2 == "3.2" + assert iridanext_metadata.GHI.metadata_3 == "3.3" + assert iridanext_metadata.GHI.metadata_4 == "3.4" + assert iridanext_metadata.GHI.metadata_5 == "3.5" + assert iridanext_metadata.GHI.metadata_6 == "3.6" + assert iridanext_metadata.GHI.metadata_7 == "3.7" + assert iridanext_metadata.GHI.metadata_8 == "3.8" + } + } + + test("Basic lock - IRIDA ID") { + tag "pipeline_basic_irida_id" + config "$baseDir/tests/data/configs/irida_id.config" + + when { + params { + input = "$baseDir/tests/data/samplesheets/basic.csv" + outdir = "results" + + transformation = "lock" + } + } + + then { + assert workflow.success + assert path("$launchDir/results").exists() + + // Check Locked Results + def locked = path("$launchDir/results/lock/locked.csv") + assert locked.exists() + + assert locked.text.contains("sample_id,irida_id,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8") + assert locked.text.contains("ABC,sample1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8") + assert locked.text.contains("DEF,sample2,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8") + assert locked.text.contains("GHI,sample3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8") + + // Check IRIDA Next JSON Output + def iridanext_json = path("$launchDir/results/iridanext.output.json").json + def iridanext_metadata = iridanext_json.metadata.samples + + assert iridanext_metadata.size() == 3 + + assert iridanext_metadata.containsKey("sample1") + assert iridanext_metadata.sample1.sample_id == "ABC" + assert 
iridanext_metadata.sample1.metadata_1 == "1.1" + assert iridanext_metadata.sample1.metadata_2 == "1.2" + assert iridanext_metadata.sample1.metadata_3 == "1.3" + assert iridanext_metadata.sample1.metadata_4 == "1.4" + assert iridanext_metadata.sample1.metadata_5 == "1.5" + assert iridanext_metadata.sample1.metadata_6 == "1.6" + assert iridanext_metadata.sample1.metadata_7 == "1.7" + assert iridanext_metadata.sample1.metadata_8 == "1.8" + + /* + assert iridanext_metadata.containsKey("DEF") + assert iridanext_metadata.DEF.irida_id == "sample2" + assert iridanext_metadata.DEF.metadata_1 == "2.1" + assert iridanext_metadata.DEF.metadata_2 == "2.2" + assert iridanext_metadata.DEF.metadata_3 == "2.3" + assert iridanext_metadata.DEF.metadata_4 == "2.4" + assert iridanext_metadata.DEF.metadata_5 == "2.5" + assert iridanext_metadata.DEF.metadata_6 == "2.6" + assert iridanext_metadata.DEF.metadata_7 == "2.7" + assert iridanext_metadata.DEF.metadata_8 == "2.8" + + assert iridanext_metadata.containsKey("GHI") + assert iridanext_metadata.GHI.irida_id == "sample3" + assert iridanext_metadata.GHI.metadata_1 == "3.1" + assert iridanext_metadata.GHI.metadata_2 == "3.2" + assert iridanext_metadata.GHI.metadata_3 == "3.3" + assert iridanext_metadata.GHI.metadata_4 == "3.4" + assert iridanext_metadata.GHI.metadata_5 == "3.5" + assert iridanext_metadata.GHI.metadata_6 == "3.6" + assert iridanext_metadata.GHI.metadata_7 == "3.7" + assert iridanext_metadata.GHI.metadata_8 == "3.8" + */ + } + } +} From cdf4a54b80e81c1462ec3fa185a20abf314ee366 Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 5 Feb 2025 13:09:11 -0600 Subject: [PATCH 11/16] Parameter tests. 
--- tests/pipelines/integration.nf.test | 38 +++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/pipelines/integration.nf.test b/tests/pipelines/integration.nf.test index bb9dead..edb19f3 100644 --- a/tests/pipelines/integration.nf.test +++ b/tests/pipelines/integration.nf.test @@ -137,4 +137,42 @@ nextflow_pipeline { */ } } + + test("Invalid transformation") { + tag "pipeline_invalid_transformation" + + when { + params { + input = "$baseDir/tests/data/samplesheets/basic.csv" + outdir = "results" + + transformation = "UNKNOWN123" + } + } + + then { + assert workflow.failed + def output = workflow.stdout.join("\n") + assert output.contains("Validation of pipeline parameters failed!") + } + } + + test("Null transformation") { + tag "pipeline_null_transformation" + + when { + params { + input = "$baseDir/tests/data/samplesheets/basic.csv" + outdir = "results" + + transformation = null + } + } + + then { + assert workflow.failed + def output = workflow.stdout.join("\n") + assert output.contains("Unspecified transformation '--transformation'. Exiting now.") + } + } } From 2acfcea1061181ab3558c5f48ecd489bac1f6f6a Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 5 Feb 2025 15:06:28 -0600 Subject: [PATCH 12/16] More tests. 
--- tests/data/samplesheets/missing_metadata.csv | 4 ++ tests/pipelines/integration.nf.test | 66 ++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 tests/data/samplesheets/missing_metadata.csv diff --git a/tests/data/samplesheets/missing_metadata.csv b/tests/data/samplesheets/missing_metadata.csv new file mode 100644 index 0000000..652e594 --- /dev/null +++ b/tests/data/samplesheets/missing_metadata.csv @@ -0,0 +1,4 @@ +sample,sample_name,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,"ABC",,1.2,1.3,,1.5,1.6,,1.8 +sample2,,2.1,,2.3,2.4,,2.6,2.7, +sample3,"GHI",3.1,3.2,,3.4,3.5,,3.7,3.8 diff --git a/tests/pipelines/integration.nf.test b/tests/pipelines/integration.nf.test index edb19f3..7617943 100644 --- a/tests/pipelines/integration.nf.test +++ b/tests/pipelines/integration.nf.test @@ -175,4 +175,70 @@ nextflow_pipeline { assert output.contains("Unspecified transformation '--transformation'. Exiting now.") } } + + test("Basic lock - missing metadata") { + tag "pipeline_missing_metadata" + + when { + params { + input = "$baseDir/tests/data/samplesheets/missing_metadata.csv" + outdir = "results" + + transformation = "lock" + } + } + + then { + assert workflow.success + assert path("$launchDir/results").exists() + + // Check Locked Results + def locked = path("$launchDir/results/lock/locked.csv") + assert locked.exists() + + assert locked.text.contains("sample_id,irida_id,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8") + assert locked.text.contains("ABC,sample1,,1.2,1.3,,1.5,1.6,,1.8") + assert locked.text.contains("sample2,sample2,2.1,,2.3,2.4,,2.6,2.7,") + assert locked.text.contains("GHI,sample3,3.1,3.2,,3.4,3.5,,3.7,3.8") + + // Check IRIDA Next JSON Output + def iridanext_json = path("$launchDir/results/iridanext.output.json").json + def iridanext_metadata = iridanext_json.metadata.samples + + assert iridanext_metadata.size() == 3 + + assert 
iridanext_metadata.containsKey("ABC") + assert iridanext_metadata.ABC.irida_id == "sample1" + assert iridanext_metadata.ABC.metadata_1 == "" + assert iridanext_metadata.ABC.metadata_2 == "1.2" + assert iridanext_metadata.ABC.metadata_3 == "1.3" + assert iridanext_metadata.ABC.metadata_4 == "" + assert iridanext_metadata.ABC.metadata_5 == "1.5" + assert iridanext_metadata.ABC.metadata_6 == "1.6" + assert iridanext_metadata.ABC.metadata_7 == "" + assert iridanext_metadata.ABC.metadata_8 == "1.8" + + assert iridanext_metadata.containsKey("sample2") + assert iridanext_metadata.sample2.irida_id == "sample2" + assert iridanext_metadata.sample2.metadata_1 == "2.1" + assert iridanext_metadata.sample2.metadata_2 == "" + assert iridanext_metadata.sample2.metadata_3 == "2.3" + assert iridanext_metadata.sample2.metadata_4 == "2.4" + assert iridanext_metadata.sample2.metadata_5 == "" + assert iridanext_metadata.sample2.metadata_6 == "2.6" + assert iridanext_metadata.sample2.metadata_7 == "2.7" + assert iridanext_metadata.sample2.metadata_8 == "" + + assert iridanext_metadata.containsKey("GHI") + assert iridanext_metadata.GHI.irida_id == "sample3" + assert iridanext_metadata.GHI.metadata_1 == "3.1" + assert iridanext_metadata.GHI.metadata_2 == "3.2" + assert iridanext_metadata.GHI.metadata_3 == "" + assert iridanext_metadata.GHI.metadata_4 == "3.4" + assert iridanext_metadata.GHI.metadata_5 == "3.5" + assert iridanext_metadata.GHI.metadata_6 == "" + assert iridanext_metadata.GHI.metadata_7 == "3.7" + assert iridanext_metadata.GHI.metadata_8 == "3.8" + } + } } From da16c137e7cc5a6a0d96ddbc2012305f74bb8ce7 Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 5 Feb 2025 15:56:21 -0600 Subject: [PATCH 13/16] Updating docs. 
--- README.md | 93 ++++++++++++++++++++++++++++---------------------- docs/output.md | 74 ++++----------------------------------- docs/usage.md | 55 ++++++++--------------------- 3 files changed, 73 insertions(+), 149 deletions(-) diff --git a/README.md b/README.md index 16bbf29..0b56ebb 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A523.04.3-brightgreen.svg)](https://www.nextflow.io/) -# Example Pipeline for IRIDA Next +# Metadata Transformation Pipeline for IRIDA Next -This is an example pipeline to be used for integration with IRIDA Next. +This pipeline transforms metadata from IRIDA Next. # Input -The input to the pipeline is a standard sample sheet (passed as `--input samplesheet.csv`) that looks like: +The input to the pipeline is a sample sheet (passed as `--input samplesheet.csv`) that looks like: -| sample | fastq_1 | fastq_2 | -| ------- | --------------- | --------------- | -| SampleA | file_1.fastq.gz | file_2.fastq.gz | +| sample | sample_name | metadata_1 | metadata_2 | metadata_3 | metadata_4 | metadata_5 | metadata_6 | metadata_7 | metadata_8 | +| ------- | ----------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | +| Sample1 | SampleA | meta_1 | meta_2 | meta_3 | meta_4 | meta_5 | meta_6 | meta_7 | meta_8 | The structure of this file is defined in [assets/schema_input.json](assets/schema_input.json). Validation of the sample sheet is performed by [nf-validation](https://nextflow-io.github.io/nf-validation/). @@ -18,6 +18,16 @@ The structure of this file is defined in [assets/schema_input.json](assets/schem The main parameters are `--input` as defined above and `--output` for specifying the output results directory. You may wish to provide `-profile singularity` to specify the use of singularity containers and `-r [branch]` to specify which GitHub branch you would like to run. 
+## Transformation + +You may specify the metadata transformation with the `--transformation` parameter. For example, `--transformation lock` will perform the lock transformation. The available transformations are as follows: + +| Transformation | Explanation | +| -------------- | --------------------------------- | +| lock | Locks the metadata in IRIDA Next. | + +## Other Parameters + Other parameters (defaults from nf-core) are defined in [nextflow_schema.json](nextflow_schema.json). # Running @@ -25,7 +35,7 @@ Other parameters (defaults from nf-core) are defined in [nextflow_schema.json](n To run the pipeline, please do: ```bash -nextflow run phac-nml/metadatatransformation -profile singularity -r main -latest --input assets/samplesheet.csv --outdir results +nextflow run phac-nml/metadatatransformation -profile singularity -r main -latest --input assets/samplesheet.csv --outdir results --transformation lock ``` Where the `samplesheet.csv` is structured as specified in the [Input](#input) section. 
@@ -41,64 +51,65 @@ An example of the what the contents of the IRIDA Next JSON file looks like for t { "files": { "global": [ - { - "path": "summary/summary.txt.gz" - } + ], "samples": { - "SAMPLE1": [ - { - "path": "assembly/SAMPLE1.assembly.fa.gz" - } - ], - "SAMPLE2": [ - { - "path": "assembly/SAMPLE2.assembly.fa.gz" - } - ], - "SAMPLE3": [ - { - "path": "assembly/SAMPLE3.assembly.fa.gz" - } - ] + } }, "metadata": { "samples": { - "SAMPLE1": { - "reads.1": "sample1_R1.fastq.gz", - "reads.2": "sample1_R2.fastq.gz" + "ABC": { + "irida_id": "sample1", + "metadata_1": "1.1", + "metadata_2": "1.2", + "metadata_3": "1.3", + "metadata_4": "1.4", + "metadata_5": "1.5", + "metadata_6": "1.6", + "metadata_7": "1.7", + "metadata_8": "1.8" }, - "SAMPLE2": { - "reads.1": "sample2_R1.fastq.gz", - "reads.2": "sample2_R2.fastq.gz" + "DEF": { + "irida_id": "sample2", + "metadata_1": "2.1", + "metadata_2": "2.2", + "metadata_3": "2.3", + "metadata_4": "2.4", + "metadata_5": "2.5", + "metadata_6": "2.6", + "metadata_7": "2.7", + "metadata_8": "2.8" }, - "SAMPLE3": { - "reads.1": "sample1_R1.fastq.gz", - "reads.2": "null" + "GHI": { + "irida_id": "sample3", + "metadata_1": "3.1", + "metadata_2": "3.2", + "metadata_3": "3.3", + "metadata_4": "3.4", + "metadata_5": "3.5", + "metadata_6": "3.6", + "metadata_7": "3.7", + "metadata_8": "3.8" } } } } ``` -Within the `files` section of this JSON file, all of the output paths are relative to the `outdir`. Therefore, `"path": "assembly/SAMPLE1.assembly.fa.gz"` refers to a file located within `outdir/assembly/SAMPLE1.assembly.fa.gz`. - -There is also a pipeline execution summary output file provided (specified in the above JSON as `"global": [{"path":"summary/summary.txt.gz"}]`). However, there is no formatting specification for this file. - -For more information see [output doc](docs/output.md) +For more information see [output doc](docs/output.md). 
## Test profile To run with the test profile, please do: ```bash -nextflow run phac-nml/metadatatransformation -profile docker,test -r main -latest --outdir results +nextflow run phac-nml/metadatatransformation -profile docker,test -r main -latest --outdir results --transformation lock ``` # Legal -Copyright 2023 Government of Canada +Copyright 2025 Government of Canada Licensed under the MIT License (the "License"); you may not use this work except in compliance with the License. You may obtain a copy of the diff --git a/docs/output.md b/docs/output.md index e617166..5fd0fc9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -4,13 +4,9 @@ This document describes the output produced by the pipeline. -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. +The directories listed below may be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. The exact directories created may depend on which metadata transformation is performed. -- assembly: very small mock assembly files for each sample -- generate: intermediate files used in generating the IRIDA Next JSON output -- pipeline_info: information about the pipeline's execution -- simplify: simplified intermediate files used in generating the IRIDA Next JSON output -- summary: summary report about the pipeline's execution and results +- lock: the outputs of the metadata lock operation The IRIDA Next-compliant JSON output file will be named `iridanext.output.json.gz` and will be written to the top-level of the results directory. This file is compressed using GZIP and conforms to the [IRIDA Next JSON output specifications](https://github.com/phac-nml/pipeline-standards#42-irida-next-json). 
@@ -18,73 +14,15 @@ The IRIDA Next-compliant JSON output file will be named `iridanext.output.json.g The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [Assembly stub](#assembly-stub) - Performs a stub assembly by generating a mock assembly -- [Generate sample JSON](#generate-sample-json) - Generates a JSON file for each sample -- [Generate summary](#generate-summary) - Generates a summary text file describing the samples and assemblies -- [Simplify IRIDA JSON](#simplify-irida-json) - Simplifies the sample JSONs by limiting nesting depth -- [IRIDA Next Output](#irida-next-output) - Generates a JSON output file that is compliant with IRIDA Next -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [Lock](#lock) - Locks the metadata for IRIDA Next. -### Assembly stub +### Lock
Output files -- `assembly/` - - Mock assembly files: `ID.assembly.fa.gz` - -
- -### Generate sample JSON - -
-Output files - -- `generate/` - - JSON files: `ID.json.gz` - -
- -### Generate summary - -
-Output files - -- `summary/` - - Text summary describing samples and assemblies: `summary.txt.gz` - -
- -### Simplify IRIDA JSON - -
-Output files - -- `simplify/` - - Simplified JSON files: `ID.simple.json.gz` - -
- -### IRIDA Next Output - -
-Output files - -- `/` - - IRIDA Next-compliant JSON output: `iridanext.output.json.gz` - -
- -### Pipeline information - -
-Output files - -- `pipeline_info/` - - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - - Parameters used by the pipeline run: `params.json`. +- `lock/` + - A CSV-format file reporting locked files: `locked.csv`
diff --git a/docs/usage.md b/docs/usage.md index f2a60fc..880316e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,11 +2,11 @@ ## Introduction -This pipeline is an example that illustrates running a nf-core-compliant pipeline on IRIDA Next. +This pipeline transforms metadata from IRIDA Next. -## Samplesheet input +## Sample sheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a sample sheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 10 columns, and a header row as shown in the examples below. ```bash --input '[path to samplesheet file]' @@ -14,22 +14,22 @@ You will need to create a samplesheet with information about the samples you wou ### Full samplesheet -The input samplesheet must contain three columns: `ID`, `fastq_1`, `fastq_2`. The IDs within a samplesheet should be unique. All other columns will be ignored. +The input samplesheet must contain the following columns: `sample`, and `metadata_1` through `metadata_8`. The IDs within a samplesheet should be unique. You may optionally provide a `sample_name` column, which will replace the IRIDA Next IDs in the `sample` column if available. All other columns will be ignored. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. +A final samplesheet file containing the `sample_name` column may look something like the one below. 
```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -SAMPLE1,sample1_R1.fastq.gz,sample1_R2.fastq.gz -SAMPLE2,sample2_R1.fastq.gz,sample2_R2.fastq.gz -SAMPLE3,sample1_R1.fastq.gz, +sample,sample_name,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 +sample1,"ABC",1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 +sample2,"DEF",2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 +sample3,"GHI",3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. Samples should be unique within a samplesheet. | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ | +| `sample` | Sample ID. Samples should be unique within a samplesheet. Likely Irida Next IDs. | +| `sample_name` | Sample name. Likely user-provided IDs that should be unique, but are not required to be unique. Will be used over `sample` when available. | +| `metadata_1..metadata_8` | Metadata that will be used in the metadata transformations. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
@@ -38,7 +38,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run main.nf --input ./samplesheet.csv --outdir ./results -profile singularity +nextflow run phac-nml/metadatatransformation -profile singularity -r main -latest --input assets/samplesheet.csv --outdir results --transformation lock ``` This will launch the pipeline with the `singularity` configuration profile. See below for more information about profiles. @@ -58,31 +58,6 @@ Pipeline settings can be provided in a `yaml` or `json` file via `-params-file < Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -### Overriding Container Registries with the `container` Directive - -The `metadatatransformation` has implemented the process `override_configured_container_registry` ([detailed here](https://github.com/phac-nml/pipeline-standards?tab=readme-ov-file#5221-example-overriding-container-registries-with-the-container-directive)) to allow `docker.io` to be used when default registry is `quay.io` to [customize the container](#custom-containers) for the [process](/modules/local/simplifyiridajson/main.nf) `SIMPLIFY_IRIDA_JSON`. 
The process can be changed in the [nextflow.config](/./nextflow.config#L158) - -```bash -// Override the default Docker registry when required -process.ext.override_configured_container_registry = true -``` - -The above pipeline run specified with a params file in yaml format: - -```bash -nextflow run phac-nml/metadatatransformation -profile docker -params-file params.yaml -``` - -with `params.yaml` containing: - -```yaml -input: './samplesheet.csv' -outdir: './results/' -<...> -``` - -You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). - ### Reproducibility It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. From cc134daf2ab9be305768484dbb58a89f1c23306d Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 7 Feb 2025 09:51:01 -0600 Subject: [PATCH 14/16] Changing lock/locked.csv to transformation/transformation.csv to support other transformations. 
--- conf/modules.config | 9 +++++++++ docs/output.md | 4 ++-- modules/local/lock/main.nf | 6 +++--- nextflow.config | 2 +- tests/data/configs/irida_id.config | 2 +- tests/pipelines/integration.nf.test | 6 +++--- 6 files changed, 19 insertions(+), 10 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b0caa7b..1c38eb8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,6 +13,7 @@ process { // Publish directory names + TRANSFORMATION = "transformation" publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, @@ -27,4 +28,12 @@ process { pattern: '*_versions.yml' ] } + + withName: LOCK_METADATA { + publishDir = [ + path: { ["${params.outdir}", "${task.TRANSFORMATION}"].join(File.separator) }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } diff --git a/docs/output.md b/docs/output.md index 5fd0fc9..bcdf157 100644 --- a/docs/output.md +++ b/docs/output.md @@ -21,8 +21,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
Output files -- `lock/` - - A CSV-format file reporting locked files: `locked.csv` +- `transformation/` + - A CSV-format file reporting the locked metadata: `transformation.csv` 
diff --git a/modules/local/lock/main.nf b/modules/local/lock/main.nf index 4a8cbc0..a0f7b97 100644 --- a/modules/local/lock/main.nf +++ b/modules/local/lock/main.nf @@ -1,5 +1,5 @@ process LOCK_METADATA { - tag "Locks metadata" + tag "Lock metadata" label 'process_single' input: @@ -7,10 +7,10 @@ process LOCK_METADATA { val metadata_rows output: - path("locked.csv"), emit: locked + path("transformation.csv"), emit: locked exec: - task.workDir.resolve("locked.csv").withWriter { writer -> + task.workDir.resolve("transformation.csv").withWriter { writer -> // Header: writer.writeLine(metadata_headers.join(",")) diff --git a/nextflow.config b/nextflow.config index b6b1f0f..77a73c9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -179,7 +179,7 @@ iridanext { metadata { samples { csv { - path = "**/lock/locked.csv" + path = "**/transformation/transformation.csv" idcol = "sample_id" } } diff --git a/tests/data/configs/irida_id.config b/tests/data/configs/irida_id.config index 3e5d289..08eee32 100644 --- a/tests/data/configs/irida_id.config +++ b/tests/data/configs/irida_id.config @@ -1,4 +1,4 @@ iridanext.output.path = "${params.outdir}/iridanext.output.json" -iridanext.output.metadata.samples.csv.path = "**/lock/locked.csv" +iridanext.output.metadata.samples.csv.path = "**/transformation/transformation.csv" iridanext.output.metadata.samples.csv.idcol = "irida_id" iridanext.output.files.idkey = "irida_id" \ No newline at end of file diff --git a/tests/pipelines/integration.nf.test b/tests/pipelines/integration.nf.test index 7617943..0255c8f 100644 --- a/tests/pipelines/integration.nf.test +++ b/tests/pipelines/integration.nf.test @@ -20,7 +20,7 @@ nextflow_pipeline { assert path("$launchDir/results").exists() // Check Locked Results - def locked = path("$launchDir/results/lock/locked.csv") + def locked = path("$launchDir/results/transformation/transformation.csv") assert locked.exists() assert 
locked.text.contains("sample_id,irida_id,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8") @@ -87,7 +87,7 @@ nextflow_pipeline { assert path("$launchDir/results").exists() // Check Locked Results - def locked = path("$launchDir/results/lock/locked.csv") + def locked = path("$launchDir/results/transformation/transformation.csv") assert locked.exists() assert locked.text.contains("sample_id,irida_id,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8") @@ -193,7 +193,7 @@ nextflow_pipeline { assert path("$launchDir/results").exists() // Check Locked Results - def locked = path("$launchDir/results/lock/locked.csv") + def locked = path("$launchDir/results/transformation/transformation.csv") assert locked.exists() assert locked.text.contains("sample_id,irida_id,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8") From 50aa0cf39600916c47f809d7f722bd93ede3acd8 Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 7 Feb 2025 09:54:28 -0600 Subject: [PATCH 15/16] Prettier --- CHANGELOG.md | 1 - README.md | 4 ++-- assets/schema_input.json | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad5a0d9..321f2f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,4 +18,3 @@ Initial release of phac-nml/metadatatransformation, created with the [nf-core](h [Overriding container registries with the container directive]: https://github.com/phac-nml/pipeline-standards?tab=readme-ov-file#521-module-software-requirements [phac-nml pipeline standards software requirements]: https://github.com/phac-nml/pipeline-standards?tab=readme-ov-file#521-module-software-requirements [1.0.0]: https://github.com/phac-nml/metadatatransformation/releases/tag/1.0.0 - diff --git a/README.md b/README.md index 0b56ebb..ea7d3ce 100644 --- a/README.md +++ b/README.md @@ -51,10 +51,10 @@ An example of the what the contents of the IRIDA Next JSON file looks 
like for t { "files": { "global": [ - + ], "samples": { - + } }, "metadata": { diff --git a/assets/schema_input.json b/assets/schema_input.json index fa21337..ffcd49f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -78,4 +78,4 @@ }, "required": ["sample"] } -} \ No newline at end of file +} From e0accb3e05822018eb3b78ad6c148032b9d99ad1 Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 7 Feb 2025 09:55:47 -0600 Subject: [PATCH 16/16] Formatting --- modules/local/lock/main.nf | 2 +- tests/data/configs/irida_id.config | 2 +- tests/nextflow.config | 2 +- tests/pipelines/integration.nf.test | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/lock/main.nf b/modules/local/lock/main.nf index a0f7b97..92c2ce0 100644 --- a/modules/local/lock/main.nf +++ b/modules/local/lock/main.nf @@ -5,7 +5,7 @@ process LOCK_METADATA { input: val metadata_headers val metadata_rows - + output: path("transformation.csv"), emit: locked diff --git a/tests/data/configs/irida_id.config b/tests/data/configs/irida_id.config index 08eee32..53541da 100644 --- a/tests/data/configs/irida_id.config +++ b/tests/data/configs/irida_id.config @@ -1,4 +1,4 @@ iridanext.output.path = "${params.outdir}/iridanext.output.json" iridanext.output.metadata.samples.csv.path = "**/transformation/transformation.csv" iridanext.output.metadata.samples.csv.idcol = "irida_id" -iridanext.output.files.idkey = "irida_id" \ No newline at end of file +iridanext.output.files.idkey = "irida_id" diff --git a/tests/nextflow.config b/tests/nextflow.config index 29c45b6..e803b4f 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,4 +4,4 @@ ======================================================================================== */ -iridanext.output.path = "${params.outdir}/iridanext.output.json" \ No newline at end of file +iridanext.output.path = "${params.outdir}/iridanext.output.json" diff --git a/tests/pipelines/integration.nf.test 
b/tests/pipelines/integration.nf.test index 0255c8f..6366f67 100644 --- a/tests/pipelines/integration.nf.test +++ b/tests/pipelines/integration.nf.test @@ -32,7 +32,7 @@ nextflow_pipeline { def iridanext_json = path("$launchDir/results/iridanext.output.json").json def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_metadata.size() == 3 + assert iridanext_metadata.size() == 3 assert iridanext_metadata.containsKey("ABC") assert iridanext_metadata.ABC.irida_id == "sample1" @@ -99,7 +99,7 @@ nextflow_pipeline { def iridanext_json = path("$launchDir/results/iridanext.output.json").json def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_metadata.size() == 3 + assert iridanext_metadata.size() == 3 assert iridanext_metadata.containsKey("sample1") assert iridanext_metadata.sample1.sample_id == "ABC" @@ -205,7 +205,7 @@ nextflow_pipeline { def iridanext_json = path("$launchDir/results/iridanext.output.json").json def iridanext_metadata = iridanext_json.metadata.samples - assert iridanext_metadata.size() == 3 + assert iridanext_metadata.size() == 3 assert iridanext_metadata.containsKey("ABC") assert iridanext_metadata.ABC.irida_id == "sample1"