From 3ef255b55d7c79a43fc8c10d5fcb22fb6c2c20c6 Mon Sep 17 00:00:00 2001
From: Matthew Wells
Date: Tue, 2 Apr 2024 16:09:17 -0500
Subject: [PATCH 01/16] Created initial workflow

Implemented the initial workflow. It is likely that many changes will still
need to be made; however, I am focusing on implementing working unit tests
for the time being.

---
 tests/data/called/expected_results.txt           | 5 +++++
 tests/data/distances/expected_pairwise_dists.txt | 4 ++++
 2 files changed, 9 insertions(+)
 create mode 100644 tests/data/called/expected_results.txt
 create mode 100644 tests/data/distances/expected_pairwise_dists.txt

diff --git a/tests/data/called/expected_results.txt b/tests/data/called/expected_results.txt
new file mode 100644
index 0000000..8a3eec9
--- /dev/null
+++ b/tests/data/called/expected_results.txt
@@ -0,0 +1,5 @@
+id address level_1 level_2 level_3
+sample1 1.1.1 1 1 1
+sample2 1.1.1 1 1 1
+sample3 2.2.2 2 2 2
+sampleQ 1.1.1 1 1 1
diff --git a/tests/data/distances/expected_pairwise_dists.txt b/tests/data/distances/expected_pairwise_dists.txt
new file mode 100644
index 0000000..df58510
--- /dev/null
+++ b/tests/data/distances/expected_pairwise_dists.txt
@@ -0,0 +1,4 @@
+query_id ref_id dist
+sampleQ sample1 0.0
+sampleQ sample2 33.333333333333336
+sampleQ sample3 66.66666666666667

From 40e41c87896013c10de4350cea19ab27b297e539 Mon Sep 17 00:00:00 2001
From: Matthew Wells
Date: Tue, 2 Apr 2024 16:23:47 -0500
Subject: [PATCH 02/16] added initial unit tests

I added some unit tests to the pipeline; however, these tests are incomplete
and show some current work that still needs to be done in terms of altering
the output paths, which are quite messy at the moment. Additionally, more
tests with the existing data could be added, as they would really improve
the robustness of the pipeline.
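For anyone trying these out locally, the new pipeline-level test should be
runnable with something along the following lines (assuming nf-test is
installed; pick the profile that matches your container engine):

    nf-test test tests/pipelines/main.nf.test --profile singularity

---
 tests/nextflow.config        | 13 ++++++++++++
 tests/pipelines/main.nf.test | 40 ++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 tests/pipelines/main.nf.test

diff --git a/tests/nextflow.config b/tests/nextflow.config
index c19b1ad..ff66ccb 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -3,3 +3,16 @@ Nextflow config file for running tests
 ========================================================================================
 */
+
+
+params.max_memory = "2.GB"
+params.max_cpus = 1
+params.ref_clusters = "$baseDir/tests/data/expected_clusters.txt"
+
+
+/* This is required to run in WSL/Ubuntu using singularity
+Without this, profile_dists was not successfully completing
+due to issues with multiprocessing in the container. 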
A similar +error is found at https://github.com/marcelm/cutadapt/issues/583 +*/ +singularity.runOptions = "--contain" diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test new file mode 100644 index 0000000..7370ad0 --- /dev/null +++ b/tests/pipelines/main.nf.test @@ -0,0 +1,40 @@ +nextflow_pipeline { + + name "Integration test of nomenclature assignment pipeline" + script "main.nf" + + test("Small-scale test of full pipeline"){ + tag "pipeline" + + when{ + params { + input = "$baseDir/tests/data/samplesheets/samplesheet1.csv" + outdir = "results" + } + } + + then { + assert workflow.success + assert path("$launchDir/results").exists() + + // Check merged profiles + // TODO check query profile is merged + def actual_profile_ref = path("$launchDir/results/locidex/merged/query/merged_value/merged_profiles_value.tsv") + def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile1.tsv") + assert actual_profile_ref.text == expected_profile_tsv.text + + + // Check computed pairwise distances + def actual_distances = path("$launchDir/results/distances/results.text") + def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.tsv") + assert actual_distances.text == expected_distances.text + + // Check called clusters + def actual_calls = path("$launchDir/results/call/Called/results.text") + def expected_calls = path("$baseDir/tests/data/called/expected_results.txt") + assert actual_calls.text == expected_calls.text + } + } + + +} From f4d2887fb633be0b1bf6857d72db0cc294193e1e Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 2 Apr 2024 16:27:17 -0500 Subject: [PATCH 03/16] removed module tests --- conf/modules.config | 49 ++++++++++++- modules/local/gas/call/main.nf | 13 ++-- modules/local/locidex/merge/main.nf | 8 ++- modules/local/profile_dists/main.nf | 23 ++++--- nextflow.config | 17 +++++ tests/modules/local/assemblystub/main.nf.test | 38 ---------- .../local/generatesamplejson/main.nf.test | 40 ----------- .../local/generatesummary/main.nf.test | 37 ---------- .../local/iridanextoutput/main.nf.test | 51 -------------- .../local/simplifyiridajson/main.nf.test | 41 ----------- workflows/gas_nomenclature.nf | 69 ++++++++++++++++--- 11 files changed, 148 insertions(+), 238 deletions(-) delete mode 100644 tests/modules/local/assemblystub/main.nf.test delete mode 100644 tests/modules/local/generatesamplejson/main.nf.test delete mode 100644 tests/modules/local/generatesummary/main.nf.test delete mode 100644 tests/modules/local/iridanextoutput/main.nf.test delete mode 100644 tests/modules/local/simplifyiridajson/main.nf.test diff --git a/conf/modules.config b/conf/modules.config index 08fc284..86c0455 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,8 +15,11 @@ process { // Publish directory names assembly_directory_name = "assembly" summary_directory_name = "summary" + profile_dists_directory_name = "distances" + gas_call_directory_name = "call" - locidex_merge_directory_name = [params.outdir , "locidex", "merge"].join(File.separator) + locidex_merge_ref_directory_name = [params.outdir , "locidex", "merge", "reference"].join(File.separator) + locidex_merge_query_directory_name = [params.outdir , "locidex", "merge", "query"].join(File.separator) publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, @@ -56,15 +59,55 @@ process { ] } - withName: LOCIDEX_MERGE { + withName: LOCIDEX_MERGE_REF { publishDir = [ - path: locidex_merge_directory_name, + path: 
locidex_merge_ref_directory_name, mode: params.publish_dir_mode, pattern: "*/*", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: LOCIDEX_MERGE_QUERY { + publishDir = [ + path: locidex_merge_query_directory_name, + mode: params.publish_dir_mode, + pattern: "*/*", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + + withName: PROFILE_DISTS { + publishDir = [ + path: { ["${params.outdir}", "${task.profile_dists_directory_name}"].join(File.separator) }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : + filename.contains(File.separator) ? filename.split(File.separator)[-1] : filename } + ] + } + + withName: GAS_CALL { + publishDir = [ + [ + path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) }, + mode: params.publish_dir_mode, + pattern: "*/thresholds.json" + ], + [ + path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) }, + mode: params.publish_dir_mode, + pattern: "*/results.{text,parquet}" + ], + [ + path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) }, + mode: params.publish_dir_mode, + pattern: "*/run.json" + ] + ] + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/modules/local/gas/call/main.nf b/modules/local/gas/call/main.nf index 1fe2c64..33db7a7 100644 --- a/modules/local/gas/call/main.nf +++ b/modules/local/gas/call/main.nf @@ -2,7 +2,7 @@ process GAS_CALL{ label "process_high" - tag "Calling: ${meta.id}" + tag "Assigning Nomenclature" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' : @@ -10,17 +10,18 @@ process GAS_CALL{ input: - tuple val(meta), path(reference_clusters), path(distances) + path(reference_clusters) + path(distances) output: - tuple val(meta), path("${prefix}/results.{text,parquet}"), emit: distances, optional: true - tuple val(meta), path("${prefix}/thresholds.json"), emit: thresholds - tuple val(meta), path("${prefix}/run.json"), emit: run + path("${prefix}/results.{text,parquet}"), emit: distances, optional: true + path("${prefix}/thresholds.json"), emit: thresholds + path("${prefix}/run.json"), emit: run path "versions.yml", emit: versions script: // Need to add more args for gas call below - prefix = meta.id + prefix = "Called" """ gas call --dists $distances \\ --rclusters $reference_clusters \\ diff --git a/modules/local/locidex/merge/main.nf b/modules/local/locidex/merge/main.nf index bd9f3e8..b58b154 100644 --- a/modules/local/locidex/merge/main.nf +++ b/modules/local/locidex/merge/main.nf @@ -9,17 +9,19 @@ process LOCIDEX_MERGE { 'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }" input: - val input_values // [file(sample1), file(sample2), file(sample3), etc...] + path input_values // [file(sample1), file(sample2), file(sample3), etc...] 
+    val input_tag // makes the output unique and denotes the item as the reference or query to prevent name collisions

 output:
     path("${combined_dir}/*.tsv"), emit: combined_profiles
-    path("${combined_dir}/*.json"), emit: report
     path "versions.yml", emit: versions

 script:
-    combined_dir = "merged"
+    combined_dir = "merged_${input_tag}"
     """
     locidex merge -i ${input_values.join(' ')} -o ${combined_dir}
+
+    mv ${combined_dir}/*.tsv ${combined_dir}/merged_profiles_${input_tag}.tsv
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         locidex merge: \$(echo \$(locidex search -V 2>&1) | sed 's/^.*locidex //' )
diff --git a/modules/local/profile_dists/main.nf b/modules/local/profile_dists/main.nf
index 2e48a02..b7a0933 100644
--- a/modules/local/profile_dists/main.nf
+++ b/modules/local/profile_dists/main.nf
@@ -1,24 +1,25 @@
 process PROFILE_DISTS{
     label "process_high"
-    tag "Pairwise Distance Generation: ${meta.id}"
+    tag "Gathering Distances Between Reference and Query Profiles"

     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' :
         'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"

     input:
-    tuple val(meta), path(query), path(ref)
+    path query
+    path ref
     val mapping_format
-    path(mapping_file)
-    path(columns)
+    path mapping_file
+    path columns

     output:
-    tuple val(meta), path("${prefix}_${mapping_format}/allele_map.json"), emit: allele_map
-    tuple val(meta), path("${prefix}_${mapping_format}/query_profile.{text,parquet}"), emit: query_profile
-    tuple val(meta), path("${prefix}_${mapping_format}/ref_profile.{text,parquet}"), emit: ref_profile
-    tuple val(meta), path("${prefix}_${mapping_format}/results.{text,parquet}"), emit: results
-    tuple val(meta), path("${prefix}_${mapping_format}/run.json"), emit: run
+    path("${prefix}/allele_map.json"), emit: allele_map
+    path("${prefix}/query_profile.{text,parquet}"), emit: query_profile
+    path("${prefix}/ref_profile.{text,parquet}"), emit: ref_profile
+    path("${prefix}/results.{text,parquet}"), emit: results
+    path("${prefix}/run.json"), emit: run
     path "versions.yml", emit: versions


    script:

        args = args + " --count_missing"
    }
    // --match_threshold $params.profile_dists.match_thresh \\
-    prefix = meta.id
+    prefix = "distances_${mapping_format}"
    """
    profile_dists --query $query --ref $ref $args --outfmt $mapping_format \\
        --distm $params.pd_distm \\
        --missing_thresh $params.pd_missing_threshold \\
        --sample_qual_thresh $params.pd_sample_quality_threshold \\
        --max_mem ${task.memory.toGiga()} \\
        --cpus ${task.cpus} \\
-        -o ${prefix}_${mapping_format}
+        -o ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
diff --git a/nextflow.config b/nextflow.config
index bddc99e..89da1c6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -47,6 +47,23 @@ params {
     validate_params            = true

     // Profile Dists
+    pd_outfmt = "pairwise"
+    pd_distm = "scaled"
+    pd_missing_threshold = 1.0
+    pd_sample_quality_threshold = 1.0
+    pd_match_threshold = -1.0
+    pd_file_type = "text"
+    pd_mapping_file = null // default is no file
+    pd_force = false
+    pd_skip = false
+    pd_columns = null
+    pd_count_missing = true
+
+
+    // GAS Call
+    gm_thresholds = "10,5,0"
+    gm_delimiter = "'.'" // note the single quotes surrounding the delimiter
+    ref_clusters = ""
 }
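For reference, the defaults above feed the PROFILE_DISTS script block, which
ends up assembling a command roughly like the following (a sketch only;
--max_mem and --cpus are filled in from the task at runtime, and the merged
profile file names come out of the locidex merge step):

    profile_dists --query merged_profiles_value.tsv \
        --ref merged_profiles_ref.tsv \
        --count_missing --outfmt pairwise --distm scaled \
        --missing_thresh 1.0 --sample_qual_thresh 1.0 \
        --max_mem 2 --cpus 1 -o distances_pairwise

diff --git a/tests/modules/local/assemblystub/main.nf.test b/tests/modules/local/assemblystub/main.nf.test
deleted file mode 100644
index 881bf56..0000000
--- 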
a/tests/modules/local/assemblystub/main.nf.test +++ /dev/null @@ -1,38 +0,0 @@ -nextflow_process { - - name "Test Process ASSEMBLY_STUB" - script "modules/local/assemblystub/main.nf" - process "ASSEMBLY_STUB" - - test("Basic execution, check output.") { - - when { - params { - outdir = "tests/results" - } - process { - """ - input[0] = new Tuple(["id": "SAMPLE1"], [file("sample1_R1.fastq.gz"), file("sample1_R2.fastq.gz")]) - """ - } - } - - then { - assert process.success - - with(process.out) { - // check if emitted output has been created - assert assembly.size() == 1 - - // parse assembly file - def assembly_header = path(assembly.get(0)[1]).linesGzip[0] - def assembly_body = path(assembly.get(0)[1]).linesGzip[1] - - assert assembly_header.equals(">SAMPLE1-stub-assembly") - assert assembly_body.equals("ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTTAAAAACCCCCGGGGGTTTTT") - } - } - - } - -} diff --git a/tests/modules/local/generatesamplejson/main.nf.test b/tests/modules/local/generatesamplejson/main.nf.test deleted file mode 100644 index ac071a3..0000000 --- a/tests/modules/local/generatesamplejson/main.nf.test +++ /dev/null @@ -1,40 +0,0 @@ -nextflow_process { - - name "Test Process GENERATE_SAMPLE_JSON" - script "modules/local/generatesamplejson/main.nf" - process "GENERATE_SAMPLE_JSON" - - test("Basic execution, check output.") { - - when { - params { - outdir = "tests/results" - } - process { - """ - input[0] = new Tuple(["id": "SAMPLE1"], [file("sample1_R1.fastq.gz"), file("sample1_R2.fastq.gz")], file("SAMPLE1.assembly.fa.gz")) - """ - } - } - - then { - assert process.success - - with(process.out) { - // check if emitted output has been created - assert json.size() == 1 - - // parse output json file - def sample_json_string = path(json.get(0)[1]).linesGzip.join("\n") - def parser = new groovy.json.JsonSlurper() - def sample_json = parser.parseText(sample_json_string) - - assert sample_json.files.samples.SAMPLE1[0].path.equals("assembly/SAMPLE1.assembly.fa.gz") - assert sample_json.metadata.samples.SAMPLE1.reads[0].equals("sample1_R1.fastq.gz") - assert sample_json.metadata.samples.SAMPLE1.reads[1].equals("sample1_R2.fastq.gz") - } - } - - } - -} diff --git a/tests/modules/local/generatesummary/main.nf.test b/tests/modules/local/generatesummary/main.nf.test deleted file mode 100644 index b2eb189..0000000 --- a/tests/modules/local/generatesummary/main.nf.test +++ /dev/null @@ -1,37 +0,0 @@ -nextflow_process { - - name "Test Process GENERATE_SUMMARY" - script "modules/local/generatesummary/main.nf" - process "GENERATE_SUMMARY" - - test("Basic execution, check output.") { - - when { - params { - outdir = "tests/results" - } - process { - """ - input[0] = [new Tuple(["id": "SAMPLE1"], [file("sample1_R1.fastq.gz"), file("sample1_R2.fastq.gz")], file("SAMPLE1.assembly.fa.gz"))] - """ - } - } - - then { - assert process.success - - with(process.out) { - // check if emitted output has been created - assert summary.size() == 1 - - assert path(summary.get(0)).linesGzip[0].equals("IRIDANEXTEXAMPLE Pipeline Summary") - assert path(summary.get(0)).linesGzip[4].equals("SAMPLE1:") - assert path(summary.get(0)).linesGzip[5].contains("reads.1: ") - assert path(summary.get(0)).linesGzip[6].contains("reads.2: ") - assert path(summary.get(0)).linesGzip[7].contains("assembly: ") - } - } - - } - -} diff --git a/tests/modules/local/iridanextoutput/main.nf.test b/tests/modules/local/iridanextoutput/main.nf.test deleted file mode 100644 index 72808ab..0000000 --- a/tests/modules/local/iridanextoutput/main.nf.test 
+++ /dev/null
@@ -1,51 +0,0 @@
-nextflow_process {
-
-    name "Test Process IRIDA_NEXT_OUTPUT"
-    script "modules/local/iridanextoutput/main.nf"
-    process "IRIDA_NEXT_OUTPUT"
-
-    test("Basic execution, check output.") {
-
-        when {
-            params {
-                outdir = "tests/results"
-            }
-            process {
-                """
-                input[0] = [file("$baseDir/tests/data/SAMPLE1.simple.json.gz"), file("$baseDir/tests/data/SAMPLE2.simple.json.gz"), file("$baseDir/tests/data/SAMPLE3.simple.json.gz")]
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(process.out) {
-                // check if emitted output has been created
-                assert output_json.size() == 1
-
-                // parse output json file
-                def json_string = path(output_json.get(0)).linesGzip.join("\n")
-                def parser = new groovy.json.JsonSlurper()
-                def irida_json = parser.parseText(json_string)
-
-                assert irida_json.files.global[0].path.equals("summary/summary.txt.gz")
-
-                assert irida_json.files.samples.SAMPLE1[0].path.equals("assembly/SAMPLE1.assembly.fa.gz")
-                assert irida_json.files.samples.SAMPLE2[0].path.equals("assembly/SAMPLE2.assembly.fa.gz")
-                assert irida_json.files.samples.SAMPLE3[0].path.equals("assembly/SAMPLE3.assembly.fa.gz")
-
-                assert irida_json.metadata.samples.SAMPLE1.'reads.1'.equals("sample1_R1.fastq.gz")
-                assert irida_json.metadata.samples.SAMPLE1.'reads.2'.equals("sample1_R2.fastq.gz")
-
-                assert irida_json.metadata.samples.SAMPLE2.'reads.1'.equals("sample2_R1.fastq.gz")
-                assert irida_json.metadata.samples.SAMPLE2.'reads.2'.equals("sample2_R2.fastq.gz")
-
-                assert irida_json.metadata.samples.SAMPLE3.'reads.1'.equals("sample1_R1.fastq.gz")
-                assert irida_json.metadata.samples.SAMPLE3.'reads.2'.equals("null")
-            }
-        }
-
-    }
-
-}
diff --git a/tests/modules/local/simplifyiridajson/main.nf.test b/tests/modules/local/simplifyiridajson/main.nf.test
deleted file mode 100644
index 7d61567..0000000
--- a/tests/modules/local/simplifyiridajson/main.nf.test
+++ /dev/null
@@ -1,41 +0,0 @@
-nextflow_process {
-
-    name "Test Process SIMPLIFY_IRIDA_JSON"
-    script "modules/local/simplifyiridajson/main.nf"
-    process "SIMPLIFY_IRIDA_JSON"
-
-    test("Basic execution, check output.") {
-
-        when {
-            params {
-                outdir = "tests/results"
-            }
-            process {
-                """
-                input[0] = new Tuple(["id": "SAMPLE1"], file("$baseDir/tests/data/SAMPLE1.json.gz"))
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(process.out) {
-                // check if emitted output has been created
-                assert simple_json.size() == 1
-
-                // parse output json file
-                def json_string = path(simple_json.get(0)[1]).linesGzip.join("\n")
-                def parser = new groovy.json.JsonSlurper()
-                def json_simple = parser.parseText(json_string)
-
-                assert json_simple.files.samples.SAMPLE1[0].path.equals("assembly/SAMPLE1.assembly.fa.gz")
-
-                assert json_simple.metadata.samples.SAMPLE1.'reads.1'.equals("sample1_R1.fastq.gz")
-                assert json_simple.metadata.samples.SAMPLE1.'reads.2'.equals("sample1_R2.fastq.gz")
-            }
-        }
-
-    }
-
-}
diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf
index 453a922..f9a6572 100644
--- a/workflows/gas_nomenclature.nf
+++ b/workflows/gas_nomenclature.nf
@@ -48,6 +48,22 @@ include { PROFILE_DISTS } from "../modules/local/profile_dists/main"
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

+
+def prepareFilePath(String filep){
+    // Returns null if a file is not valid
+    def return_path = null
+    if(filep){
+        file_in = path(filep)
+        if(file_in.exists()){
+            return_path = file_in
+        }
+    }else{
+        return_path = []
+    }
+
+    return return_path // empty value if file argument is 
null +} + workflow GAS_NOMENCLATURE { ch_versions = Channel.empty() @@ -62,11 +78,47 @@ workflow GAS_NOMENCLATURE { } reference_values = profiles.ref.collect{ meta, profile -> profile} - query_values = profile.query.collect{ meta, profile -> proifile } - reference_values.view() - query_values.view() - //LOCIDEX_MERGE_REF(reference_values) - //LOCIDEX_MERGE_QUERY(query_values) + query_values = profiles.query.collect{ meta, profile -> profile } + + // LOCIDEX modules + ref_tag = Channel.value("ref") + query_tag = Channel.value("value") + merged_references = LOCIDEX_MERGE_REF(reference_values, ref_tag) + ch_versions = ch_versions.mix(merged_references.versions) + + merged_queries = LOCIDEX_MERGE_QUERY(query_values, query_tag) + ch_versions = ch_versions.mix(merged_queries.versions) + + + // PROFILE DISTS processes + + mapping_file = prepareFilePath(params.pd_mapping_file) + if(mapping_file == null){ + exit 1, "${params.pd_mapping_file}: Does not exist but was passed to the pipeline. Exiting now." + } + + + columns_file = prepareFilePath(params.pd_columns) + if(columns_file == null){ + exit 1, "${params.pd_columns}: Does not exist but was passed to the pipeline. Exiting now." + } + + mapping_format = Channel.value(params.pd_outfmt) + + distances = PROFILE_DISTS(merged_queries.combined_profiles, + merged_references.combined_profiles, + mapping_format, + mapping_file, + columns_file) + + ch_versions = ch_versions.mix(distances.versions) + + // GAS CALL + + clusters = Channel.fromPath(params.ref_clusters, checkIfExists: true) + called_data = GAS_CALL(clusters, distances.results) + + ch_versions = ch_versions.mix(called_data.versions) // A channel of tuples of ({meta}, [read[0], read[1]], assembly) @@ -93,9 +145,10 @@ workflow GAS_NOMENCLATURE { //) //ch_versions = ch_versions.mix(IRIDA_NEXT_OUTPUT.out.versions) - //CUSTOM_DUMPSOFTWAREVERSIONS ( - // ch_versions.unique().collectFile(name: 'collated_versions.yml') - //) + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + } From bebcbb33736f42fb1768349ac6b6e89803695bce Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 2 Apr 2024 16:29:24 -0500 Subject: [PATCH 04/16] updated expected clusters path --- tests/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nextflow.config b/tests/nextflow.config index ff66ccb..672dc69 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -7,7 +7,7 @@ params.max_memory = "2.GB" params.max_cpus = 1 -params.ref_clusters = "$baseDir/tests/data/expected_clusters.txt" +params.ref_clusters = "$baseDir/tests/data/clusters/expected_clusters.txt" /* This is required to run in WSL/Ubuntu using singularity From 2b8de972322d37c2a13e0ce7dee34c5e0bad89fc Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 2 Apr 2024 16:56:19 -0500 Subject: [PATCH 05/16] updated test configs --- conf/test.config | 10 +++++++++- conf/test_full.config | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 0e0b591..b0061d6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,5 +20,13 @@ params { max_time = '1.h' // Input data - input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv' + input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/samplesheets/samplesheet1.csv' } + + +/* This is required to run in WSL/Ubuntu using singularity +Without this, profile_dists was not successfully completing +due to issues 
with multiprocessing in the container. A similar
+error is found at https://github.com/marcelm/cutadapt/issues/583
+*/
+singularity.runOptions = "--contain"
diff --git a/conf/test_full.config b/conf/test_full.config
index c8b5764..0981a15 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -15,5 +15,13 @@ params {
     config_profile_description = 'Full test dataset to check pipeline function'

     // Input data for full size test
-    input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv'
+    input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/samplesheets/samplesheet1.csv'
 }
+
+/* This is required to run in WSL/Ubuntu using singularity
+Without this, profile_dists was not successfully completing
+due to issues with multiprocessing in the container. A similar
+error is found at https://github.com/marcelm/cutadapt/issues/583
+*/
+singularity.runOptions = "--contain"
+

From e8a73736cfd89dcf1d34f0e752f2ae2dff290b39 Mon Sep 17 00:00:00 2001
From: Matthew Wells
Date: Wed, 3 Apr 2024 09:10:02 -0500
Subject: [PATCH 06/16] updated nf-test.config

It's likely that the binaries for the individual tools were not being found,
as the nf-test.config file was not configured to use docker.

---
 nf-test.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nf-test.config b/nf-test.config
index 870799d..2fa82ad 100644
--- a/nf-test.config
+++ b/nf-test.config
@@ -3,6 +3,6 @@ config {
     testsDir "tests"
     workDir ".nf-test"
     configFile "tests/nextflow.config"
-    profile ""
+    profile "docker"
 }

From b64bb3d1ae461cd4a426a761454aa9679e836afd Mon Sep 17 00:00:00 2001
From: Matthew Wells
Date: Wed, 3 Apr 2024 09:14:22 -0500
Subject: [PATCH 07/16] updated test output path

---
 tests/pipelines/main.nf.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test
index 7370ad0..059be1f 100644
--- a/tests/pipelines/main.nf.test
+++ b/tests/pipelines/main.nf.test
@@ -19,7 +19,7 @@ nextflow_pipeline {

             // Check merged profiles
             // TODO check query profile is merged
-            def actual_profile_ref = path("$launchDir/results/locidex/merged/query/merged_value/merged_profiles_value.tsv")
+            def actual_profile_ref = path("$launchDir/results/locidex/merge/query/merged_value/merged_profiles_value.tsv")
             def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile1.tsv")
             assert actual_profile_ref.text == expected_profile_tsv.text

From f1756e9d6b824d38f2d3ff97b75227d7d7c698a9 Mon Sep 17 00:00:00 2001
From: Matthew Wells
Date: Wed, 3 Apr 2024 09:19:25 -0500
Subject: [PATCH 08/16] updated test output paths

---
 tests/pipelines/main.nf.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test
index 059be1f..de784af 100644
--- a/tests/pipelines/main.nf.test
+++ b/tests/pipelines/main.nf.test
@@ -19,7 +19,7 @@ nextflow_pipeline {

             // Check merged profiles
             // TODO check query profile is merged
-            def actual_profile_ref = path("$launchDir/results/locidex/merge/query/merged_value/merged_profiles_value.tsv")
+            def actual_profile_ref = path("$launchDir/results/locidex/merge/reference/merged_ref/merged_profiles_ref.tsv")
             def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile1.tsv")
             assert actual_profile_ref.text == expected_profile_tsv.text
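With these path fixes in place, the published outputs that the pipeline test
asserts against look roughly like this (sketched from the publishDir settings
in conf/modules.config and the test assertions; exact contents depend on the
run):

    results/
    ├── locidex/merge/reference/merged_ref/merged_profiles_ref.tsv
    ├── locidex/merge/query/merged_value/merged_profiles_value.tsv
    ├── distances/results.text
    └── call/Called/results.text

From 7c1e516029aedfab36c2335bde80b0e3a118e6df Mon Sep 17 00:00:00 2001
From: Matthew Wells
Date: Wed, 3 Apr 2024 09:22:58 -0500
Subject: [PATCH 09/16] fixed test 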
data line endings --- tests/data/profiles/expected-profile1.tsv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/data/profiles/expected-profile1.tsv b/tests/data/profiles/expected-profile1.tsv index 233f6e4..9b938e1 100644 --- a/tests/data/profiles/expected-profile1.tsv +++ b/tests/data/profiles/expected-profile1.tsv @@ -1,4 +1,4 @@ -sample_id l1 l2 l3 -sample1 1 1 1 -sample2 1 1 1 -sample3 1 1 2 +sample_id l1 l2 l3 +sample1 1 1 1 +sample2 1 1 1 +sample3 1 1 2 From bdfe6c3c9ee6d38e1c0de6074c01168b9c2642ba Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 3 Apr 2024 09:30:58 -0500 Subject: [PATCH 10/16] updated paths to expected input files --- tests/pipelines/main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/main.nf.test b/tests/pipelines/main.nf.test index de784af..911af92 100644 --- a/tests/pipelines/main.nf.test +++ b/tests/pipelines/main.nf.test @@ -26,7 +26,7 @@ nextflow_pipeline { // Check computed pairwise distances def actual_distances = path("$launchDir/results/distances/results.text") - def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.tsv") + def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.txt") assert actual_distances.text == expected_distances.text // Check called clusters From 1515c374ffc5cad27d38ffaadd429cdff1958061 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 3 Apr 2024 09:35:45 -0500 Subject: [PATCH 11/16] updated test conf file input paths --- conf/test.config | 2 +- conf/test_full.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index b0061d6..a8b10b4 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,7 +20,7 @@ params { max_time = '1.h' // Input data - input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/samplesheets/samplesheet1.csv' + input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv' } diff --git a/conf/test_full.config b/conf/test_full.config index 0981a15..9df6034 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,7 +15,7 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/samplesheets/samplesheet1.csv' + input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv' } /* This is required to run in WSL/Ubuntu using singularity From fbd6117949d03280968ed43deb21e28a5fb49d26 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 3 Apr 2024 09:39:40 -0500 Subject: [PATCH 12/16] updated params in test configs --- conf/test.config | 1 + conf/test_full.config | 1 + 2 files changed, 2 insertions(+) diff --git a/conf/test.config b/conf/test.config index a8b10b4..9c17e75 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,6 +21,7 @@ params { // Input data input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv' + ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt' } diff --git a/conf/test_full.config b/conf/test_full.config index 9df6034..4133c10 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -16,6 +16,7 @@ params { // Input data for full size test input = 
'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv' + ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt' } /* This is required to run in WSL/Ubuntu using singularity From ca448b5ca5e5d804580962f80a1baf4f2b169ddb Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 3 Apr 2024 11:00:46 -0500 Subject: [PATCH 13/16] updated schema, and test file line endings --- nextflow_schema.json | 77 ++++++++++++++++++++++++++-- tests/data/reports/sample1.mlst.json | 14 ++--- tests/data/reports/sample2.mlst.json | 14 ++--- tests/data/reports/sample3.mlst.json | 14 ++--- workflows/gas_nomenclature.nf | 9 ++-- 5 files changed, 98 insertions(+), 30 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 8639c86..71d1fac 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -41,7 +44,11 @@ "default": "stub", "fa_icon": "fas fa-desktop", "description": "The sequence assembler to use for sequence assembly.", - "enum": ["default", "stub", "experimental"] + "enum": [ + "default", + "stub", + "experimental" + ] }, "random_seed": { "type": "integer", @@ -152,7 +159,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -226,5 +240,58 @@ { "$ref": "#/definitions/generic_options" } - ] -} + ], + "properties": { + "pd_outfmt": { + "type": "string", + "default": "pairwise" + }, + "pd_distm": { + "type": "string", + "default": "scaled" + }, + "pd_missing_threshold": { + "type": "integer", + "default": 1 + }, + "pd_sample_quality_threshold": { + "type": "integer", + "default": 1 + }, + "pd_match_threshold": { + "type": "integer", + "default": -1 + }, + "pd_file_type": { + "type": "string", + "default": "text" + }, + "pd_mapping_file": { + "type": "string" + }, + "pd_force": { + "type": "boolean" + }, + "pd_skip": { + "type": "boolean" + }, + "pd_columns": { + "type": "string" + }, + "pd_count_missing": { + "type": "boolean", + "default": true + }, + "gm_thresholds": { + "type": "string", + "default": "10,5,0" + }, + "gm_delimiter": { + "type": "string", + "default": "\\'.\\" + }, + "ref_clusters": { + "type": "string" + } + } +} \ No newline at end of file diff --git a/tests/data/reports/sample1.mlst.json b/tests/data/reports/sample1.mlst.json index 393f0ac..01bc774 100644 --- a/tests/data/reports/sample1.mlst.json +++ b/tests/data/reports/sample1.mlst.json @@ -1,7 +1,7 @@ -{ - "sample1": { - "l1": "1", - "l2": "1", - "l3": "1" - } -} +{ + "sample1": { + "l1": "1", + "l2": "1", + "l3": "1" + } +} diff --git a/tests/data/reports/sample2.mlst.json b/tests/data/reports/sample2.mlst.json index 9af0a4c..7c0426c 100644 --- 
a/tests/data/reports/sample2.mlst.json
+++ b/tests/data/reports/sample2.mlst.json
@@ -1,7 +1,7 @@
-{
-  "sample2": {
-    "l1": "1",
-    "l2": "1",
-    "l3": "1"
-  }
-}
+{
+  "sample2": {
+    "l1": "1",
+    "l2": "1",
+    "l3": "1"
+  }
+}
diff --git a/tests/data/reports/sample3.mlst.json b/tests/data/reports/sample3.mlst.json
index 88c3d0c..43ea3c7 100644
--- a/tests/data/reports/sample3.mlst.json
+++ b/tests/data/reports/sample3.mlst.json
@@ -1,7 +1,7 @@
-{
-  "sample3": {
-    "l1": "1",
-    "l2": "1",
-    "l3": "2"
-  }
-}
+{
+  "sample3": {
+    "l1": "1",
+    "l2": "1",
+    "l3": "2"
+  }
+}
diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf
index f9a6572..de931e1 100644
--- a/workflows/gas_nomenclature.nf
+++ b/workflows/gas_nomenclature.nf
@@ -49,13 +49,14 @@ include { PROFILE_DISTS } from "../modules/local/profile_dists/main"
 */


-def prepareFilePath(String filep){
+def prepareFilePath(String filep, String debug_msg){
     // Returns null if a file is not valid
     def return_path = null
     if(filep){
         file_in = path(filep)
         if(file_in.exists()){
             return_path = file_in
+            log.debug debug_msg
         }
     }else{
         return_path = []
@@ -74,7 +75,7 @@ workflow GAS_NOMENCLATURE {
     profiles = input.branch{
         ref: it[0].profile_type
         query: !it[0].profile_type
-        errors: true // TODO add in check on file for erroneous values, may not be needed as nf-validation is working
+        errors: true // To discuss, add in check on file for erroneous values, may not be needed as nf-validation is working
     }

     reference_values = profiles.ref.collect{ meta, profile -> profile}
@@ -92,13 +93,13 @@ workflow GAS_NOMENCLATURE {

     // PROFILE DISTS processes

-    mapping_file = prepareFilePath(params.pd_mapping_file)
+    mapping_file = prepareFilePath(params.pd_mapping_file, "Selecting ${params.pd_mapping_file} for --pd_mapping_file")
     if(mapping_file == null){
         exit 1, "${params.pd_mapping_file}: Does not exist but was passed to the pipeline. Exiting now."
     }


-    columns_file = prepareFilePath(params.pd_columns)
+    columns_file = prepareFilePath(params.pd_columns, "Selecting ${params.pd_columns} for --pd_columns")
     if(columns_file == null){
         exit 1, "${params.pd_columns}: Does not exist but was passed to the pipeline. Exiting now."
} From 6893ece76c8aead3bdf7a1a912ddd110d1e3f000 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 3 Apr 2024 11:05:10 -0500 Subject: [PATCH 14/16] updated schema types --- nextflow_schema.json | 128 +++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 54 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 71d1fac..f0eb807 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,6 +5,73 @@ "description": "IRIDA Next Example Pipeline", "type": "object", "definitions": { + "gas_call": { + "title": "GAS Call", + "type": "object", + "description": "", + "default": "", + "properties": { + "gm_thresholds": { + "type": "string", + "default": "10,5,0" + }, + "gm_delimiter": { + "type": "string", + "default": "\\'.\\" + }, + "ref_clusters": { + "type": "string" + } + } + }, + "profile_dists": { + "title": "Profile Dists", + "type": "object", + "description": "", + "default": "", + "properties": { + "pd_outfmt": { + "type": "string", + "default": "pairwise" + }, + "pd_distm": { + "type": "string", + "default": "scaled" + }, + "pd_missing_threshold": { + "type": "number", + "default": 1 + }, + "pd_sample_quality_threshold": { + "type": "number", + "default": 1 + }, + "pd_match_threshold": { + "type": "number", + "default": -1 + }, + "pd_file_type": { + "type": "string", + "default": "text" + }, + "pd_mapping_file": { + "type": "string" + }, + "pd_force": { + "type": "boolean" + }, + "pd_skip": { + "type": "boolean" + }, + "pd_columns": { + "type": "string" + }, + "pd_count_missing": { + "type": "boolean", + "default": true + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -228,6 +295,12 @@ } }, "allOf": [ + { + "$ref": "#/definitions/gas_call" + }, + { + "$ref": "#/definitions/profile_dists" + }, { "$ref": "#/definitions/input_output_options" }, @@ -240,58 +313,5 @@ { "$ref": "#/definitions/generic_options" } - ], - "properties": { - "pd_outfmt": { - "type": "string", - "default": "pairwise" - }, - "pd_distm": { - "type": "string", - "default": "scaled" - }, - "pd_missing_threshold": { - "type": "integer", - "default": 1 - }, - "pd_sample_quality_threshold": { - "type": "integer", - "default": 1 - }, - "pd_match_threshold": { - "type": "integer", - "default": -1 - }, - "pd_file_type": { - "type": "string", - "default": "text" - }, - "pd_mapping_file": { - "type": "string" - }, - "pd_force": { - "type": "boolean" - }, - "pd_skip": { - "type": "boolean" - }, - "pd_columns": { - "type": "string" - }, - "pd_count_missing": { - "type": "boolean", - "default": true - }, - "gm_thresholds": { - "type": "string", - "default": "10,5,0" - }, - "gm_delimiter": { - "type": "string", - "default": "\\'.\\" - }, - "ref_clusters": { - "type": "string" - } - } + ] } \ No newline at end of file From 141b040bbb0b256cb54130d82e87605b3aea3d94 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 3 Apr 2024 11:09:11 -0500 Subject: [PATCH 15/16] Ran prettier --- nextflow_schema.json | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f0eb807..5799dcf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -77,10 +77,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -111,11 +108,7 @@ "default": "stub", 
"fa_icon": "fas fa-desktop", "description": "The sequence assembler to use for sequence assembly.", - "enum": [ - "default", - "stub", - "experimental" - ] + "enum": ["default", "stub", "experimental"] }, "random_seed": { "type": "integer", @@ -226,14 +219,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -314,4 +300,4 @@ "$ref": "#/definitions/generic_options" } ] -} \ No newline at end of file +} From 7ce6e9fa56fc3060ac87c6827737e0f9e321ef92 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 3 Apr 2024 11:15:03 -0500 Subject: [PATCH 16/16] updated function data types --- workflows/gas_nomenclature.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/gas_nomenclature.nf b/workflows/gas_nomenclature.nf index de931e1..a0befa9 100644 --- a/workflows/gas_nomenclature.nf +++ b/workflows/gas_nomenclature.nf @@ -49,7 +49,7 @@ include { PROFILE_DISTS } from "../modules/local/profile_dists/main" */ -def prepareFilePath(String filep, String debug_msg){ +def prepareFilePath(String filep, GString debug_msg){ // Rerturns null if a file is not valid def return_path = null if(filep){