Skip to content

Commit

Permalink
Merge pull request #4 from phac-nml/workflow
Browse files Browse the repository at this point in the history
Integrating the initial skeleton of the workflow
  • Loading branch information
mattheww95 authored Apr 4, 2024
2 parents 377b998 + 7ce6e9f commit dccc794
Show file tree
Hide file tree
Showing 23 changed files with 331 additions and 267 deletions.
49 changes: 46 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ process {
// Publish directory names
assembly_directory_name = "assembly"
summary_directory_name = "summary"
profile_dists_directory_name = "distances"
gas_call_directory_name = "call"

locidex_merge_directory_name = [params.outdir , "locidex", "merge"].join(File.separator)
locidex_merge_ref_directory_name = [params.outdir , "locidex", "merge", "reference"].join(File.separator)
locidex_merge_query_directory_name = [params.outdir , "locidex", "merge", "query"].join(File.separator)

publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
Expand Down Expand Up @@ -56,15 +59,55 @@ process {
]
}

withName: LOCIDEX_MERGE {
withName: LOCIDEX_MERGE_REF {
publishDir = [
path: locidex_merge_directory_name,
path: locidex_merge_ref_directory_name,
mode: params.publish_dir_mode,
pattern: "*/*",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: LOCIDEX_MERGE_QUERY {
publishDir = [
path: locidex_merge_query_directory_name,
mode: params.publish_dir_mode,
pattern: "*/*",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


withName: PROFILE_DISTS {
publishDir = [
path: { ["${params.outdir}", "${task.profile_dists_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null :
filename.contains(File.separator) ? filename.split(File.separator)[-1] : filename }
]
}

withName: GAS_CALL {
publishDir = [
[
path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
pattern: "*/thresholds.json"
],
[
path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
pattern: "*/results.{text,parquet}"
],
[
path: { ["${params.outdir}", "${task.gas_call_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
pattern: "*/run.json"
]
]
}


withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
Expand Down
11 changes: 10 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,14 @@ params {
max_time = '1.h'

// Input data
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv'
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv'
ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt'
}


/* This is required to run in WSL/Ubuntu using singularity
Without this, profile_dists was not successfully completing
due to issues with multiprocessing in the container. A similar
error is found at https://github.com/marcelm/cutadapt/issues/583
*/
singularity.runOptions = "--contain"
11 changes: 10 additions & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,14 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'

// Input data for full size test
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/main/assets/samplesheet.csv'
input = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/samplesheets/samplesheet1.csv'
ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt'
}

/* This is required to run in WSL/Ubuntu using singularity
Without this, profile_dists was not successfully completing
due to issues with multiprocessing in the container. A similar
error is found at https://github.com/marcelm/cutadapt/issues/583
*/
singularity.runOptions = "--contain"

13 changes: 7 additions & 6 deletions modules/local/gas/call/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,26 @@

process GAS_CALL{
label "process_high"
tag "Calling: ${meta.id}"
tag "Assigning Nomenclature"

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/genomic_address_service%3A0.1.1--pyh7cba7a3_1' :
'quay.io/biocontainers/genomic_address_service:0.1.1--pyh7cba7a3_1' }"


input:
tuple val(meta), path(reference_clusters), path(distances)
path(reference_clusters)
path(distances)

output:
tuple val(meta), path("${prefix}/results.{text,parquet}"), emit: distances, optional: true
tuple val(meta), path("${prefix}/thresholds.json"), emit: thresholds
tuple val(meta), path("${prefix}/run.json"), emit: run
path("${prefix}/results.{text,parquet}"), emit: distances, optional: true
path("${prefix}/thresholds.json"), emit: thresholds
path("${prefix}/run.json"), emit: run
path "versions.yml", emit: versions

script:
// Need to add more args for gas call below
prefix = meta.id
prefix = "Called"
"""
gas call --dists $distances \\
--rclusters $reference_clusters \\
Expand Down
8 changes: 5 additions & 3 deletions modules/local/locidex/merge/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@ process LOCIDEX_MERGE {
'quay.io/biocontainers/locidex:0.1.1--pyhdfd78af_0' }"

input:
val input_values // [file(sample1), file(sample2), file(sample3), etc...]
path input_values // [file(sample1), file(sample2), file(sample3), etc...]
val input_tag // makes output unique and denotes the item as the reference or query to prevent name collisions

output:
path("${combined_dir}/*.tsv"), emit: combined_profiles
path("${combined_dir}/*.json"), emit: report
path "versions.yml", emit: versions

script:
combined_dir = "merged"
combined_dir = "merged_${input_tag}"
"""
locidex merge -i ${input_values.join(' ')} -o ${combined_dir}
mv ${combined_dir}/*.tsv ${combined_dir}/merged_profiles_${input_tag}.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
locidex merge: \$(echo \$(locidex search -V 2>&1) | sed 's/^.*locidex //' )
Expand Down
23 changes: 12 additions & 11 deletions modules/local/profile_dists/main.nf
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
process PROFILE_DISTS{
label "process_high"
tag "Pairwise Distance Generation: ${meta.id}"
tag "Gathering Distances Between Reference and Query Profiles"

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/profile_dists%3A1.0.0--pyh7cba7a3_0' :
'quay.io/biocontainers/profile_dists:1.0.0--pyh7cba7a3_0' }"

input:
tuple val(meta), path(query), path(ref)
path query
path ref
val mapping_format
path(mapping_file)
path(columns)
path mapping_file
path columns


output:
tuple val(meta), path("${prefix}_${mapping_format}/allele_map.json"), emit: allele_map
tuple val(meta), path("${prefix}_${mapping_format}/query_profile.{text,parquet}"), emit: query_profile
tuple val(meta), path("${prefix}_${mapping_format}/ref_profile.{text,parquet}"), emit: ref_profile
tuple val(meta), path("${prefix}_${mapping_format}/results.{text,parquet}"), emit: results
tuple val(meta), path("${prefix}_${mapping_format}/run.json"), emit: run
path("${prefix}/allele_map.json"), emit: allele_map
path("${prefix}/query_profile.{text,parquet}"), emit: query_profile
path("${prefix}/ref_profile.{text,parquet}"), emit: ref_profile
path("${prefix}/results.{text,parquet}"), emit: results
path("${prefix}/run.json"), emit: run
path "versions.yml", emit: versions


Expand All @@ -41,7 +42,7 @@ process PROFILE_DISTS{
args = args + " --count_missing"
}
// --match_threshold $params.profile_dists.match_thresh \\
prefix = meta.id
prefix = "distances_${mapping_format}"
"""
profile_dists --query $query --ref $ref $args --outfmt $mapping_format \\
--distm $params.pd_distm \\
Expand All @@ -50,7 +51,7 @@ process PROFILE_DISTS{
--sample_qual_thresh $params.pd_sample_quality_threshold \\
--max_mem ${task.memory.toGiga()} \\
--cpus ${task.cpus} \\
-o ${prefix}_${mapping_format}
-o ${prefix}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
17 changes: 17 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,23 @@ params {
validate_params = true

// Profile Dists
pd_outfmt = "pairwise"
pd_distm = "scaled"
pd_missing_threshold = 1.0
pd_sample_quality_threshold = 1.0
pd_match_threshold = -1.0
pd_file_type = "text"
pd_mapping_file = null // default is no file
pd_force = false
pd_skip = false
pd_columns = null
pd_count_missing = true


// GAS Call
gm_thresholds = "10,5,0"
gm_delimiter = "'.'" // note the single quotes surrounding the delimiter
ref_clusters = ""

}

Expand Down
73 changes: 73 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,73 @@
"description": "IRIDA Next Example Pipeline",
"type": "object",
"definitions": {
"gas_call": {
"title": "GAS Call",
"type": "object",
"description": "",
"default": "",
"properties": {
"gm_thresholds": {
"type": "string",
"default": "10,5,0"
},
"gm_delimiter": {
"type": "string",
"default": "'.'"
},
"ref_clusters": {
"type": "string"
}
}
},
"profile_dists": {
"title": "Profile Dists",
"type": "object",
"description": "",
"default": "",
"properties": {
"pd_outfmt": {
"type": "string",
"default": "pairwise"
},
"pd_distm": {
"type": "string",
"default": "scaled"
},
"pd_missing_threshold": {
"type": "number",
"default": 1
},
"pd_sample_quality_threshold": {
"type": "number",
"default": 1
},
"pd_match_threshold": {
"type": "number",
"default": -1
},
"pd_file_type": {
"type": "string",
"default": "text"
},
"pd_mapping_file": {
"type": "string"
},
"pd_force": {
"type": "boolean"
},
"pd_skip": {
"type": "boolean"
},
"pd_columns": {
"type": "string"
},
"pd_count_missing": {
"type": "boolean",
"default": true
}
}
},
"input_output_options": {
"title": "Input/output options",
"type": "object",
Expand Down Expand Up @@ -214,6 +281,12 @@
}
},
"allOf": [
{
"$ref": "#/definitions/gas_call"
},
{
"$ref": "#/definitions/profile_dists"
},
{
"$ref": "#/definitions/input_output_options"
},
Expand Down
2 changes: 1 addition & 1 deletion nf-test.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ config {
testsDir "tests"
workDir ".nf-test"
configFile "tests/nextflow.config"
profile ""
profile "docker"

}
5 changes: 5 additions & 0 deletions tests/data/called/expected_results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id address level_1 level_2 level_3
sample1 1.1.1 1 1 1
sample2 1.1.1 1 1 1
sample3 2.2.2 2 2 2
sampleQ 1.1.1 1 1 1
4 changes: 4 additions & 0 deletions tests/data/distances/expected_pairwise_dists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
query_id ref_id dist
sampleQ sample1 0.0
sampleQ sample2 33.333333333333336
sampleQ sample3 66.66666666666667
8 changes: 4 additions & 4 deletions tests/data/profiles/expected-profile1.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample_id l1 l2 l3
sample1 1 1 1
sample2 1 1 1
sample3 1 1 2
sample_id l1 l2 l3
sample1 1 1 1
sample2 1 1 1
sample3 1 1 2
14 changes: 7 additions & 7 deletions tests/data/reports/sample1.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"sample1": {
"l1": "1",
"l2": "1",
"l3": "1"
}
}
{
"sample1": {
"l1": "1",
"l2": "1",
"l3": "1"
}
}
14 changes: 7 additions & 7 deletions tests/data/reports/sample2.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"sample2": {
"l1": "1",
"l2": "1",
"l3": "1"
}
}
{
"sample2": {
"l1": "1",
"l2": "1",
"l3": "1"
}
}
14 changes: 7 additions & 7 deletions tests/data/reports/sample3.mlst.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
}
}
{
"sample3": {
"l1": "1",
"l2": "1",
"l3": "2"
}
}
Loading

0 comments on commit dccc794

Please sign in to comment.