Skip to content

Commit

Permalink
Merge pull request #11 from phac-nml/expected-clusters
Browse files Browse the repository at this point in the history
Add module to create the expected_clusters file for gas-call
  • Loading branch information
kylacochrane authored Jun 10, 2024
2 parents b1c888a + 6691dea commit 4acdb8c
Show file tree
Hide file tree
Showing 10 changed files with 127 additions and 10 deletions.
3 changes: 1 addition & 2 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ params {
max_time = '1.h'

// Input data
input = "${projectDir}/tests/data/samplesheets/samplesheet1.csv"
ref_clusters = 'https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/clusters/expected_clusters.txt'
input = "${projectDir}/assets/samplesheet.csv"
}


Expand Down
45 changes: 45 additions & 0 deletions modules/local/cluster_file/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
 * CLUSTER_FILE: build the expected_clusters.txt input for GAS call.
 *
 * input:  val meta — a list of sample maps, each with at least
 *         'id' (sample identifier) and 'address' (delimited nomenclature code,
 *         e.g. "1.1.2" with params.gm_delimiter == ".").
 * output: expected_clusters.txt — TSV with columns:
 *         id, address, level_1 .. level_N (N = number of address levels).
 * fails:  via error() when the sample list is empty or when samples have
 *         inconsistent numbers of address levels.
 */
process CLUSTER_FILE {
    tag "Create cluster file for GAS call"
    label 'process_single'

    input:
    val meta

    output:
    path("expected_clusters.txt"), emit: text

    exec:
    // Quote the user-configurable delimiter so split() treats it literally
    // (e.g. "." would otherwise be a regex wildcard).
    def delimiter = java.util.regex.Pattern.quote(params.gm_delimiter)

    // Guard against an empty sample list: max() would return null and the
    // old `?: 0` fallback made (1..maxLevels) the DESCENDING Groovy range
    // [1, 0], silently producing a bogus "level_1  level_0" header.
    if (!meta) {
        error ("CLUSTER_FILE received no reference samples; cannot create expected_clusters.txt")
    }

    // Maximum number of address levels sets the header width for this run.
    int maxLevels = meta.collect { sample -> sample.address.split(delimiter).size() }.max()

    // Every sample must have exactly maxLevels levels; abort otherwise.
    meta.each { sample ->
        int level = sample.address.split(delimiter).size()
        if (level != maxLevels) {
            error ("Inconsistent levels found: expected $maxLevels levels but found $level levels in ${sample.id}")
        }
    }

    def outputLines = []

    // Header row: id, address, level_1 .. level_N.
    def header = ["id", "address"] + (1..maxLevels).collect { "level_$it" }
    outputLines << header.join("\t")

    // One data row per sample: id, full address, then each level component.
    // split() already yields Strings, so no per-element conversion is needed.
    meta.each { sample ->
        def levels = sample.address.split(delimiter)
        outputLines << ([sample.id, sample.address] + levels.toList()).join("\t")
    }

    // Write the file directly into the task work directory so Nextflow
    // picks it up as the declared output path.
    task.workDir.resolve("expected_clusters.txt").withWriter { writer ->
        outputLines.each { line ->
            writer.writeLine(line)
        }
    }
}
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ params {

// GAS Call
gm_thresholds = "10,5,0"
gm_delimiter = "'.'" // note the single quotes surrounding the delimiter
gm_delimiter = "."
ref_clusters = ""

}
Expand Down
4 changes: 3 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
},
"gm_delimiter": {
"type": "string",
"default": "\\'.\\'",
"default": ".",
"description": "Delimiter desired for nomenclature code.",
"pattern": "^[A-Fa-f0-9\\._-]+$"
},
"ref_clusters": {
"type": "string"
Expand Down
2 changes: 1 addition & 1 deletion tests/data/called/expected_results.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id address level_1 level_2 level_3
sample1 1.1.1 1 1 1
sample2 1.1.1 1 1 1
sample3 2.2.2 2 2 2
sample3 1.1.2 1 1 2
sampleQ 1.1.3 1 1 3
2 changes: 1 addition & 1 deletion tests/data/clusters/expected_clusters.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
id address level_1 level_2 level_3
sample1 1.1.1 1 1 1
sample2 1.1.1 1 1 1
sample3 2.2.2 2 2 2
sample3 1.1.2 1 1 2
2 changes: 2 additions & 0 deletions tests/data/profiles/expected-profile2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sample_id l1 l2 l3
sampleQ 1 2 1
58 changes: 58 additions & 0 deletions tests/modules/cluster_file/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// nf-test suite for the local CLUSTER_FILE process, which converts a list of
// sample maps (id + nomenclature address) into an expected_clusters.txt TSV.
nextflow_process {
name "Test Process CLUSTER_FILE"
script "modules/local/cluster_file/main.nf"
process "CLUSTER_FILE"

// Happy path: all samples share the same number of address levels (3),
// so the process should succeed and emit a file matching the committed fixture.
test("Test when sample levels are equal") {

when {
process {
"""
input[0] = Channel.of(
[['id':'sample1', 'address':'1.1.1'],
['id':'sample2', 'address':'1.1.1'],
['id':'sample3', 'address':'1.1.2']]
)
"""
}

params {
outdir = "cluster_results"
}
}

then {
assert process.success
assert path("$launchDir/cluster_results").exists()

// Compare the generated expected_clusters.txt against the committed
// fixture byte-for-byte (tests/data/clusters/expected_clusters.txt).
// Check expected_clusters
def actual_clusters = path("$launchDir/cluster_results/cluster/expected_clusters.txt")
def expected_clusters = path("$baseDir/tests/data/clusters/expected_clusters.txt")
assert actual_clusters.text == expected_clusters.text
}
}

// Failure path: sample3 has only 2 address levels while the others have 3;
// the process must abort with the "Inconsistent levels" error message.
test("Test when sample levels are different") {

when {
process {
"""
input[0] = Channel.of(
[['id':'sample1', 'address':'1.1.1'],
['id':'sample2', 'address':'1.1.1'],
['id':'sample3', 'address':'1.2']]
)
"""
}

params {
outdir = "cluster_results"
}
}

then {
assert process.failed
// Error text must match the error() call in modules/local/cluster_file/main.nf.
assert (process.stdout =~ /Inconsistent levels found: expected 3 levels but found 2 levels in sample3/).find()
}
}
}
6 changes: 5 additions & 1 deletion tests/pipelines/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@ nextflow_pipeline {
assert path("$launchDir/results").exists()

// Check merged profiles
// TODO check query profile is merged
def actual_profile_ref = path("$launchDir/results/locidex/merge/reference/merged_ref/merged_profiles_ref.tsv")
def expected_profile_tsv = path("$baseDir/tests/data/profiles/expected-profile1.tsv")
assert actual_profile_ref.text == expected_profile_tsv.text

// Check query profiles
def actual_profile_query = path("$launchDir/results/locidex/merge/query/merged_value/merged_profiles_value.tsv")
def expected_profile_query_tsv = path("$baseDir/tests/data/profiles/expected-profile2.tsv")
assert actual_profile_query.text == expected_profile_query_tsv.text

// Check computed pairwise distances
def actual_distances = path("$launchDir/results/distances/results.text")
def expected_distances = path("$baseDir/tests/data/distances/expected_pairwise_dists.txt")
Expand Down
13 changes: 10 additions & 3 deletions workflows/gas_nomenclature.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ include { INPUT_CHECK } from "../modules/local/input_c
include { LOCIDEX_MERGE as LOCIDEX_MERGE_REF } from "../modules/local/locidex/merge/main"
include { LOCIDEX_MERGE as LOCIDEX_MERGE_QUERY } from "../modules/local/locidex/merge/main"
include { PROFILE_DISTS } from "../modules/local/profile_dists/main"
include { CLUSTER_FILE } from "../modules/local/cluster_file/main"
include { GAS_CALL } from "../modules/local/gas/call/main"
include { FILTER_QUERY } from "../modules/local/filter_query/main"

Expand Down Expand Up @@ -131,10 +132,16 @@ workflow GAS_NOMENCLATURE {
columns_file)
ch_versions = ch_versions.mix(distances.versions)

// GAS CALL
clusters = Channel.fromPath(params.ref_clusters, checkIfExists: true)
// Generate the expected_clusters.txt file from the addresses of the provided reference samples
clusters = input.filter { meta, file ->
meta.address != null
}.collect { meta, file ->
meta }

expected_clusters = CLUSTER_FILE(clusters)

called_data = GAS_CALL(clusters, distances.results)
// GAS CALL
called_data = GAS_CALL(expected_clusters.text, distances.results)
ch_versions = ch_versions.mix(called_data.versions)

// Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in
Expand Down

0 comments on commit 4acdb8c

Please sign in to comment.