From bbbe8add55a41653154719d0596a5efb9050e1ea Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:50:44 -0600 Subject: [PATCH 01/14] update to 16.15.0 --- modules/local/datasets.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/datasets.nf b/modules/local/datasets.nf index f38f447..b1320ba 100644 --- a/modules/local/datasets.nf +++ b/modules/local/datasets.nf @@ -52,7 +52,7 @@ process datasets_download { // because there's no way to specify threads label "process_medium" publishDir path: "${params.outdir}", mode: 'copy', pattern: "logs/*/*log" - container 'staphb/ncbi-datasets:16.10.3' + container 'staphb/ncbi-datasets:16.15.0' time '5h' errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} From 6ac636d2e61d943b1092a4e50fe15bc9fa677d0a Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:51:04 -0600 Subject: [PATCH 02/14] removed flag and fixed version --- modules/local/drprg.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/drprg.nf b/modules/local/drprg.nf index c831d22..1762616 100644 --- a/modules/local/drprg.nf +++ b/modules/local/drprg.nf @@ -8,7 +8,7 @@ process drprg { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(contigs), val(flag) + tuple val(meta), file(contigs) output: tuple val(meta), val("drprg"), file("drprg/*/*.drprg.json"), emit: json @@ -17,7 +17,7 @@ process drprg { path "versions.yml", emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '' @@ -35,7 +35,7 @@ process drprg { cat <<-END_VERSIONS > versions.yml "${task.process}": - drprg: \$( drprg --version ) + drprg: \$( drprg --version | awk '{print \$NF}') END_VERSIONS """ } \ No newline at end of file From a7e196dd9c550e537af4d71da4dde3f47b76575a Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:51:21 -0600 Subject: [PATCH 03/14] removed flag --- modules/local/elgato.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/elgato.nf b/modules/local/elgato.nf index 3b65119..adb0c8e 100755 --- a/modules/local/elgato.nf +++ b/modules/local/elgato.nf @@ -7,7 +7,7 @@ process elgato { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(contigs), val(flag) + tuple val(meta), file(contigs) output: path "elgato/*/possible_mlsts.txt", emit: collect @@ -15,7 +15,7 @@ process elgato { path "versions.yml" , emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '' From 5abf53a84ef26d6bd91c1f6da66d8f1ecab05b47 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:51:36 -0600 Subject: [PATCH 04/14] removed flag --- modules/local/emmtyper.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/emmtyper.nf b/modules/local/emmtyper.nf index 61c6845..5af6307 100644 --- a/modules/local/emmtyper.nf +++ b/modules/local/emmtyper.nf @@ -8,10 +8,10 @@ process emmtyper { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) input: - tuple val(meta), file(contigs), val(flag), file(script) + tuple val(meta), file(contigs), file(script) output: path "emmtyper/*_emmtyper.txt" , emit: collect From 400284db8a6e89b6721e1501475e78ff45ef3143 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:51:51 -0600 Subject: [PATCH 05/14] update to 2.3.4 --- modules/local/iqtree2.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/iqtree2.nf b/modules/local/iqtree2.nf index 8ae7366..a839eb7 100644 --- a/modules/local/iqtree2.nf +++ b/modules/local/iqtree2.nf @@ -2,7 +2,7 @@ process iqtree2 { tag "Phylogenetic analysis" label "process_high" publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - container 'staphb/iqtree2:2.3.1' + container 'staphb/iqtree2:2.3.4' time '24h' errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} From 68a8488340069d2daaeb4c9536bd40b81754d673 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:52:05 -0600 Subject: [PATCH 06/14] removed flag --- modules/local/kaptive.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/kaptive.nf b/modules/local/kaptive.nf index ff274ac..8019606 100644 --- a/modules/local/kaptive.nf +++ b/modules/local/kaptive.nf @@ -8,7 +8,7 @@ process kaptive { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(contigs), val(flag) + tuple val(meta), file(contigs) output: path "kaptive/${meta.id}_table.txt", emit: collect @@ -17,7 +17,7 @@ process kaptive { path "versions.yml", emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '' From f731820e46369074d4ff14bed59c9c4433d19704 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:52:16 -0600 Subject: [PATCH 07/14] removed flag --- modules/local/kleborate.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/kleborate.nf b/modules/local/kleborate.nf index 177cd0d..a8d84ad 100644 --- a/modules/local/kleborate.nf +++ b/modules/local/kleborate.nf @@ -7,7 +7,7 @@ process kleborate { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(contig), val(flag), file(script) + tuple val(meta), file(contig), file(script) output: path "kleborate/*_results.tsv" , emit: collect, optional: true @@ -16,7 +16,7 @@ process kleborate { path "versions.yml" , emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '--all' From 3c3ecdb7091320ffb7444e3976a9f8fe062f9c54 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:53:57 -0600 Subject: [PATCH 08/14] removed flag --- modules/local/local.nf | 106 +---------------------------------------- 1 file changed, 1 insertion(+), 105 deletions(-) diff --git a/modules/local/local.nf b/modules/local/local.nf index d2bd91f..53f1c93 100644 --- a/modules/local/local.nf +++ b/modules/local/local.nf @@ -65,110 +65,6 @@ process download_sra { """ } -process flag { - tag "${meta.id}" - label "process_single" - //no publishDir params.outdir, mode: 'copy' - container 'quay.io/biocontainers/pandas:1.5.2' - time '10m' - errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} - - input: - tuple val(meta), file(files) - - output: - tuple val(meta), file("${files[0]}"), env(salmonella_flag) , emit: salmonella_flag - tuple val(meta), file("${files[0]}"), env(klebsiella_flag) , emit: klebsiella_flag - tuple val(meta), file("${files[0]}"), env(ecoli_flag) , emit: ecoli_flag - tuple val(meta), file("${files[0]}"), env(streppneu_flag) , emit: streppneu_flag - tuple val(meta), file("${files[0]}"), env(legionella_flag) , emit: legionella_flag - tuple val(meta), file("${files[0]}"), env(klebacin_flag) , emit: klebacin_flag - tuple val(meta), file("${files[0]}"), env(strepa_flag) , emit: strepa_flag - tuple val(meta), file("${files[0]}"), env(vibrio_flag) , emit: vibrio_flag - tuple val(meta), file("${files[0]}"), env(myco_flag) , emit: myco_flag - tuple val(meta), file("${files[0]}"), env(genus), env(species), emit: organism - path "flag/*_flag.csv", emit: collect - path "logs/${task.process}/*.log", emit: log_files - - shell: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - mkdir -p flag logs/${task.process} - log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log - - if [ -f "${prefix}_fastani.csv" ] - then - awk -F "," '{if (\$4 > 90) print \$0}' ${prefix}_fastani.csv > smaller_fastani.csv - genus=\$(head -n 2 ${prefix}_fastani.csv | tail -n 1 | cut -f 3 -d , | cut -f 1 -d "_") - species=\$(head -n 2 ${prefix}_fastani.csv | tail -n 1 | cut -f 3 -d , | cut -f 2 -d "_") - else - touch smaller_fastani.csv - genus="unknown" - species="unknown" - fi - - touch ${prefix}_reads_summary_kraken2.csv ${prefix}.summary.mash.csv ${prefix}_blobtools.txt - - files="smaller_fastani.csv ${prefix}_reads_summary_kraken2.csv ${prefix}.summary.mash.csv ${prefix}_blobtools.txt" - - echo "Looking for Salmonella:" >> \$log_file - salmonella_flag='' - find_salmonella=\$(head -n 10 \$files | grep "Salmonella" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_salmonella" ] ; then salmonella_flag="found" ; else salmonella_flag="not" ; fi - - echo "Looking for E. coli and Shigella:" >> \$log_file - ecoli_flag='' - find_ecoli=\$(head -n 10 \$files | grep -e "Escherichia" -e "Shigella" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_ecoli" ] ; then ecoli_flag="found" ; else ecoli_flag="not" ; fi - - echo "Looking for Klebsiella:" >> \$log_file - klebsiella_flag='' - find_klebsiella=\$(head -n 10 \$files | grep -e "Klebsiella" -e "Enterobacter" -e "Serratia" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_klebsiella" ] ; then klebsiella_flag="found" ; else klebsiella_flag="not" ; fi - - echo "Looking for Strep A organisms:" >> \$log_file - strepa_flag='' - find_strepa=\$(head -n 10 \$files | grep "Streptococcus" | grep -e "pyogenes" -e "dysgalactiae" -e "anginosus" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_strepa" ] ; then strepa_flag='found' ; else strepa_flag='not' ; fi - - echo "Looking for Streptococcus pneumoniae organisms:" >> \$log_file - streppneu_flag_flag='' - find_streppneu=\$(head -n 10 \$files | grep "Streptococcus" | grep "pneumoniae" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_streppneu" ] ; then streppneu_flag='found' ; else streppneu_flag='not' ; fi - - echo "Looking for Legionella organisms:" >> \$log_file - legionella_flag='' - find_legionella=\$(head -n 10 \$files | grep "Legionella" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_legionella" ] ; then legionella_flag='found' ; else legionella_flag='not' ; fi - - echo "Looking for Vibrio organisms:" >> \$log_file - vibrio_flag='' - find_vibrio=\$(head -n 10 \$files | grep "Vibrio" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_vibrio" ] ; then vibrio_flag='found' ; else vibrio_flag='not' ; fi - - echo "Looking for Klebsiella or Acinetobacter:" >> \$log_file - klebacin_flag='' - if [ -n "\$find_klebsiella" ] - then - klebacin_flag='found' - else - find_acin=\$(head -n 10 \$files | grep "Acinetobacter" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_acin" ] ; then klebacin_flag='found' ; else klebacin_flag='not' ; fi - fi - - echo "Looking for Mycobacterium/Mycobacteria" - myco_flag='' - find_myco=\$(head -n 10 \$files | grep "Mycobacteri" | tee -a \$log_file | head -n 1 ) - if [ -n "\$find_myco" ] ; then myco_flag='found' ; else myco_flag='not' ; fi - - if [ -z "\$genus" ] ; then genus=unknown ; fi - if [ -z "\$species" ] ; then species=unknown ; fi - - echo "sample,genus,species,salmonella_flag,ecoli_flag,klebsiella_flag,klebacin_flag,myco_flag,strepa_flag,streppneu_flag,legionella_flag,vibrio_flag" > flag/${prefix}_flag.csv - echo "${prefix},\$genus,\$species,\$salmonella_flag,\$ecoli_flag,\$klebsiella_flag,\$klebacin_flag,\$myco_flag,\$strepa_flag,\$streppneu_flag,\$legionella_flag,\$vibrio_flag" >> flag/${prefix}_flag.csv - """ -} - process json_convert { tag "${meta.id}" label "process_single" @@ -278,7 +174,7 @@ process references { tag "Preparing references" // no publishDir label "process_single" - container 'quay.io/uphl/grandeur_ref:2024-03-07' + container 'quay.io/uphl/grandeur_ref:2024-06-26' time '10m' errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} From ed4635b65208a1552e8904cc8f02ea8ac727d1de Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:54:20 -0600 Subject: [PATCH 09/14] added errorStrategy --- modules/local/bbduk.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/bbduk.nf b/modules/local/bbduk.nf index 44fd3b7..3536873 100644 --- a/modules/local/bbduk.nf +++ b/modules/local/bbduk.nf @@ -3,7 +3,7 @@ process bbduk { label "process_medium" publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } container 'staphb/bbtools:39.01' - //errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} + errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' input: From b596fe0f07b2fb8ede157ff051cb8a61ccccf9f0 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:54:46 -0600 Subject: [PATCH 10/14] update to 2.23.0-2024-06-01 --- modules/local/mlst.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mlst.nf b/modules/local/mlst.nf index 6710565..43b069b 100644 --- a/modules/local/mlst.nf +++ b/modules/local/mlst.nf @@ -2,7 +2,7 @@ process mlst { tag "${meta.id}" label "process_medium" publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - container 'staphb/mlst:2.23.0-2024-05-01' + container 'staphb/mlst:2.23.0-2024-06-01' maxForks 10 errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} time '10m' From d46878d03770504f66fabacea0bf32780a661911 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:55:00 -0600 Subject: [PATCH 11/14] added errorStrategy --- modules/local/mykrobe.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/mykrobe.nf b/modules/local/mykrobe.nf index 5a4d757..e5ed3af 100644 --- a/modules/local/mykrobe.nf +++ b/modules/local/mykrobe.nf @@ -8,7 +8,7 @@ process mykrobe { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(contigs), val(flag) + tuple val(meta), file(contigs) output: path "mykrobe/*.csv", emit: collect @@ -17,7 +17,7 @@ process mykrobe { path "versions.yml", emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '' From 6f9084375c141cd083e7539fd6dda06a200c429a Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:55:32 -0600 Subject: [PATCH 12/14] removed flag --- modules/local/pbptyper.nf | 4 ++-- modules/local/seqsero2.nf | 4 ++-- modules/local/serotypefinder.nf | 4 ++-- modules/local/shigatyper.nf | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/local/pbptyper.nf b/modules/local/pbptyper.nf index 97e8aea..9cb85d5 100644 --- a/modules/local/pbptyper.nf +++ b/modules/local/pbptyper.nf @@ -8,7 +8,7 @@ process pbptyper { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(contigs), val(flag) + tuple val(meta), file(contigs) output: path "pbptyper/${meta.id}.tsv" , emit: collect @@ -17,7 +17,7 @@ process pbptyper { path "versions.yml" , emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '' diff --git a/modules/local/seqsero2.nf b/modules/local/seqsero2.nf index ce619de..41849e1 100644 --- a/modules/local/seqsero2.nf +++ b/modules/local/seqsero2.nf @@ -7,7 +7,7 @@ process seqsero2 { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(file), val(flag) + tuple val(meta), file(file) output: path "seqsero2/*/*" , emit: files @@ -16,7 +16,7 @@ process seqsero2 { path "versions.yml" , emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '-m a -b mem' diff --git a/modules/local/serotypefinder.nf b/modules/local/serotypefinder.nf index 1628b72..a4d6193 100644 --- a/modules/local/serotypefinder.nf +++ b/modules/local/serotypefinder.nf @@ -8,7 +8,7 @@ process serotypefinder { errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} input: - tuple val(meta), file(file), val(flag), file(script) + tuple val(meta), file(file), file(script) output: path "serotypefinder/*/*" , emit: files @@ -17,7 +17,7 @@ process serotypefinder { path "versions.yml" , emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '' diff --git a/modules/local/shigatyper.nf b/modules/local/shigatyper.nf index d222509..46ed8d0 100644 --- a/modules/local/shigatyper.nf +++ b/modules/local/shigatyper.nf @@ -8,7 +8,7 @@ process shigatyper { time '10m' input: - tuple val(meta), file(input), val(flag), file(script) + tuple val(meta), file(input), file(script) output: path "shigatyper/*_shigatyper.tsv", optional: true, emit: files @@ -17,7 +17,7 @@ process shigatyper { path "versions.yml", emit: versions when: - (task.ext.when == null || task.ext.when) && flag =~ 'found' + (task.ext.when == null || task.ext.when) shell: def args = task.ext.args ?: '' From f7c147e607a239a567d285dae4cae3599d51c856 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:56:10 -0600 Subject: [PATCH 13/14] version change to 4.5.240626 --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 2fe1277..8669790 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,7 +3,7 @@ manifest { author = 'Erin Young' homePage = 'https://github.com/UPHL-BioNGS/Grandeur' mainScript = 'main.nf' - version = '4.4.240521' + version = '4.5.240626' defaultBranch = 'main' description = 'Grandeur is short-read de novo assembly pipeline with serotyping.' nextflowVersion = '!>=22.10.1' From 1a343859ea9eb13852c63cdb053162d45d4c9b59 Mon Sep 17 00:00:00 2001 From: eriny Date: Wed, 26 Jun 2024 16:56:25 -0600 Subject: [PATCH 14/14] replaced flag process with functions --- subworkflows/information.nf | 229 ++++++++++++++++++++++++++++++------ 1 file changed, 194 insertions(+), 35 deletions(-) diff --git a/subworkflows/information.nf b/subworkflows/information.nf index b39ceee..a2d78d4 100644 --- a/subworkflows/information.nf +++ b/subworkflows/information.nf @@ -2,7 +2,6 @@ include { amrfinderplus } from '../modules/local/amrfinderplus' addParams(para include { drprg } from '../modules/local/drprg' addParams(params) include { elgato } from '../modules/local/elgato' addParams(params) include { emmtyper } from '../modules/local/emmtyper' addParams(params) -include { flag } from '../modules/local/local' addParams(params) include { json_convert } from '../modules/local/local' addParams(params) include { kaptive } from '../modules/local/kaptive' addParams(params) include { kleborate } from '../modules/local/kleborate' addParams(params) @@ -12,6 +11,101 @@ include { seqsero2 } from '../modules/local/seqsero2' addParams(para include { serotypefinder } from '../modules/local/serotypefinder' addParams(params) include { shigatyper } from '../modules/local/shigatyper' addParams(params) +def flagOrg(org_files, phrases) { + def found = false + org_files.each { org_file -> + if (org_file && org_file.exists()) { + def count = 0 + org_file.withReader { reader -> + while (reader.ready() && count < 10 && !found) { + def line = reader.readLine() + count++ + phrases.each { phrase -> + if (line.toString().contains(phrase)) { + if (org_file.getName().contains('fastani')) { + def columns = line.split(',') + if (columns.size() >= 4 && columns[3].toFloat() > 90) { + found = true + } + } else { + found = true + } + } + } + } + } + } + } + return found +} + +def topOrg(org_files) { + def fastani_file = org_files[0] + def blobtools_file = org_files[1] + def kraken2_file = org_files[2] + def mash_file = org_files[3] + + def genus = 'unknown' + def species = 'unknown' + + if (fastani_file && fastani_file.exists() && genus == 'unknown') { + def lines = fastani_file.readLines() + if (lines.size() > 1) { + def secondLine = lines[1].split(',') + if (secondLine.size() >= 4 && secondLine[3].toFloat() > 90) { + hit = secondLine[2].trim()+ '_unknown' + genus = hit.split('_')[0] + species = hit.split('_')[1] + } + } + } + + if (blobtools_file && blobtools_file.exists() && genus == 'unknown') { + blobtools_file.withReader { reader -> + while(reader.ready()) { + def line = reader.readLine() + def columns = line.split('\t') + if (columns.size() > 1) { + hit = columns[1].trim() + if (!['name', 'all', 'no-hit', 'undel'].contains(hit)) { + if (columns.size() == 14 && columns[-1].toFloat() > 50) { + name = hit + '_unknown' + genus = name.split('_')[0] + species = name.split('_')[1] + return + } + } + } + } + } + } + + if (kraken2_file && kraken2_file.exists() && genus == 'unknown') { + def lines = kraken2_file.readLines() + if (lines.size() > 1) { + def secondLine = lines[1].split(',') + if (secondLine.size() >= 2 && secondLine[1].toFloat() > 50) { + hit = secondLine[-1].trim() + '_unknown' + genus = hit.split('_')[0] + species = hit.split('_')[1] + } + } + } + + if (mash_file && mash_file.exists() && genus == 'unknown') { + def lines = mash_file.readLines() + if (lines.size() > 1) { + def secondLine = lines[1].split(',') + if (secondLine.size() >= 4 && secondLine[3].toFloat() < 0.1) { + hit = secondLine[-1].trim() + '_unknown' + genus = hit.split('_')[0] + species = hit.split('_')[1] + } + } + } + return [genus, species] +} + workflow information { take: ch_contigs @@ -31,35 +125,107 @@ workflow information { fastani: it[1] =~ /fastani.csv/ } .set { ch_flag_branch } - + + ch_contigs .filter{it[1] != null} + .join(ch_flag_branch.fastani, by:0, failOnMismatch: false, remainder: true) .join(ch_flag_branch.blobtools, by:0, failOnMismatch: false, remainder: true) .join(ch_flag_branch.kraken2, by:0, failOnMismatch: false, remainder: true) .join(ch_flag_branch.mash, by:0, failOnMismatch: false, remainder: true) - .join(ch_flag_branch.fastani, by:0, failOnMismatch: false, remainder: true) .filter{it[1] != null} - .map{ it -> tuple(it[0],[it[1], it[2], it[3], it[4], it[5]])} + .map{ it -> tuple(it[0], it[1], [it[2], it[3], it[4], it[5]])} .set {ch_for_flag} - flag(ch_for_flag) + // Yersinia + ch_for_flag + .filter{flagOrg(it[2], ['Yersinia'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_yersinia} - amrfinderplus(flag.out.organism) - drprg(flag.out.myco_flag) - emmtyper(flag.out.strepa_flag.combine(summfle_script)) - kaptive(flag.out.vibrio_flag) - kleborate(flag.out.klebsiella_flag.combine(summfle_script)) - elgato(flag.out.legionella_flag) - mykrobe(flag.out.myco_flag) - pbptyper(flag.out.streppneu_flag) - seqsero2(flag.out.salmonella_flag) - serotypefinder(flag.out.ecoli_flag.combine(summfle_script)) - shigatyper(flag.out.ecoli_flag.combine(summfle_script)) + // Salmonella + ch_for_flag + .filter{flagOrg(it[2], ['Salmonella'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_salmonella} + + // E. coli and Shigella + ch_for_flag + .filter{flagOrg(it[2], ['Escherichia', 'Shigella'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_ecoli} + + // Klebsiella + ch_for_flag + .filter{flagOrg(it[2], ['Klebsiella', 'Enterobacter', 'Serratia'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_kleb} + + // Group A Strep + ch_for_flag + .filter{flagOrg(it[2], ['Streptococcus'])} + .filter{flagOrg(it[2], ['pyogenes', 'dysgalactiae', 'anginosus'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_gas} + + // Streptococcus pneumoniae + ch_for_flag + .filter{flagOrg(it[2], ['Streptococcus'])} + .filter{flagOrg(it[2], ['pneumoniae'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_strep} + + // Legionella + ch_for_flag + .filter{flagOrg(it[2], ['Legionella'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_legionella} + + // Vibrio + ch_for_flag + .filter{flagOrg(it[2], ['Vibrio'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_vibrio} + + // Acinetobacter + ch_for_flag + .filter{flagOrg(it[2], ['Acinetobacter'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_acinetobacter} + + // Mycobacterium/Mycobacteria + ch_for_flag + .filter{flagOrg(it[2], ['Mycobacteri'])} + .map { it -> tuple(it[0], it[1])} + .set {ch_myco} + + // Getting the top organism for each sample + // for amrfinderplus + // for prokka + ch_for_flag + .map { + it -> + genus_species = topOrg(it[2]) + tuple (it[0], it[1], genus_species[0], genus_species[1]) + } + .set { ch_organism } + + amrfinderplus(ch_organism) + drprg(ch_myco) + emmtyper(ch_gas.combine(summfle_script)) + kaptive(ch_vibrio) + kleborate(ch_kleb.combine(summfle_script)) + elgato(ch_legionella) + mykrobe(ch_myco) + pbptyper(ch_strep) + seqsero2(ch_salmonella) + serotypefinder(ch_ecoli.combine(summfle_script)) + shigatyper(ch_ecoli.combine(summfle_script)) json_convert(drprg.out.json.combine(jsoncon_script)) amrfinderplus.out.collect - .collectFile(name: "amrfinderplus.txt", + .collectFile(name: 'amrfinderplus.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/ncbi-AMRFinderplus") @@ -67,84 +233,77 @@ workflow information { json_convert.out.collect .filter( ~/.*drprg.tsv/ ) - .collectFile(name: "drprg_summary.tsv", + .collectFile(name: 'drprg_summary.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/drprg") .set{ drprg_summary } elgato.out.collect - .collectFile(name: "elgato_summary.tsv", + .collectFile(name: 'elgato_summary.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/elgato") .set{ elgato_summary } emmtyper.out.collect - .collectFile(name: "emmtyper_summary.tsv", + .collectFile(name: 'emmtyper_summary.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/emmtyper") .set{ emmtyper_summary } - flag.out.collect - .collectFile(name: "flag_summary.csv", - keepHeader: true, - sort: {file -> file.text }, - storeDir: "${params.outdir}/flag") - .set { flag_summary } - kaptive.out.collect - .collectFile(name: "kaptive_summary.txt", + .collectFile(name: 'kaptive_summary.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/kaptive") .set{ kaptive_summary } kleborate.out.collect - .collectFile(name: "kleborate_results.tsv", + .collectFile(name: 'kleborate_results.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/kleborate") .set{ kleborate_summary } mykrobe.out.collect - .collectFile(name: "mykrobe_summary.csv", + .collectFile(name: 'mykrobe_summary.csv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/mykrobe") .set{ mykrobe_summary } pbptyper.out.collect - .collectFile(name: "pbptyper_summary.tsv", + .collectFile(name: 'pbptyper_summary.tsv', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/pbptyper") .set{ pbptyper_summary } seqsero2.out.collect - .collectFile(name: "seqsero2_results.txt", + .collectFile(name: 'seqsero2_results.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/seqsero2") .set{ seqsero2_summary } serotypefinder.out.collect - .collectFile(name: "serotypefinder_results.txt", + .collectFile(name: 'serotypefinder_results.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/serotypefinder") .set{ serotypefinder_summary } shigatyper.out.collect - .collectFile(name: "shigatyper_hits.txt", + .collectFile(name: 'shigatyper_hits.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/shigatyper") .set{ shigatyper_hits } shigatyper.out.files - .collectFile(name: "shigatyper_summary.txt", + .collectFile(name: 'shigatyper_summary.txt', keepHeader: true, sort: { file -> file.text }, storeDir: "${params.outdir}/shigatyper")