From 78208888c5bf971f20c3d74144619c19f7df9c18 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 23 Jan 2025 14:20:10 +0100 Subject: [PATCH 1/5] remove local align swf --- subworkflows/local/align.nf | 395 ---------------------------- subworkflows/local/compute_trees.nf | 80 ------ 2 files changed, 475 deletions(-) delete mode 100644 subworkflows/local/align.nf delete mode 100644 subworkflows/local/compute_trees.nf diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf deleted file mode 100644 index bfcd7254..00000000 --- a/subworkflows/local/align.nf +++ /dev/null @@ -1,395 +0,0 @@ -/* - * Compute trees if needed and run alignment - */ - -// -// Include the subworkflows -// -include { COMPUTE_TREES } from '../../subworkflows/local/compute_trees.nf' - -// Include the nf-core modules -include { CLUSTALO_ALIGN } from '../../modules/nf-core/clustalo/align/main' -include { FAMSA_ALIGN } from '../../modules/nf-core/famsa/align/main' -include { FOLDMASON_EASYMSA } from '../../modules/nf-core/foldmason/easymsa/main' -include { KALIGN_ALIGN } from '../../modules/nf-core/kalign/align/main' -include { LEARNMSA_ALIGN } from '../../modules/nf-core/learnmsa/align/main' -include { MAFFT_ALIGN } from '../../modules/nf-core/mafft/align/main' -include { MAGUS_ALIGN } from '../../modules/nf-core/magus/align/main' -include { MTMALIGN_ALIGN } from '../../modules/nf-core/mtmalign/align/main' -include { MUSCLE5_SUPER5 } from '../../modules/nf-core/muscle5/super5/main' -include { TCOFFEE_ALIGN } from '../../modules/nf-core/tcoffee/align/main' -include { TCOFFEE_ALIGN as TCOFFEE3D_ALIGN } from '../../modules/nf-core/tcoffee/align/main' -include { TCOFFEE_REGRESSIVE } from '../../modules/nf-core/tcoffee/regressive/main' -include { TCOFFEE_CONSENSUS as CONSENSUS } from '../../modules/nf-core/tcoffee/consensus/main' -include { UPP_ALIGN } from '../../modules/nf-core/upp/align/main' - -workflow ALIGN { - take: - ch_fastas // channel: [ val(meta), [ path(fastas) ] ] - ch_tools 
// channel: [ val(meta_tree), val(meta_aligner) ] - // [[tree:, args_tree:, args_tree_clean: ], [aligner:, args_aligner:, args_aligner_clean:]] - // e.g.[[tree:FAMSA, args_tree:-gt upgma -parttree, args_tree_clean:-gt_upgma_-parttree], [aligner:FAMSA, args_aligner:null, args_aligner_clean:null]] - // e.g.[[tree:null, args_tree:null, args_tree_clean:null], [aligner:TCOFFEE, args_aligner:-output fasta_aln, args_aligner_clean:-output_fasta_aln]] - ch_optional_data // channel: meta, [e.g. /path/to/file.pdb,/path/to/file.pdb,/path/to/file.pdb] - compress // boolean: true or false - - main: - - ch_msa = Channel.empty() - ch_versions = Channel.empty() - - // Branch the toolsheet information into two channels - // This way, it can direct the computation of guidetrees - // and aligners separately - ch_tools - .multiMap { - it -> - tree: it[0] - align: it[1] - } - .set { ch_tools_split } - - // ------------------------------------------------ - // Compute the required trees - // ------------------------------------------------ - COMPUTE_TREES ( - ch_fastas, - ch_optional_data, - ch_tools_split.tree.unique() - ) - trees = COMPUTE_TREES.out.trees - ch_versions = ch_versions.mix(COMPUTE_TREES.out.versions) - - ch_fastas.combine(ch_tools) - .map { - metafasta, fasta, metatree, metaalign -> - [ metafasta+metatree , metaalign, fasta ] - } - .set { ch_fasta_tools } - - // ------------------------------------------------ - // Add back trees to the fasta channel - // And prepare the input channels for the aligners - // ------------------------------------------------ - - // Tools that accept sequence and tree - ch_fasta_tools - .join(trees, by: [0], remainder:true ) - .filter{ - it[1] != null - } - .map { - metafasta_tree, metaalign, fasta, tree -> - [ metafasta_tree + metaalign, fasta, tree ] - } - .map { - meta, fasta, tree -> - tree ? 
[ meta,fasta, tree ] : [meta, fasta, [ ] ] - } - .branch { - clustalo: it[0]["aligner"] == "CLUSTALO" - famsa: it[0]["aligner"] == "FAMSA" - kalign: it[0]["aligner"] == "KALIGN" - learnmsa: it[0]["aligner"] == "LEARNMSA" - mafft: it[0]["aligner"] == "MAFFT" - magus: it[0]["aligner"] == "MAGUS" - muscle5: it[0]["aligner"] == "MUSCLE5" - mtmalign: it[0]["aligner"] == "MTMALIGN" - regressive: it[0]["aligner"] == "REGRESSIVE" - tcoffee: it[0]["aligner"] == "TCOFFEE" - tcoffee3d: it[0]["aligner"] == "3DCOFFEE" - upp: it[0]["aligner"] == "UPP" - } - .set { ch_fasta_trees } - - - // tools that accept only optional data - ch_optional_data.combine(ch_tools) - .map { - metadependency, template, dependency, metatree, metaalign -> - [ metadependency+metatree+metaalign, template, dependency ] - } - .branch { - mtmalign: it[0]["aligner"] == "MTMALIGN" - } - .set { ch_optional_data_tools } - - - // tools that accept optional data and tree - ch_optional_data.combine(ch_tools) - .map { - metadependency, template, dependency, metatree, metaalign -> - [ metadependency + metatree , metaalign, template, dependency ] - } - .join(trees, by: 0, remainder: true) - .filter{ - it.size() == 5 - } - .map { - metratreeanddep, metaalign, template, dependency, tree -> - tree ? [ metratreeanddep + metaalign, tree, template, dependency ]:[ metratreeanddep + metaalign, [ ], template, dependency ] - } - .branch { - foldmason: it[0]["aligner"] == "FOLDMASON" - } - .set { ch_optional_data_tools_tree } - - // ------------------------------------------------ - // Compute the alignments - // ------------------------------------------------ - - // 1. 
SEQUENCE BASED - // ----------------- CLUSTALO ------------------ - ch_fasta_trees.clustalo - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_clustalo } - - CLUSTALO_ALIGN ( - ch_fasta_trees_clustalo.fasta, - ch_fasta_trees_clustalo.tree, - [], - [], - [], - [], - compress - ) - ch_msa = ch_msa.mix(CLUSTALO_ALIGN.out.alignment) - ch_versions = ch_versions.mix(CLUSTALO_ALIGN.out.versions.first()) - - // ----------------- FAMSA --------------------- - ch_fasta_trees.famsa - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_famsa} - - FAMSA_ALIGN (ch_fasta_trees_famsa.fasta, - ch_fasta_trees_famsa.tree, - compress - ) - ch_msa = ch_msa.mix(FAMSA_ALIGN.out.alignment) - ch_versions = ch_versions.mix(FAMSA_ALIGN.out.versions.first()) - - // ---------------- KALIGN ----------------------- - ch_fasta_trees.kalign - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_kalign } - - KALIGN_ALIGN ( - ch_fasta_kalign.fasta, - compress - ) - ch_msa = ch_msa.mix(KALIGN_ALIGN.out.alignment) - ch_versions = ch_versions.mix(KALIGN_ALIGN.out.versions.first()) - - // ---------------- LEARNMSA ---------------------- - ch_fasta_trees.learnmsa - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_learnmsa } - - LEARNMSA_ALIGN ( - ch_fasta_learnmsa.fasta, - compress - ) - ch_msa = ch_msa.mix(LEARNMSA_ALIGN.out.alignment) - ch_versions = ch_versions.mix(LEARNMSA_ALIGN.out.versions.first()) - - // ---------------- MAFFT ----------------------- - ch_fasta_trees.mafft - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_mafft } - - MAFFT_ALIGN ( - ch_fasta_mafft.fasta, - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - compress - ) - ch_msa = ch_msa.mix(MAFFT_ALIGN.out.fas) // the MAFFT 
module calls its output fas instead of alignment - ch_versions = ch_versions.mix(MAFFT_ALIGN.out.versions.first()) - - // ----------------- MAGUS ------------------ - ch_fasta_trees.magus - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_magus } - - MAGUS_ALIGN ( - ch_fasta_trees_magus.fasta, - ch_fasta_trees_magus.tree, - compress - ) - ch_msa = ch_msa.mix(MAGUS_ALIGN.out.alignment) - ch_versions = ch_versions.mix(MAGUS_ALIGN.out.versions.first()) - - // ----------------- MUSCLE5 ------------------ - ch_fasta_trees.muscle5 - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_muscle5 } - - MUSCLE5_SUPER5 ( - ch_fasta_muscle5.fasta, - compress - ) - ch_msa = ch_msa.mix(MUSCLE5_SUPER5.out.alignment.first()) - ch_versions = ch_versions.mix(MUSCLE5_SUPER5.out.versions.first()) - - // ----------------- TCOFFEE ------------------ - ch_fasta_trees.tcoffee - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_tcoffee } - - TCOFFEE_ALIGN ( - ch_fasta_trees_tcoffee.fasta, - ch_fasta_trees_tcoffee.tree, - [ [:], [], [] ], - compress - ) - ch_msa = ch_msa.mix(TCOFFEE_ALIGN.out.alignment) - ch_versions = ch_versions.mix(TCOFFEE_ALIGN.out.versions.first()) - - // ----------------- REGRESSIVE ------------------ - ch_fasta_trees.regressive - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_regressive } - - TCOFFEE_REGRESSIVE ( - ch_fasta_trees_regressive.fasta, - ch_fasta_trees_regressive.tree, - [ [:], [], [] ], - compress - ) - ch_msa = ch_msa.mix(TCOFFEE_REGRESSIVE.out.alignment) - ch_versions = ch_versions.mix(TCOFFEE_REGRESSIVE.out.versions.first()) - - // ----------------- UPP ------------------- - ch_fasta_trees.upp - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, 
treefile ] - } - .set { ch_fasta_trees_upp } - - UPP_ALIGN ( - ch_fasta_trees_upp.fasta, - ch_fasta_trees_upp.tree, - compress - ) - ch_msa = ch_msa.mix(UPP_ALIGN.out.alignment) - ch_versions = ch_versions.mix(UPP_ALIGN.out.versions.first()) - - // 2. SEQUENCE + STRUCTURE BASED - - if(params.templates_suffix == ".pdb"){ - // ----------------- 3DCOFFEE ------------------ - ch_fasta_trees.tcoffee3d - .map{ meta, fasta, tree -> [ meta["id"], meta, fasta, tree ] } - .combine(ch_optional_data.map{ meta, template, optional_data -> [ meta["id"], template, optional_data ] }, by: 0) - .multiMap{ - merging_id, meta, fastafile, treefile, templatefile, datafiles -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - optional_data: [ meta, templatefile, datafiles ] - } - .set { ch_fasta_trees_3dcoffee } - - TCOFFEE3D_ALIGN ( - ch_fasta_trees_3dcoffee.fasta, - ch_fasta_trees_3dcoffee.tree, - ch_fasta_trees_3dcoffee.optional_data, - compress - ) - ch_msa = ch_msa.mix(TCOFFEE3D_ALIGN.out.alignment) - ch_versions = ch_versions.mix(TCOFFEE3D_ALIGN.out.versions.first()) - - // 3. 
STRUCTURE BASED - - // ----------------- MTMALIGN ------------------ - ch_optional_data_tools.mtmalign - .multiMap { - meta, template, dependency -> - pdbs: [ meta, dependency ] - } - .set { ch_pdb_mtmalign } - - MTMALIGN_ALIGN ( - ch_pdb_mtmalign.pdbs, - compress - ) - ch_msa = ch_msa.mix(MTMALIGN_ALIGN.out.alignment) - ch_versions = ch_versions.mix(MTMALIGN_ALIGN.out.versions.first()) - - - // ----------------- FOLDMASON ------------------ - - ch_optional_data_tools_tree.foldmason - .multiMap { - meta, tree, template, dependency -> - pdbs: [ meta, dependency ] - trees: [ meta, tree ] - } - .set { ch_pdb_foldmason } - - FOLDMASON_EASYMSA ( - ch_pdb_foldmason.pdbs, - ch_pdb_foldmason.trees, - compress - ) - ch_msa = ch_msa.mix(FOLDMASON_EASYMSA.out.msa_aa) - ch_versions = ch_versions.mix(FOLDMASON_EASYMSA.out.versions.first()) - } - - - - // ----------------- CONSENSUS ------------------ - if(params.build_consensus){ - ch_msa.map{ meta, msa -> [ meta["id"], msa]} - .groupTuple() - .filter { it[1].size() > 1 } - .map { id_meta, msas -> [ ["id": id_meta, "tree":"DEFAULT", "args_tree":"", "args_tree_clean":"default", "aligner":"CONSENSUS", "args_aligner":"", "args_aligner_clean":"default" ], msas ]} - .set { ch_msa_consensus } - - CONSENSUS(ch_msa_consensus, [[:],[]], compress) - ch_msa = ch_msa.mix(CONSENSUS.out.alignment) - ch_versions = ch_versions.mix(CONSENSUS.out.versions.first()) - } - - - emit: - msa = ch_msa // channel: [ val(meta), path(msa) ] - trees = trees // channel: [ val(meta), path(tree) ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/compute_trees.nf b/subworkflows/local/compute_trees.nf deleted file mode 100644 index e99d6619..00000000 --- a/subworkflows/local/compute_trees.nf +++ /dev/null @@ -1,80 +0,0 @@ -// -// Compute guide trees either with FAMSA or Clusta Omega -// - -include { FAMSA_GUIDETREE } from '../../modules/nf-core/famsa/guidetree/main' -include { CLUSTALO_GUIDETREE } from 
'../../modules/nf-core/clustalo/guidetree/main' -include { MAFFT_GUIDETREE } from '../../modules/nf-core/mafft/guidetree/main' - -include { CUSTOM_PDBSTOFASTA } from '../../modules/local/custom_pdbtofasta.nf' -include { FASTAVALIDATOR } from '../../modules/nf-core/fastavalidator/main' - -workflow COMPUTE_TREES { - - take: - ch_fastas //channel: [ meta, /path/to/file.fasta ] - ch_optional_data //channel: [ meta, template, [ /path/to/file1, /path/to/file2, ... ] ] - tree_tools //channel: [ meta ] ( tools to be run: meta.tree, meta.args_tree ) - - main: - ch_versions = Channel.empty() - ch_trees = Channel.empty() - - // - // For the inputs that only have optional data but not a fasta - // we need to generate the fasta file - // - - ch_optional_data - .join(ch_fastas, remainder:true) - .filter { - it[-1] == null - } - .map { - it -> [it[0], it[2]] - }.set { ch_optional_data_no_fasta } - - CUSTOM_PDBSTOFASTA(ch_optional_data_no_fasta) - ch_versions = ch_versions.mix(CUSTOM_PDBSTOFASTA.out.versions) - - if(!params.skip_preprocessing){ - FASTAVALIDATOR(CUSTOM_PDBSTOFASTA.out.fasta) - ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) - } - ch_fastas_all = ch_fastas.mix(CUSTOM_PDBSTOFASTA.out.fasta) - - - - // - // Render the required guide trees - // - ch_fastas_all - .combine(tree_tools) - .map { - metafasta, fasta, metatree -> - [ metafasta + metatree, fasta ] - } - .branch { - famsa: it[0]["tree"] == "FAMSA" - clustalo: it[0]["tree"] == "CLUSTALO" - mafft: it[0]["tree"] == "MAFFT" - } - .set { ch_fastas_fortrees } - - - FAMSA_GUIDETREE (ch_fastas_fortrees.famsa) - ch_trees = FAMSA_GUIDETREE.out.tree - ch_versions = ch_versions.mix(FAMSA_GUIDETREE.out.versions.first()) - - CLUSTALO_GUIDETREE (ch_fastas_fortrees.clustalo) - ch_trees = ch_trees.mix(CLUSTALO_GUIDETREE.out.tree) - ch_versions = ch_versions.mix(CLUSTALO_GUIDETREE.out.versions.first()) - - MAFFT_GUIDETREE (ch_fastas_fortrees.mafft) - ch_trees = ch_trees.mix(MAFFT_GUIDETREE.out.tree) - ch_versions = 
ch_versions.mix(MAFFT_GUIDETREE.out.versions.first()) - - emit: - trees = ch_trees // channel: [ val(meta), path(tree) ] - versions = ch_versions // channel: [ versions.yml ] -} From fa6667b65c9e6448d3ad46d88c2c62e9a754f523 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 23 Jan 2025 16:52:13 +0100 Subject: [PATCH 2/5] install class swf --- .../mirpedrol/clustalo/align/environment.yml | 8 + modules/mirpedrol/clustalo/align/main.nf | 54 +++ modules/mirpedrol/clustalo/align/meta.yml | 60 ++++ .../clustalo/align/tests/main.nf.test | 34 ++ .../clustalo/align/tests/main.nf.test.snap | 31 ++ .../mirpedrol/clustalo/align/tests/tags.yml | 2 + .../clustalo/guidetree/environment.yml | 7 + modules/mirpedrol/clustalo/guidetree/main.nf | 47 +++ modules/mirpedrol/clustalo/guidetree/meta.yml | 54 +++ .../clustalo/guidetree/tests/main.nf.test | 32 ++ .../guidetree/tests/main.nf.test.snap | 23 ++ .../clustalo/guidetree/tests/tags.yml | 2 + .../clustalo/treealign/environment.yml | 8 + modules/mirpedrol/clustalo/treealign/main.nf | 56 ++++ modules/mirpedrol/clustalo/treealign/meta.yml | 69 ++++ .../clustalo/treealign/tests/main.nf.test | 48 +++ .../treealign/tests/main.nf.test.snap | 31 ++ .../clustalo/treealign/tests/tags.yml | 2 + modules/mirpedrol/famsa/align/environment.yml | 7 + modules/mirpedrol/famsa/align/main.nf | 48 +++ modules/mirpedrol/famsa/align/meta.yml | 55 +++ .../mirpedrol/famsa/align/tests/main.nf.test | 34 ++ .../famsa/align/tests/main.nf.test.snap | 31 ++ modules/mirpedrol/famsa/align/tests/tags.yml | 2 + .../mirpedrol/famsa/guidetree/environment.yml | 7 + modules/mirpedrol/famsa/guidetree/main.nf | 49 +++ modules/mirpedrol/famsa/guidetree/meta.yml | 53 +++ .../famsa/guidetree/tests/main.nf.test | 32 ++ .../famsa/guidetree/tests/main.nf.test.snap | 23 ++ .../mirpedrol/famsa/guidetree/tests/tags.yml | 2 + .../mirpedrol/famsa/treealign/environment.yml | 7 + modules/mirpedrol/famsa/treealign/main.nf | 50 +++ modules/mirpedrol/famsa/treealign/meta.yml | 64 ++++ 
.../famsa/treealign/tests/main.nf.test | 48 +++ .../famsa/treealign/tests/main.nf.test.snap | 31 ++ .../mirpedrol/famsa/treealign/tests/tags.yml | 2 + .../mirpedrol/kalign/align/environment.yml | 8 + modules/mirpedrol/kalign/align/main.nf | 48 +++ modules/mirpedrol/kalign/align/meta.yml | 54 +++ .../mirpedrol/kalign/align/tests/main.nf.test | 34 ++ .../kalign/align/tests/main.nf.test.snap | 60 ++++ modules/mirpedrol/kalign/align/tests/tags.yml | 2 + .../mirpedrol/learnmsa/align/environment.yml | 8 + modules/mirpedrol/learnmsa/align/main.nf | 48 +++ modules/mirpedrol/learnmsa/align/meta.yml | 54 +++ .../learnmsa/align/tests/main.nf.test | 36 ++ .../learnmsa/align/tests/main.nf.test.snap | 26 ++ .../mirpedrol/learnmsa/align/tests/tags.yml | 2 + modules/mirpedrol/mafft/environment.yml | 8 + modules/mirpedrol/mafft/main.nf | 50 +++ modules/mirpedrol/mafft/meta.yml | 60 ++++ modules/mirpedrol/mafft/tests/main.nf.test | 32 ++ .../mirpedrol/mafft/tests/main.nf.test.snap | 317 ++++++++++++++++++ modules/mirpedrol/mafft/tests/tags.yml | 2 + modules/mirpedrol/magus/align/environment.yml | 8 + modules/mirpedrol/magus/align/main.nf | 53 +++ modules/mirpedrol/magus/align/meta.yml | 54 +++ .../mirpedrol/magus/align/tests/main.nf.test | 40 +++ .../magus/align/tests/main.nf.test.snap | 14 + modules/mirpedrol/magus/align/tests/tags.yml | 2 + .../mirpedrol/magus/guidetree/environment.yml | 7 + modules/mirpedrol/magus/guidetree/main.nf | 48 +++ modules/mirpedrol/magus/guidetree/meta.yml | 52 +++ .../magus/guidetree/tests/main.nf.test | 39 +++ .../magus/guidetree/tests/main.nf.test.snap | 23 ++ .../mirpedrol/magus/guidetree/tests/tags.yml | 2 + .../mirpedrol/magus/treealign/environment.yml | 8 + modules/mirpedrol/magus/treealign/main.nf | 55 +++ modules/mirpedrol/magus/treealign/meta.yml | 66 ++++ .../magus/treealign/tests/main.nf.test | 54 +++ .../magus/treealign/tests/main.nf.test.snap | 14 + .../mirpedrol/magus/treealign/tests/tags.yml | 2 + 
.../mirpedrol/muscle5/super5/environment.yml | 8 + modules/mirpedrol/muscle5/super5/main.nf | 62 ++++ modules/mirpedrol/muscle5/super5/meta.yml | 61 ++++ .../muscle5/super5/tests/main.nf.test | 54 +++ .../muscle5/super5/tests/main.nf.test.snap | 65 ++++ .../muscle5/super5/tests/nextflow.config | 0 .../muscle5/super5/tests/perm_all.config | 3 + .../mirpedrol/muscle5/super5/tests/tags.yml | 2 + .../mirpedrol/tcoffee/align/environment.yml | 8 + modules/mirpedrol/tcoffee/align/main.nf | 58 ++++ modules/mirpedrol/tcoffee/align/meta.yml | 62 ++++ .../tcoffee/align/tests/main.nf.test | 33 ++ .../tcoffee/align/tests/main.nf.test.snap | 31 ++ .../mirpedrol/tcoffee/align/tests/tags.yml | 2 + .../tcoffee/treealign/environment.yml | 8 + modules/mirpedrol/tcoffee/treealign/main.nf | 60 ++++ modules/mirpedrol/tcoffee/treealign/meta.yml | 71 ++++ .../tcoffee/treealign/tests/main.nf.test | 51 +++ .../tcoffee/treealign/tests/main.nf.test.snap | 31 ++ .../tcoffee/treealign/tests/tags.yml | 2 + subworkflows/mirpedrol/msa_alignment/main.nf | 81 +++++ subworkflows/mirpedrol/msa_alignment/meta.yml | 66 ++++ .../msa_alignment/tests/main.nf.test | 204 +++++++++++ .../msa_alignment/tests/main.nf.test.snap | 201 +++++++++++ subworkflows/mirpedrol/msa_guidetree/main.nf | 46 +++ subworkflows/mirpedrol/msa_guidetree/meta.yml | 54 +++ .../msa_guidetree/tests/main.nf.test | 89 +++++ .../msa_guidetree/tests/main.nf.test.snap | 80 +++++ subworkflows/mirpedrol/msa_treealign/main.nf | 67 ++++ subworkflows/mirpedrol/msa_treealign/meta.yml | 73 ++++ .../msa_treealign/tests/main.nf.test | 173 ++++++++++ .../msa_treealign/tests/main.nf.test.snap | 200 +++++++++++ 104 files changed, 4509 insertions(+) create mode 100644 modules/mirpedrol/clustalo/align/environment.yml create mode 100644 modules/mirpedrol/clustalo/align/main.nf create mode 100644 modules/mirpedrol/clustalo/align/meta.yml create mode 100644 modules/mirpedrol/clustalo/align/tests/main.nf.test create mode 100644 
modules/mirpedrol/clustalo/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/clustalo/align/tests/tags.yml create mode 100644 modules/mirpedrol/clustalo/guidetree/environment.yml create mode 100644 modules/mirpedrol/clustalo/guidetree/main.nf create mode 100644 modules/mirpedrol/clustalo/guidetree/meta.yml create mode 100644 modules/mirpedrol/clustalo/guidetree/tests/main.nf.test create mode 100644 modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/clustalo/guidetree/tests/tags.yml create mode 100644 modules/mirpedrol/clustalo/treealign/environment.yml create mode 100644 modules/mirpedrol/clustalo/treealign/main.nf create mode 100644 modules/mirpedrol/clustalo/treealign/meta.yml create mode 100644 modules/mirpedrol/clustalo/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/clustalo/treealign/tests/tags.yml create mode 100644 modules/mirpedrol/famsa/align/environment.yml create mode 100644 modules/mirpedrol/famsa/align/main.nf create mode 100644 modules/mirpedrol/famsa/align/meta.yml create mode 100644 modules/mirpedrol/famsa/align/tests/main.nf.test create mode 100644 modules/mirpedrol/famsa/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/famsa/align/tests/tags.yml create mode 100644 modules/mirpedrol/famsa/guidetree/environment.yml create mode 100644 modules/mirpedrol/famsa/guidetree/main.nf create mode 100644 modules/mirpedrol/famsa/guidetree/meta.yml create mode 100644 modules/mirpedrol/famsa/guidetree/tests/main.nf.test create mode 100644 modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/famsa/guidetree/tests/tags.yml create mode 100644 modules/mirpedrol/famsa/treealign/environment.yml create mode 100644 modules/mirpedrol/famsa/treealign/main.nf create mode 100644 modules/mirpedrol/famsa/treealign/meta.yml create mode 100644 
modules/mirpedrol/famsa/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/famsa/treealign/tests/tags.yml create mode 100644 modules/mirpedrol/kalign/align/environment.yml create mode 100644 modules/mirpedrol/kalign/align/main.nf create mode 100644 modules/mirpedrol/kalign/align/meta.yml create mode 100644 modules/mirpedrol/kalign/align/tests/main.nf.test create mode 100644 modules/mirpedrol/kalign/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/kalign/align/tests/tags.yml create mode 100644 modules/mirpedrol/learnmsa/align/environment.yml create mode 100644 modules/mirpedrol/learnmsa/align/main.nf create mode 100644 modules/mirpedrol/learnmsa/align/meta.yml create mode 100644 modules/mirpedrol/learnmsa/align/tests/main.nf.test create mode 100644 modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/learnmsa/align/tests/tags.yml create mode 100644 modules/mirpedrol/mafft/environment.yml create mode 100644 modules/mirpedrol/mafft/main.nf create mode 100644 modules/mirpedrol/mafft/meta.yml create mode 100644 modules/mirpedrol/mafft/tests/main.nf.test create mode 100644 modules/mirpedrol/mafft/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/mafft/tests/tags.yml create mode 100644 modules/mirpedrol/magus/align/environment.yml create mode 100644 modules/mirpedrol/magus/align/main.nf create mode 100644 modules/mirpedrol/magus/align/meta.yml create mode 100644 modules/mirpedrol/magus/align/tests/main.nf.test create mode 100644 modules/mirpedrol/magus/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/magus/align/tests/tags.yml create mode 100644 modules/mirpedrol/magus/guidetree/environment.yml create mode 100644 modules/mirpedrol/magus/guidetree/main.nf create mode 100644 modules/mirpedrol/magus/guidetree/meta.yml create mode 100644 modules/mirpedrol/magus/guidetree/tests/main.nf.test create 
mode 100644 modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/magus/guidetree/tests/tags.yml create mode 100644 modules/mirpedrol/magus/treealign/environment.yml create mode 100644 modules/mirpedrol/magus/treealign/main.nf create mode 100644 modules/mirpedrol/magus/treealign/meta.yml create mode 100644 modules/mirpedrol/magus/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/magus/treealign/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/magus/treealign/tests/tags.yml create mode 100644 modules/mirpedrol/muscle5/super5/environment.yml create mode 100644 modules/mirpedrol/muscle5/super5/main.nf create mode 100644 modules/mirpedrol/muscle5/super5/meta.yml create mode 100644 modules/mirpedrol/muscle5/super5/tests/main.nf.test create mode 100644 modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/muscle5/super5/tests/nextflow.config create mode 100644 modules/mirpedrol/muscle5/super5/tests/perm_all.config create mode 100644 modules/mirpedrol/muscle5/super5/tests/tags.yml create mode 100644 modules/mirpedrol/tcoffee/align/environment.yml create mode 100644 modules/mirpedrol/tcoffee/align/main.nf create mode 100644 modules/mirpedrol/tcoffee/align/meta.yml create mode 100644 modules/mirpedrol/tcoffee/align/tests/main.nf.test create mode 100644 modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/tcoffee/align/tests/tags.yml create mode 100644 modules/mirpedrol/tcoffee/treealign/environment.yml create mode 100644 modules/mirpedrol/tcoffee/treealign/main.nf create mode 100644 modules/mirpedrol/tcoffee/treealign/meta.yml create mode 100644 modules/mirpedrol/tcoffee/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/tcoffee/treealign/tests/tags.yml create mode 100644 subworkflows/mirpedrol/msa_alignment/main.nf create mode 100644 
subworkflows/mirpedrol/msa_alignment/meta.yml create mode 100644 subworkflows/mirpedrol/msa_alignment/tests/main.nf.test create mode 100644 subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap create mode 100644 subworkflows/mirpedrol/msa_guidetree/main.nf create mode 100644 subworkflows/mirpedrol/msa_guidetree/meta.yml create mode 100644 subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test create mode 100644 subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap create mode 100644 subworkflows/mirpedrol/msa_treealign/main.nf create mode 100644 subworkflows/mirpedrol/msa_treealign/meta.yml create mode 100644 subworkflows/mirpedrol/msa_treealign/tests/main.nf.test create mode 100644 subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap diff --git a/modules/mirpedrol/clustalo/align/environment.yml b/modules/mirpedrol/clustalo/align/environment.yml new file mode 100644 index 00000000..be1eef95 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/environment.yml @@ -0,0 +1,8 @@ +name: clustalo_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::clustalo=1.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/clustalo/align/main.nf b/modules/mirpedrol/clustalo/align/main.nf new file mode 100644 index 00000000..55a1113b --- /dev/null +++ b/modules/mirpedrol/clustalo/align/main.nf @@ -0,0 +1,54 @@ +process CLUSTALO_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0': + 'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + // the --force -o is necessary, as clustalo expands the commandline input, + // causing it to treat the pipe as a parameter and fail + // this way, the command expands to /dev/fd/, and --force allows writing output to an already existing file + """ + clustalo \ + -i ${fasta} \ + --threads=${task.cpus} \ + $args \ + --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/clustalo/align/meta.yml b/modules/mirpedrol/clustalo/align/meta.yml new file mode 100644 index 00000000..1eaeec50 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/meta.yml @@ -0,0 +1,60 @@ +name: "clustalo_align" +description: Align sequences using Clustal Omega +keywords: + - alignment + - MSA + - msa + - align + - genomics +class: "msa_alignment" +tools: + - "clustalo": + description: "Latest version of Clustal: a multiple 
sequence alignment program + for DNA or proteins" + homepage: "http://www.clustal.org/omega/" + documentation: "http://www.clustal.org/omega/" + tool_dev_url: "http://www.clustal.org/omega/" + doi: "10.1038/msb.2011.75" + licence: ["GPL v2"] + identifier: "biotools:clustalo" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in gzipped fasta format + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@joseespinosa" +maintainers: + - "@luisas" + - "@joseespinosa" + - "@lrauschning" diff --git a/modules/mirpedrol/clustalo/align/tests/main.nf.test b/modules/mirpedrol/clustalo/align/tests/main.nf.test new file mode 100644 index 00000000..bc571df8 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process CLUSTALO_ALIGN" + script "../main.nf" + process "CLUSTALO_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "clustalo" + tag "clustalo/align" + + + test("sarscov2 - contigs-fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.alignment).match("alignment ")}, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/align/tests/main.nf.test.snap b/modules/mirpedrol/clustalo/align/tests/main.nf.test.snap new file mode 100644 index 00000000..b92811bf --- /dev/null +++ b/modules/mirpedrol/clustalo/align/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions1": { + "content": [ + [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T13:15:10.541265" + }, + "alignment ": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T13:15:10.531416" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/align/tests/tags.yml b/modules/mirpedrol/clustalo/align/tests/tags.yml new file mode 100644 index 00000000..3dd9c9c0 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/tests/tags.yml @@ -0,0 +1,2 @@ +clustalo/align: + - "modules/mirpedrol/clustalo/align/**" diff --git a/modules/mirpedrol/clustalo/guidetree/environment.yml b/modules/mirpedrol/clustalo/guidetree/environment.yml new file mode 100644 index 00000000..38b2f5b9 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/environment.yml @@ -0,0 +1,7 @@ +name: clustalo_guidetree +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::clustalo=1.2.4 diff --git a/modules/mirpedrol/clustalo/guidetree/main.nf b/modules/mirpedrol/clustalo/guidetree/main.nf new file mode 100644 index 00000000..b94f2aa6 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/main.nf @@ -0,0 +1,47 @@ +process CLUSTALO_GUIDETREE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/clustalo:1.2.4--h87f3376_5': + 'biocontainers/clustalo:1.2.4--h87f3376_5' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dnd"), emit: tree + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + clustalo \\ + -i ${fasta} \\ + --guidetree-out ${prefix}.dnd \\ + --threads=${task.cpus} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/clustalo/guidetree/meta.yml b/modules/mirpedrol/clustalo/guidetree/meta.yml new file mode 100644 index 00000000..fbd35729 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/meta-schema.json +name: "clustalo_guidetree" +description: Renders a guidetree in clustalo +keywords: + - guide tree + - msa + - newick + - align + - guidetree +class: "msa_guidetree" +tools: + - "clustalo": + description: "Latest version of Clustal: a multiple sequence alignment program + for DNA or proteins" + homepage: "http://www.clustal.org/omega/" + documentation: "http://www.clustal.org/omega/" + tool_dev_url: "http://www.clustal.org/omega/" + doi: "10.1038/msb.2011.75" + licence: ["GPL v2"] + identifier: "biotools:clustalo" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - tree: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.dnd": + type: file + description: Guide tree file in Newick format + pattern: "*.{dnd}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test new file mode 100644 index 00000000..82a1977d --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process CLUSTALO_GUIDETREE" + script "../main.nf" + process "CLUSTALO_GUIDETREE" + + tag "modules" + tag "modules_mirpedrol" + tag "clustalo" + tag "clustalo/guidetree" + + test("sarscov2 - contigs-fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tree).match("tree")}, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..6e3fdfc1 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,64796b9beb7201a42b2c78cbdad51049" + ] + ], + "timestamp": "2023-11-27T22:49:13.44908228" + }, + "tree": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test.dnd:md5,5428bad500a0a0bd985744bec1a12a70" + ] + ] + ], + "timestamp": "2023-11-27T22:49:13.43743393" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/guidetree/tests/tags.yml b/modules/mirpedrol/clustalo/guidetree/tests/tags.yml new file mode 100644 index 00000000..c666305f --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/tests/tags.yml @@ -0,0 +1,2 @@ +clustalo/guidetree: + - "modules/mirpedrol/clustalo/guidetree/**" diff --git a/modules/mirpedrol/clustalo/treealign/environment.yml b/modules/mirpedrol/clustalo/treealign/environment.yml new file mode 100644 index 00000000..9226c3ac --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/environment.yml @@ -0,0 +1,8 @@ +name: clustalo_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::clustalo=1.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/clustalo/treealign/main.nf b/modules/mirpedrol/clustalo/treealign/main.nf new file mode 100644 index 00000000..cfa9c117 --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/main.nf @@ -0,0 +1,56 @@ +process CLUSTALO_TREEALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0': + 'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + // the --force -o is necessary, as clustalo expands the commandline input, + // causing it to treat the pipe as a parameter and fail + // this way, the command expands to /dev/fd/, and --force allows writing output to an already existing file + """ + clustalo \ + -i ${fasta} \ + --guidetree-in=${tree} \ + --threads=${task.cpus} \ + $args \ + --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/clustalo/treealign/meta.yml b/modules/mirpedrol/clustalo/treealign/meta.yml new file mode 100644 index 00000000..d5b89f73 --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/meta.yml @@ -0,0 +1,69 @@ +name: "clustalo_treealign" +description: Align sequences using Clustal Omega +keywords: + - alignment + - treealignment + - MSA + - msa + - genomics +class: "msa_treealing" 
+tools: + - "clustalo": + description: "Latest version of Clustal: a multiple sequence alignment program + for DNA or proteins" + homepage: "http://www.clustal.org/omega/" + documentation: "http://www.clustal.org/omega/" + tool_dev_url: "http://www.clustal.org/omega/" + doi: "10.1038/msb.2011.75" + licence: ["GPL v2"] + identifier: "biotools:clustalo" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. `[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in gzipped fasta format + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@joseespinosa" +maintainers: + - "@luisas" + - "@joseespinosa" + - "@lrauschning" diff --git a/modules/mirpedrol/clustalo/treealign/tests/main.nf.test b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test new file mode 100644 index 00000000..ca39ddfa --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test @@ -0,0 +1,48 @@ +nextflow_process { + + name "Test Process CLUSTALO_TREEALIGN" + script "../main.nf" + process "CLUSTALO_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "clustalo" + tag "clustalo/treealign" + tag "clustalo/guidetree" + + test("sarscov2 - contigs-fasta - guide_tree") { + + setup { + + run("CLUSTALO_GUIDETREE") { + script "../../guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("with_guide_tree_alignment")}, + { assert snapshot(process.out.versions).match("with_guide_tree_versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..16e80fed --- /dev/null +++ 
b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "with_guide_tree_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-02-09T19:40:45.057777867" + }, + "with_guide_tree_versions": { + "content": [ + [ + "versions.yml:md5,b825152229a974c6cfc6d826db883cb4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T13:29:28.254709" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/treealign/tests/tags.yml b/modules/mirpedrol/clustalo/treealign/tests/tags.yml new file mode 100644 index 00000000..3dd9c9c0 --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +clustalo/treealign: + - "modules/mirpedrol/clustalo/treealign/**" diff --git a/modules/mirpedrol/famsa/align/environment.yml b/modules/mirpedrol/famsa/align/environment.yml new file mode 100644 index 00000000..c41cda2a --- /dev/null +++ b/modules/mirpedrol/famsa/align/environment.yml @@ -0,0 +1,7 @@ +name: famsa_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::famsa=2.2.2 diff --git a/modules/mirpedrol/famsa/align/main.nf b/modules/mirpedrol/famsa/align/main.nf new file mode 100644 index 00000000..ba7c0eb1 --- /dev/null +++ b/modules/mirpedrol/famsa/align/main.nf @@ -0,0 +1,48 @@ + + +process FAMSA_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': + 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + famsa -gz \\ + $args \\ + -t ${task.cpus} \\ + ${fasta} \\ + ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/famsa/align/meta.yml b/modules/mirpedrol/famsa/align/meta.yml new file mode 100644 index 00000000..7419fc4e --- /dev/null +++ b/modules/mirpedrol/famsa/align/meta.yml @@ -0,0 +1,55 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/meta-schema.json +name: "famsa_align" +description: Aligns sequences using FAMSA +keywords: + - alignment + - MSA + - genomics + - msa + - align +class: "msa_alignment" +tools: + - "famsa": + description: "Algorithm for large-scale multiple sequence alignments" + homepage: "https://github.com/refresh-bio/FAMSA" + documentation: "https://github.com/refresh-bio/FAMSA" + tool_dev_url: "https://github.com/refresh-bio/FAMSA" + doi: "10.1038/srep33964" + licence: ["GPL v3"] + identifier: biotools:famsa +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/famsa/align/tests/main.nf.test b/modules/mirpedrol/famsa/align/tests/main.nf.test new file mode 100644 index 00000000..072816be --- /dev/null +++ b/modules/mirpedrol/famsa/align/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process FAMSA_ALIGN" + script "../main.nf" + process "FAMSA_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "famsa" + tag "famsa/align" + + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/align/tests/main.nf.test.snap b/modules/mirpedrol/famsa/align/tests/main.nf.test.snap new file mode 100644 index 00000000..3f144f6e --- /dev/null +++ b/modules/mirpedrol/famsa/align/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions1": { + "content": [ + [ + "versions.yml:md5,7d9e0a8c263fa6d9017075fe88c9e9dc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:06:05.094484" 
+ }, + "alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:06:05.047249" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/align/tests/tags.yml b/modules/mirpedrol/famsa/align/tests/tags.yml new file mode 100644 index 00000000..6944f882 --- /dev/null +++ b/modules/mirpedrol/famsa/align/tests/tags.yml @@ -0,0 +1,2 @@ +famsa/align: + - "modules/mirpedrol/famsa/align/**" diff --git a/modules/mirpedrol/famsa/guidetree/environment.yml b/modules/mirpedrol/famsa/guidetree/environment.yml new file mode 100644 index 00000000..28be1c7f --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/environment.yml @@ -0,0 +1,7 @@ +name: famsa_guidetree +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::famsa=2.2.2 diff --git a/modules/mirpedrol/famsa/guidetree/main.nf b/modules/mirpedrol/famsa/guidetree/main.nf new file mode 100644 index 00000000..7d8f46cd --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/main.nf @@ -0,0 +1,49 @@ + +process FAMSA_GUIDETREE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': + 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dnd"), emit: tree + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + famsa -gt_export \\ + $args \\ + -t ${task.cpus} \\ + ${fasta} \\ + ${prefix}.dnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ +} + diff --git a/modules/mirpedrol/famsa/guidetree/meta.yml b/modules/mirpedrol/famsa/guidetree/meta.yml new file mode 100644 index 00000000..0e959bf4 --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/meta.yml @@ -0,0 +1,53 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/meta-schema.json +name: "famsa_guidetree" +description: Renders a guidetree in famsa +keywords: + - guide tree + - msa + - newick + - align + - guidetree +class: "msa_guidetree" +tools: + - "famsa": + description: "Algorithm for large-scale multiple sequence alignments" + homepage: "https://github.com/refresh-bio/FAMSA" + documentation: "https://github.com/refresh-bio/FAMSA" + tool_dev_url: "https://github.com/refresh-bio/FAMSA" + doi: "10.1038/srep33964" + licence: ["GPL v3"] + identifier: biotools:famsa +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - tree: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.dnd": + type: file + description: Guide tree file in Newick format + pattern: "*.{dnd}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/famsa/guidetree/tests/main.nf.test b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test new file mode 100644 index 00000000..6b822d58 --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FAMSA_GUIDETREE" + script "../main.nf" + process "FAMSA_GUIDETREE" + + tag "modules" + tag "modules_mirpedrol" + tag "famsa" + tag "famsa/guidetree" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tree).match("tree")}, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..00a049d1 --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,320ce01bcb255b03ef5125755bf95195" + ] + ], + "timestamp": "2023-11-29T12:12:38.870544616" + }, + "tree": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test.dnd:md5,f3ef8b16a7a16cb4548942ebf2e7bad6" + ] + ] + ], + "timestamp": "2023-11-29T12:12:38.855536268" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/guidetree/tests/tags.yml b/modules/mirpedrol/famsa/guidetree/tests/tags.yml new file mode 100644 index 00000000..1bb93661 --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/tests/tags.yml @@ -0,0 +1,2 @@ +famsa/guidetree: + - "modules/mirpedrol/famsa/guidetree/**" diff --git a/modules/mirpedrol/famsa/treealign/environment.yml b/modules/mirpedrol/famsa/treealign/environment.yml new file mode 100644 index 00000000..3f200c75 --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/environment.yml @@ -0,0 +1,7 @@ +name: famsa_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::famsa=2.2.2 diff --git a/modules/mirpedrol/famsa/treealign/main.nf b/modules/mirpedrol/famsa/treealign/main.nf new file mode 100644 index 00000000..6d1fd80a --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/main.nf @@ -0,0 +1,50 @@ + + +process FAMSA_TREEALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': + 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + famsa -gt import $tree \\ + -gz \\ + $args \\ + -t ${task.cpus} \\ + ${fasta} \\ + ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/famsa/treealign/meta.yml b/modules/mirpedrol/famsa/treealign/meta.yml new file mode 100644 index 00000000..17b63fed --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/meta.yml @@ -0,0 +1,64 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/meta-schema.json +name: "famsa_treealign" +description: Aligns sequences using FAMSA +keywords: + - alignment + - treealignment + - MSA + - msa + - genomics +class: "msa_treealing" +tools: + - "famsa": + description: "Algorithm for large-scale multiple sequence alignments" + homepage: "https://github.com/refresh-bio/FAMSA" + documentation: "https://github.com/refresh-bio/FAMSA" + tool_dev_url: "https://github.com/refresh-bio/FAMSA" + doi: "10.1038/srep33964" + licence: ["GPL v3"] + identifier: biotools:famsa +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. `[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/famsa/treealign/tests/main.nf.test b/modules/mirpedrol/famsa/treealign/tests/main.nf.test new file mode 100644 index 00000000..1d406476 --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/tests/main.nf.test @@ -0,0 +1,48 @@ +nextflow_process { + + name "Test Process FAMSA_TREEALIGN" + script "../main.nf" + process "FAMSA_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "famsa" + tag "famsa/treealign" + tag "famsa/guidetree" + + + test("sarscov2 - fasta - guide_tree") { + + setup { + run("FAMSA_GUIDETREE") { + script "../../guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.alignment).match("with_guide_tree_alignment")}, + { assert snapshot(process.out.versions).match("with_guide_tree_versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap b/modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..cc750f4b --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "with_guide_tree_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-02-09T19:10:05.167368314" + }, + "with_guide_tree_versions": { + "content": [ + [ + "versions.yml:md5,7b4c829b2d9a9fc6e805c06d432998cf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T14:20:45.346455" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/treealign/tests/tags.yml b/modules/mirpedrol/famsa/treealign/tests/tags.yml new file mode 100644 index 00000000..6944f882 --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +famsa/treealign: + - "modules/mirpedrol/famsa/treealign/**" diff --git a/modules/mirpedrol/kalign/align/environment.yml b/modules/mirpedrol/kalign/align/environment.yml new file mode 100644 index 00000000..93563eae --- /dev/null +++ b/modules/mirpedrol/kalign/align/environment.yml @@ -0,0 +1,8 @@ +name: kalign_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kalign3=3.4.0 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/kalign/align/main.nf b/modules/mirpedrol/kalign/align/main.nf new file mode 100644 index 00000000..014f5216 --- /dev/null +++ b/modules/mirpedrol/kalign/align/main.nf @@ -0,0 +1,48 @@ +process KALIGN_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0': + 'biocontainers/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + unpigz -cdf $fasta | \\ + kalign \\ + $args \\ + -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kalign: \$(echo \$(kalign -v) | sed 's/kalign //g' ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kalign : \$(echo \$(kalign -v) | sed 's/kalign //g' ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/kalign/align/meta.yml b/modules/mirpedrol/kalign/align/meta.yml new file mode 100644 index 00000000..c98576a5 --- /dev/null +++ b/modules/mirpedrol/kalign/align/meta.yml @@ -0,0 +1,54 @@ +name: "kalign_align" +description: "Aligns sequences using kalign" +keywords: + - alignment + - MSA + - genomics + - msa + - align +class: "msa_alignment" +tools: + - "kalign": + description: "Kalign is a fast and accurate multiple sequence alignment algorithm." 
+ homepage: "https://msa.sbc.su.se/cgi-bin/msa.cgi" + documentation: "https://github.com/TimoLassmann/kalign" + tool_dev_url: "https://github.com/TimoLassmann/kalign" + doi: "10.1093/bioinformatics/btz795" + licence: ["GPL v3"] + identifier: "biotools:kalign" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/kalign/align/tests/main.nf.test b/modules/mirpedrol/kalign/align/tests/main.nf.test new file mode 100644 index 00000000..85b25c1f --- /dev/null +++ b/modules/mirpedrol/kalign/align/tests/main.nf.test @@ -0,0 +1,34 @@ +// nf-core modules test kalign/align +nextflow_process { + + name "Test Process KALIGN_ALIGN" + script "../main.nf" + process "KALIGN_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "kalign" + tag "kalign/align" + + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("versions")} + ) + } + } +} \ No newline at end of file diff --git 
a/modules/mirpedrol/kalign/align/tests/main.nf.test.snap b/modules/mirpedrol/kalign/align/tests/main.nf.test.snap new file mode 100644 index 00000000..da6fc94c --- /dev/null +++ b/modules/mirpedrol/kalign/align/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "SARS-CoV-2 scaffolds fasta - uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "1": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "versions": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ] + } + ], + "timestamp": "2024-03-22T16:42:01.934768" + }, + "SARS-CoV-2 scaffolds fasta - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "1": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "versions": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ] + } + ], + "timestamp": "2024-03-22T16:42:07.734293" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/kalign/align/tests/tags.yml b/modules/mirpedrol/kalign/align/tests/tags.yml new file mode 100644 index 00000000..d5cecbe4 --- /dev/null +++ b/modules/mirpedrol/kalign/align/tests/tags.yml @@ -0,0 +1,2 @@ +kalign/align: + - "modules/mirpedrol/kalign/align/**" diff --git a/modules/mirpedrol/learnmsa/align/environment.yml b/modules/mirpedrol/learnmsa/align/environment.yml new file mode 100644 index 00000000..124b8d84 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/environment.yml @@ -0,0 +1,8 @@ +name: learnmsa_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::learnmsa=2.0.1 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/learnmsa/align/main.nf 
b/modules/mirpedrol/learnmsa/align/main.nf new file mode 100644 index 00000000..365768e0 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/main.nf @@ -0,0 +1,48 @@
+process LEARNMSA_ALIGN { // Aligns one FASTA with learnMSA and writes the result pigz-compressed as <prefix>.aln.gz
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' :
+        'biocontainers/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' }" // depot image only when the engine is singularity and task.ext.singularity_pull_docker_container is unset/false; otherwise the biocontainers image
+
+    input: // meta: sample map (meta.id tags the task and is the default output prefix); fasta: sequences to align
+    tuple val(meta), path(fasta)
+
+    output: // the incoming meta paired with the compressed alignment, plus the tool-version record
+    tuple val(meta), path("*.aln.gz"), emit: alignment
+    path "versions.yml"              , emit: versions
+
+    when: // the task runs unless task.ext.when is explicitly set to a falsy value
+    task.ext.when == null || task.ext.when
+
+    script: // NOTE(review): both pigz version lines in this process end in "))"; the last ")" appears unmatched and would be written literally into versions.yml - changing it alters the snapshot md5
+    def args = task.ext.args ?: '' // extra learnMSA command-line options, empty when task.ext.args is unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename, falls back to meta.id
+    """
+    learnMSA \\
+        $args \\
+        -i <(unpigz -cdf $fasta) \\
+        -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g')
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+
+    stub: // stub run: creates an empty placeholder with the real output name, but still queries the tools for versions
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.aln.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g')
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/mirpedrol/learnmsa/align/meta.yml b/modules/mirpedrol/learnmsa/align/meta.yml new file mode 100644 index 00000000..03f45d37 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/meta.yml @@ -0,0 +1,54 @@ +name: "learnmsa_align" +description: Align sequences using learnMSA +keywords: + -
alignment + - MSA + - genomics + - msa + - align +class: "msa_alignment" +tools: + - "learnmsa": + description: "learnMSA: Learning and Aligning large Protein Families" + homepage: "https://github.com/Gaius-Augustus/learnMSA" + documentation: "https://github.com/Gaius-Augustus/learnMSA" + tool_dev_url: "https://github.com/Gaius-Augustus/learnMSA" + doi: "10.1093/gigascience/giac104" + licence: ["MIT"] + identifier: biotools:learnMSA +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format. May be gz-compressed or uncompressed. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/learnmsa/align/tests/main.nf.test b/modules/mirpedrol/learnmsa/align/tests/main.nf.test new file mode 100644 index 00000000..328c1a6d --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/tests/main.nf.test @@ -0,0 +1,36 @@ +// nf-core modules test learnmsa/align +nextflow_process { + + name "Test Process LEARNMSA_ALIGN" + script "../main.nf" + process "LEARNMSA_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "learnmsa" + tag "learnmsa/align" + + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success 
}, + { assert path(process.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1") }, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap b/modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap new file mode 100644 index 00000000..981738a2 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T16:06:48.867020809" + }, + "versions1": { + "content": [ + [ + "versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T16:12:13.921813607" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/learnmsa/align/tests/tags.yml b/modules/mirpedrol/learnmsa/align/tests/tags.yml new file mode 100644 index 00000000..fda0d147 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/tests/tags.yml @@ -0,0 +1,2 @@ +learnmsa/align: + - "modules/mirpedrol/learnmsa/align/**" diff --git a/modules/mirpedrol/mafft/environment.yml b/modules/mirpedrol/mafft/environment.yml new file mode 100644 index 00000000..595252e0 --- /dev/null +++ b/modules/mirpedrol/mafft/environment.yml @@ -0,0 +1,8 @@ +name: mafft +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mafft=7.520 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/mafft/main.nf b/modules/mirpedrol/mafft/main.nf new file mode 100644 index 00000000..1ed127b6 --- /dev/null +++ b/modules/mirpedrol/mafft/main.nf @@ -0,0 +1,50 @@
+process MAFFT { // Aligns one FASTA with mafft and pipes the alignment through pigz into <prefix>.aln.gz
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0':
+        'biocontainers/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0' }" // depot image only when the engine is singularity and task.ext.singularity_pull_docker_container is unset/false; otherwise the biocontainers image
+
+    input: // meta: sample map (meta.id tags the task and is the default output prefix); fasta: sequences to align
+    tuple val(meta) , path(fasta)
+
+    output: // the incoming meta paired with the compressed alignment, plus the tool-version record
+    tuple val(meta), path("*.aln.gz"), emit: alignment
+    path "versions.yml"              , emit: versions
+
+    when: // the task runs unless task.ext.when is explicitly set to a falsy value
+    task.ext.when == null || task.ext.when
+
+    script: // NOTE(review): fasta is handed to mafft as-is (no decompression step in this script) - TODO confirm the gzipped input mentioned in meta.yml is really supported
+    def args = task.ext.args ?: '' // extra mafft command-line options, empty when task.ext.args is unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename, falls back to meta.id
+    """
+    mafft \\
+        --thread ${task.cpus} \\
+        ${args} \\
+        ${fasta} \\
+        | pigz -cp ${task.cpus} > ${prefix}.aln.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//')
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+
+    stub: // stub run: creates an empty placeholder with the real output name; NOTE(review): the pigz lines end in "))", the last ")" appears unmatched and would land in versions.yml
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.aln.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//')
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/mirpedrol/mafft/meta.yml b/modules/mirpedrol/mafft/meta.yml new file mode 100644 index 00000000..e4277471 --- /dev/null +++ b/modules/mirpedrol/mafft/meta.yml @@ -0,0 +1,60 @@ +name: mafft +description: Multiple sequence alignment using MAFFT +keywords: + - fasta + - msa + - multiple sequence alignment + - alignment + - align +class: "msa_alignment" +tools: + - "mafft": + description: Multiple alignment program for amino acid or nucleotide sequences + based on fast Fourier transform + homepage: https://mafft.cbrc.jp/alignment/software/ + documentation: https://mafft.cbrc.jp/alignment/software/manual/manual.html + tool_dev_url: https://mafft.cbrc.jp/alignment/software/source.html + doi: "10.1093/nar/gkf436" + licence: ["BSD"] + identifier:
biotools:MAFFT + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file containing the sequences to align. May be gzipped or + uncompressed. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.aln.gz": + type: file + description: Aligned sequences in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@MillironX" +maintainers: + - "@MillironX" + - "@Joon-Klaps" diff --git a/modules/mirpedrol/mafft/tests/main.nf.test b/modules/mirpedrol/mafft/tests/main.nf.test new file mode 100644 index 00000000..ec13b2c0 --- /dev/null +++ b/modules/mirpedrol/mafft/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process MAFFT" + script "../main.nf" + process "MAFFT" + tag "modules" + tag "modules_mirpedrol" + tag "mafft" + + + test("SARS-CoV-2 scaffolds fasta") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("mafft_versions")} + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/mafft/tests/main.nf.test.snap b/modules/mirpedrol/mafft/tests/main.nf.test.snap new file mode 100644 index 
00000000..f898bed1 --- /dev/null +++ b/modules/mirpedrol/mafft/tests/main.nf.test.snap @@ -0,0 +1,317 @@ +{ + "SARS-CoV-2 scaffolds fasta - uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:08:41.735774847" + }, + "SARS-CoV-2 scaffolds fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "alignment": [ + [ + { + "id": "test", + "single_end": false + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T09:20:32.608521064" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta multiple": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:38.940555785" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta normal": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:09:35.656248409" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta long": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:26.372655394" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta profile": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:14.039053212" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:09:49.737364197" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta full": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:02.952480822" + }, + "SARS-CoV-2 scaffolds fasta - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:09:21.096197597" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/mafft/tests/tags.yml b/modules/mirpedrol/mafft/tests/tags.yml new file mode 100644 index 00000000..8109d122 --- /dev/null +++ b/modules/mirpedrol/mafft/tests/tags.yml @@ -0,0 +1,2 @@ +mafft: + - modules/mirpedrol/mafft/** diff --git a/modules/mirpedrol/magus/align/environment.yml b/modules/mirpedrol/magus/align/environment.yml new file mode 100644 index 00000000..685f5a87 --- /dev/null +++ b/modules/mirpedrol/magus/align/environment.yml @@ -0,0 +1,8 @@ +name: magus_align +channels: + 
- conda-forge + - bioconda + - defaults +dependencies: + - bioconda::magus-msa=0.2.0 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/magus/align/main.nf b/modules/mirpedrol/magus/align/main.nf new file mode 100644 index 00000000..18622ddd --- /dev/null +++ b/modules/mirpedrol/magus/align/main.nf @@ -0,0 +1,53 @@
+process MAGUS_ALIGN { // Aligns one FASTA with MAGUS and writes the result pigz-compressed as <prefix>.aln.gz
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0':
+        'biocontainers/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0' }" // depot image only when the engine is singularity and task.ext.singularity_pull_docker_container is unset/false; otherwise the biocontainers image
+
+    input: // meta: sample map (meta.id tags the task and is the default output prefix); fasta: sequences to align
+    tuple val(meta) , path(fasta)
+
+    output: // the incoming meta paired with the compressed alignment, plus the tool-version record
+    tuple val(meta), path("*.aln.gz"), emit: alignment
+    path "versions.yml"              , emit: versions
+
+    when: // the task runs unless task.ext.when is explicitly set to a falsy value
+    task.ext.when == null || task.ext.when
+
+    script: // magus runs with -d ./ (working directory in the task dir) and $args is appended last on the command line
+    def args = task.ext.args ?: '' // extra magus command-line options, empty when task.ext.args is unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename, falls back to meta.id
+    // using >() is necessary to preserve the return value,
+    // so nextflow knows to display an error when it failed
+    // using --overwrite is necessary, as the file descriptor generated by the named file will already exist
+    """
+    magus \\
+        -np $task.cpus \\
+        -i $fasta \\
+        -d ./ \\
+        --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MAGUS: \$(magus --version)
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+
+    stub: // stub run: writes a gzip stream holding a single newline; NOTE(review): the pigz lines end in "))", the last ")" appears unmatched and would land in versions.yml
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > ${prefix}.aln.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MAGUS: \$(magus --version)
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/mirpedrol/magus/align/meta.yml
b/modules/mirpedrol/magus/align/meta.yml new file mode 100644 index 00000000..e1b833fb --- /dev/null +++ b/modules/mirpedrol/magus/align/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/meta-schema.json +name: "magus_align" +description: Multiple Sequence Alignment using Graph Clustering +keywords: + - MSA + - alignment + - genomics + - graph + - msa + - align +class: "msa_alignment" +tools: + - "magus": + description: "Multiple Sequence Alignment using Graph Clustering" + homepage: "https://github.com/vlasmirnov/MAGUS" + documentation: "https://github.com/vlasmirnov/MAGUS" + tool_dev_url: "https://github.com/vlasmirnov/MAGUS" + doi: "10.1093/bioinformatics/btaa992" + licence: ["MIT"] + identifier: biotools:magus + +input: + - - meta: + type: map + description: | + Groovy Map containing the fasta meta information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input sequences in FASTA format. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample meta information. + e.g. `[ id:'test', single_end:false ]` + - "*.aln.gz": + type: file + description: File containing the output alignment, in FASTA format containing + gaps. The sequences may be in a different order than in the input FASTA. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lrauschning" diff --git a/modules/mirpedrol/magus/align/tests/main.nf.test b/modules/mirpedrol/magus/align/tests/main.nf.test new file mode 100644 index 00000000..d10fb4a4 --- /dev/null +++ b/modules/mirpedrol/magus/align/tests/main.nf.test @@ -0,0 +1,40 @@ +nextflow_process { + + name "Test Process MAGUS_ALIGN" + script "../main.nf" + process "MAGUS_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "magus" + tag "magus/align" + + + test("setoxin - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/align/tests/main.nf.test.snap b/modules/mirpedrol/magus/align/tests/main.nf.test.snap new file mode 100644 index 00000000..b0757599 --- /dev/null +++ b/modules/mirpedrol/magus/align/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions1": { + "content": [ + [ + "versions.yml:md5,ef9456e058ce51bce10dbc3703da29c7" + ] + ], + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-03-28T18:17:23.679862847" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/align/tests/tags.yml b/modules/mirpedrol/magus/align/tests/tags.yml new file mode 100644 index 00000000..c7d2f94a --- /dev/null +++ b/modules/mirpedrol/magus/align/tests/tags.yml @@ -0,0 +1,2 @@ +magus/align: + - "modules/mirpedrol/magus/align/**" diff --git a/modules/mirpedrol/magus/guidetree/environment.yml b/modules/mirpedrol/magus/guidetree/environment.yml new file mode 100644 index 00000000..8e750334 --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/environment.yml @@ -0,0 +1,7 @@ +name: magus_guidetree +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::magus-msa=0.2.0 diff --git a/modules/mirpedrol/magus/guidetree/main.nf b/modules/mirpedrol/magus/guidetree/main.nf new file mode 100644 index 00000000..0206053e --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/main.nf @@ -0,0 +1,48 @@
+process MAGUS_GUIDETREE { // Runs MAGUS with --onlyguidetree on one FASTA and emits the result as <prefix>.dnd
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/magus-msa:0.2.0--pyhdfd78af_0':
+        'biocontainers/magus-msa:0.2.0--pyhdfd78af_0' }" // depot image only when the engine is singularity and task.ext.singularity_pull_docker_container is unset/false; otherwise the biocontainers image
+
+    input: // meta: sample map (meta.id tags the task and is the default output prefix); fasta: input sequences
+    tuple val(meta), path(fasta)
+
+    output: // the incoming meta paired with the .dnd file, plus the tool-version record
+    tuple val(meta), path("*.dnd"), emit: tree
+    path "versions.yml"           , emit: versions
+
+    when: // the task runs unless task.ext.when is explicitly set to a falsy value
+    task.ext.when == null || task.ext.when
+
+    script: // output is written uncompressed, directly to ${prefix}.dnd; $args is appended last on the command line
+    def args = task.ext.args ?: '' // extra magus command-line options, empty when task.ext.args is unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename, falls back to meta.id
+    """
+    magus \\
+        -np $task.cpus \\
+        -i $fasta \\
+        -o ${prefix}.dnd \\
+        --onlyguidetree TRUE \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MAGUS: \$(magus --version)
+    END_VERSIONS
+    """
+
+    stub: // stub run: creates an empty placeholder with the real output name, but still calls magus for its version
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.dnd
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MAGUS: \$(magus --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/mirpedrol/magus/guidetree/meta.yml b/modules/mirpedrol/magus/guidetree/meta.yml new file mode 100644 index 00000000..9000187e --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/meta-schema.json +name: "magus_guidetree" +description: Multiple Sequence Alignment using Graph Clustering +keywords: + - MSA + - guide tree + - genomics + - graph + - align + - guidetree + - msa +class: "msa_guidetree" +tools: + - "magus": + description: "Multiple Sequence Alignment using Graph Clustering" + homepage: "https://github.com/vlasmirnov/MAGUS" + documentation: "https://github.com/vlasmirnov/MAGUS" + tool_dev_url: "https://github.com/vlasmirnov/MAGUS" + doi: "10.1093/bioinformatics/btaa992" + licence: ["MIT"] + identifier: biotools:magus + +input: + - - meta: + type: map + description: | + Groovy Map containing fasta meta information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input sequences in FASTA format.
+ pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - tree: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.dnd": + type: file + description: File containing the output guidetree, in newick format. + pattern: "*.{dnd}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lrauschning" diff --git a/modules/mirpedrol/magus/guidetree/tests/main.nf.test b/modules/mirpedrol/magus/guidetree/tests/main.nf.test new file mode 100644 index 00000000..9ac87e35 --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/tests/main.nf.test @@ -0,0 +1,39 @@ +nextflow_process { + + name "Test Process MAGUS_GUIDETREE" + script "../main.nf" + process "MAGUS_GUIDETREE" + + tag "modules" + tag "modules_mirpedrol" + tag "magus" + tag "magus/guidetree" + + test("setoxin - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(process.out.tree).match("tree")}, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(process.out.tree[0][1]).getText().contains("1apf") }, + { assert path(process.out.tree[0][1]).getText().contains("1ahl") }, + { assert path(process.out.tree[0][1]).getText().contains("1atx") }, + { assert path(process.out.tree[0][1]).getText().contains("1sh1") }, + { assert path(process.out.tree[0][1]).getText().contains("1bds") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap 
b/modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..d564be3d --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5a2ad92c9ea945c4bf4890f02ca2562f" + ] + ], + "timestamp": "2024-03-28T18:25:41.292337485" + }, + "tree": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tree:md5,c742636229d166322a2824d409595738" + ] + ] + ], + "timestamp": "2024-03-28T18:25:41.226027114" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/guidetree/tests/tags.yml b/modules/mirpedrol/magus/guidetree/tests/tags.yml new file mode 100644 index 00000000..75534069 --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/tests/tags.yml @@ -0,0 +1,2 @@ +magus/guidetree: + - "modules/mirpedrol/magus/guidetree/**" diff --git a/modules/mirpedrol/magus/treealign/environment.yml b/modules/mirpedrol/magus/treealign/environment.yml new file mode 100644 index 00000000..f6b42d26 --- /dev/null +++ b/modules/mirpedrol/magus/treealign/environment.yml @@ -0,0 +1,8 @@ +name: magus_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::magus-msa=0.2.0 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/magus/treealign/main.nf b/modules/mirpedrol/magus/treealign/main.nf new file mode 100644 index 00000000..0fd93baa --- /dev/null +++ b/modules/mirpedrol/magus/treealign/main.nf @@ -0,0 +1,55 @@
+process MAGUS_TREEALIGN { // Aligns one FASTA with MAGUS, optionally guided by a supplied tree; output is pigz-compressed as <prefix>.aln.gz
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0':
+        'biocontainers/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0' }" // depot image only when the engine is singularity and task.ext.singularity_pull_docker_container is unset/false; otherwise the biocontainers image
+
+    input: // meta: sample map (meta.id tags the task and is the default output prefix); fasta: sequences to align
+    tuple val(meta) , path(fasta)
+    tuple val(meta2), path(tree) // guide tree file; meta.yml documents it as optional, so it may be empty ([]) and then no -t option is emitted
+
+    output: // the fasta's meta paired with the compressed alignment, plus the tool-version record
+    tuple val(meta), path("*.aln.gz"), emit: alignment
+    path "versions.yml"              , emit: versions
+
+    when: // the task runs unless task.ext.when is explicitly set to a falsy value
+    task.ext.when == null || task.ext.when
+
+    script: // -t is only added when a tree was supplied, so an empty tree no longer leaves a dangling "-t" that swallows the first token of $args
+    def args = task.ext.args ?: '' // extra magus command-line options (may carry its own -t choice), empty when task.ext.args is unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename, falls back to meta.id
+    // using >() is necessary to preserve the return value,
+    // so nextflow knows to display an error when it failed
+    // using --overwrite is necessary, as the file descriptor generated by the named file will already exist
+    """
+    magus \\
+        -np $task.cpus \\
+        -i $fasta \\
+        -d ./ \\
+        --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\
+        ${tree ? "-t ${tree}" : ''} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MAGUS: \$(magus --version)
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+
+    stub: // stub run: writes a gzip stream holding a single newline; NOTE(review): the pigz lines end in "))", the last ")" appears unmatched and would land in versions.yml
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > ${prefix}.aln.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MAGUS: \$(magus --version)
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/mirpedrol/magus/treealign/meta.yml b/modules/mirpedrol/magus/treealign/meta.yml new file mode 100644 index 00000000..45fc49bb --- /dev/null +++ b/modules/mirpedrol/magus/treealign/meta.yml @@ -0,0 +1,66 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/meta-schema.json +name: "magus_treealign" +description: Multiple Sequence Alignment using Graph Clustering +keywords: + - MSA + - alignment + - treealignment + - msa + - genomics + - graph +class: "msa_treealing" +tools: + -
"magus": + description: "Multiple Sequence Alignment using Graph Clustering" + homepage: "https://github.com/vlasmirnov/MAGUS" + documentation: "https://github.com/vlasmirnov/MAGUS" + tool_dev_url: "https://github.com/vlasmirnov/MAGUS" + doi: "10.1093/bioinformatics/btaa992" + licence: ["MIT"] + identifier: biotools:magus + +input: + - - meta: + type: map + description: | + Groovy Map containing the fasta meta information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input sequences in FASTA format. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing sample information for the specified guide tree (if supplied) + e.g. `[ id:'test', single_end:false ]` + - tree: + type: file + description: Optional path to a file containing a guide tree in newick format + to use as input. If empty, or overwritten by passing `-t [fasttree|fasttree-noml|clustal|parttree]`, + MAGUS will construct its own guide tree. If empty, `fasttree` is used as a + default. + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample meta information. + e.g. `[ id:'test', single_end:false ]` + - "*.aln.gz": + type: file + description: File containing the output alignment, in FASTA format containing + gaps. The sequences may be in a different order than in the input FASTA. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lrauschning" diff --git a/modules/mirpedrol/magus/treealign/tests/main.nf.test b/modules/mirpedrol/magus/treealign/tests/main.nf.test new file mode 100644 index 00000000..bd177127 --- /dev/null +++ b/modules/mirpedrol/magus/treealign/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process MAGUS_TREEALIGN" + script "../main.nf" + process "MAGUS_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "magus" + tag "magus/treealign" + tag "magus/guidetree" + + + test("setoxin - fasta - guide_tree") { + + setup { + run("MAGUS_GUIDETREE") { + script "../../guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = MAGUS_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert 
snapshot(process.out.versions).match("with_guide_tree_versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/treealign/tests/main.nf.test.snap b/modules/mirpedrol/magus/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..119a2389 --- /dev/null +++ b/modules/mirpedrol/magus/treealign/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "with_guide_tree_versions": { + "content": [ + [ + "versions.yml:md5,25c60978b2ebb3f6729b00efae44dcee" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T15:18:06.218411" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/treealign/tests/tags.yml b/modules/mirpedrol/magus/treealign/tests/tags.yml new file mode 100644 index 00000000..c7d2f94a --- /dev/null +++ b/modules/mirpedrol/magus/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +magus/treealign: + - "modules/mirpedrol/magus/treealign/**" diff --git a/modules/mirpedrol/muscle5/super5/environment.yml b/modules/mirpedrol/muscle5/super5/environment.yml new file mode 100644 index 00000000..fbaf4a2e --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/environment.yml @@ -0,0 +1,8 @@ +name: muscle5_super5 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::muscle=5.1 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/muscle5/super5/main.nf b/modules/mirpedrol/muscle5/super5/main.nf new file mode 100644 index 00000000..09545015 --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/main.nf @@ -0,0 +1,62 @@ +process MUSCLE5_SUPER5 { + tag "$meta.id" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0': + 'biocontainers/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + prefix = args.contains('-perm all') ? "${prefix}@" : "${prefix}" + def write_output = (!args.contains('-perm all')) ? " -output >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-output ${prefix}.aln" + // muscle internally expands the shell pipe to a file descriptor of the form /dev/fd/ + // this causes it to fail, unless -output is left at the end of the call + // see also clustalo/align + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + """ + muscle \\ + -super5 ${fasta} \\ + ${args} \\ + -threads ${task.cpus} \\ + $write_output + + + # output may be multiple files if -perm all is set + # compress these individually + if ${args.contains('-perm all')}; then + pigz -p ${task.cpus} *.aln + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/muscle5/super5/meta.yml b/modules/mirpedrol/muscle5/super5/meta.yml 
new file mode 100644 index 00000000..939e584e --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/meta.yml @@ -0,0 +1,61 @@ +name: "muscle5_super5" +description: Muscle is a program for creating multiple alignments of amino acid or + nucleotide sequences. This particular module uses the super5 algorithm for very + big alignments. It can permutate the guide tree according to a set of flags. +keywords: + - align + - msa + - multiple sequence alignment + - msa + - alignment +class: "msa_alignment" +tools: + - muscle-super5: + description: "Muscle v5 is a major re-write of MUSCLE based on new algorithms." + homepage: "https://drive5.com/muscle5/" + documentation: "https://drive5.com/muscle5/manual/" + doi: "10.1101/2021.06.20.449169" + licence: ["Public Domain"] + identifier: "biotools:muscle" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input sequences for alignment must be in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.aln.gz": + type: file + description: Multiple sequence alignment produced in gzipped FASTA format. If + '-perm all' is passed in ext.args, this will be multiple files per input! 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@alessiovignoli" + - "@JoseEspinosa" +maintainers: + - "@alessiovignoli" + - "@JoseEspinosa" + - "@lrauschning" diff --git a/modules/mirpedrol/muscle5/super5/tests/main.nf.test b/modules/mirpedrol/muscle5/super5/tests/main.nf.test new file mode 100644 index 00000000..bfb375cf --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process MUSCLE5_SUPER5" + script "../main.nf" + process "MUSCLE5_SUPER5" + config "./nextflow.config" + + tag "modules" + tag "modules_mirpedrol" + tag "muscle5" + tag "muscle5/super5" + + + test("fasta - align_sequence") { + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment") }, + { assert snapshot(process.out.versions).match("versions_align") } + ) + } + } + + test("fasta - align_sequence - perm_all") { + config "./perm_all.config" + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("perm-all")}, + { assert snapshot(process.out.versions).match("versions_perm") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap b/modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap new file mode 100644 index 00000000..e38233aa --- /dev/null +++ 
b/modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap @@ -0,0 +1,65 @@ +{ + "versions_align": { + "content": [ + [ + "versions.yml:md5,5b5af5ac30721027249837f33a4da01f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T11:56:05.493488" + }, + "perm-all": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + "testabc.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testacb.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testbca.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testnone.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-02-09T19:08:37.386512953" + }, + "versions_perm": { + "content": [ + [ + "versions.yml:md5,5b5af5ac30721027249837f33a4da01f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T11:56:08.627006" + }, + "alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T11:56:05.446683" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/muscle5/super5/tests/nextflow.config b/modules/mirpedrol/muscle5/super5/tests/nextflow.config new file mode 100644 index 00000000..e69de29b diff --git a/modules/mirpedrol/muscle5/super5/tests/perm_all.config b/modules/mirpedrol/muscle5/super5/tests/perm_all.config new file mode 100644 index 00000000..d3502716 --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/tests/perm_all.config @@ -0,0 +1,3 @@ +process { + ext.args = { "-perm all" } +} diff --git a/modules/mirpedrol/muscle5/super5/tests/tags.yml b/modules/mirpedrol/muscle5/super5/tests/tags.yml new file mode 100644 index 00000000..4c144ae6 --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/tests/tags.yml @@ -0,0 +1,2 @@ +muscle5/super5: + - "modules/mirpedrol/muscle5/super5/**" diff 
--git a/modules/mirpedrol/tcoffee/align/environment.yml b/modules/mirpedrol/tcoffee/align/environment.yml new file mode 100644 index 00000000..28f159fd --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/environment.yml @@ -0,0 +1,8 @@ +name: tcoffee_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/tcoffee/align/main.nf b/modules/mirpedrol/tcoffee/align/main.nf new file mode 100644 index 00000000..a5cc3bd2 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/main.nf @@ -0,0 +1,58 @@ +process TCOFFEE_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' + t_coffee -seq ${fasta} \ + $args \ + -output fasta_aln \ + -thread ${task.cpus} \ + -outfile stdout \ + | pigz -cp ${task.cpus} > ${prefix}.aln.gz + + # If stdout file exist, then compress the file + # This is a patch for the current behaviour of the regressive algorithm + # that does not support the stdout redirection + if [ -f stdout ]; then + pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz + rm stdout + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 
's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/tcoffee/align/meta.yml b/modules/mirpedrol/tcoffee/align/meta.yml new file mode 100644 index 00000000..597be04d --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/meta.yml @@ -0,0 +1,62 @@ +name: "tcoffee_align" +description: Aligns sequences using T_COFFEE +keywords: + - alignment + - MSA + - genomics + - msa + - align +class: "msa_alignment" +tools: + - "tcoffee": + description: "A collection of tools for Computing, Evaluating and Manipulating + Multiple Alignments of DNA, RNA, Protein Sequences and Structures." + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + identifier: "biotools:tcoffee" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file in FASTA format. May be gzipped. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" + - "@alessiovignoli" +maintainers: + - "@luisas" + - "@JoseEspinosa" + - "@lrauschning" + - "@alessiovignoli" diff --git a/modules/mirpedrol/tcoffee/align/tests/main.nf.test b/modules/mirpedrol/tcoffee/align/tests/main.nf.test new file mode 100644 index 00000000..1b009d96 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process TCOFFEE_ALIGN" + script "../main.nf" + process "TCOFFEE_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "tcoffee" + tag "tcoffee/align" + tag "untar" + + test("fasta - align_sequence") { + + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap b/modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap new file mode 100644 index 00000000..7a64b137 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T15:28:18.280597" + }, + "alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,bd1db08ad04514cc6d1334598c1a6ef0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-28T18:59:35.169119" + } 
+} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/align/tests/tags.yml b/modules/mirpedrol/tcoffee/align/tests/tags.yml new file mode 100644 index 00000000..f170df92 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/align: + - "modules/mirpedrol/tcoffee/align/**" diff --git a/modules/mirpedrol/tcoffee/treealign/environment.yml b/modules/mirpedrol/tcoffee/treealign/environment.yml new file mode 100644 index 00000000..efc70609 --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/environment.yml @@ -0,0 +1,8 @@ +name: tcoffee_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/tcoffee/treealign/main.nf b/modules/mirpedrol/tcoffee/treealign/main.nf new file mode 100644 index 00000000..76ccb6fd --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/main.nf @@ -0,0 +1,60 @@ +process TCOFFEE_TREEALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' + t_coffee -seq ${fasta} \ + -usetree $tree \ + -output fasta_aln \ + $args \ + -thread ${task.cpus} \ + -outfile stdout \ + | pigz -cp ${task.cpus} > ${prefix}.aln.gz + + # If stdout file exist, then compress the file + # This is a patch for the current behaviour of the regressive algorithm + # that does not support the stdout redirection + if [ -f stdout ]; then + pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz + rm stdout + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/tcoffee/treealign/meta.yml b/modules/mirpedrol/tcoffee/treealign/meta.yml new file mode 100644 index 00000000..155cc3b4 --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/meta.yml @@ -0,0 +1,71 @@ +name: "tcoffee_treealign" +description: Aligns sequences using T_COFFEE +keywords: + - alignment + - treealignment + - MSA + - genomics + - msa +class: "msa_treealing" +tools: + - 
"tcoffee": + description: "A collection of tools for Computing, Evaluating and Manipulating + Multiple Alignments of DNA, RNA, Protein Sequences and Structures." + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + identifier: "biotools:tcoffee" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. `[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file in FASTA format. May be gzipped. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" + - "@alessiovignoli" +maintainers: + - "@luisas" + - "@JoseEspinosa" + - "@lrauschning" + - "@alessiovignoli" diff --git a/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test new file mode 100644 index 00000000..b99c0d80 --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_process { + + name "Test Process TCOFFEE_TREEALIGN" + script "../main.nf" + process "TCOFFEE_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "tcoffee" + tag "tcoffee/treealign" + tag "famsa/guidetree" + tag "untar" + + test("sarscov2 - fasta - align_with_guide_tree") { + + setup { + + run("FAMSA_GUIDETREE") { + script "../../../famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment_guidetree")}, + { assert snapshot(process.out.versions).match("versions_guidetree") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..a8a6142d --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions_guidetree": { + 
"content": [ + [ + "versions.yml:md5,1fe864892229d72f6fe3bca7871491bc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T09:13:29.39446115" + }, + "alignment_guidetree": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,93bc8adfcd88f7913718eacc13da8e4a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T09:13:29.320114268" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/treealign/tests/tags.yml b/modules/mirpedrol/tcoffee/treealign/tests/tags.yml new file mode 100644 index 00000000..f170df92 --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/treealign: + - "modules/mirpedrol/tcoffee/treealign/**" diff --git a/subworkflows/mirpedrol/msa_alignment/main.nf b/subworkflows/mirpedrol/msa_alignment/main.nf new file mode 100644 index 00000000..9dd8da30 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/main.nf @@ -0,0 +1,81 @@ +include { MAFFT } from '../../../modules/mirpedrol/mafft/main' +include { KALIGN_ALIGN } from '../../../modules/mirpedrol/kalign/align/main' +include { FAMSA_ALIGN } from '../../../modules/mirpedrol/famsa/align/main' +include { MUSCLE5_SUPER5 } from '../../../modules/mirpedrol/muscle5/super5/main' +include { MAGUS_ALIGN } from '../../../modules/mirpedrol/magus/align/main' +include { CLUSTALO_ALIGN } from '../../../modules/mirpedrol/clustalo/align/main' +include { TCOFFEE_ALIGN } from '../../../modules/mirpedrol/tcoffee/align/main' +include { LEARNMSA_ALIGN } from '../../../modules/mirpedrol/learnmsa/align/main' + + +workflow MSA_ALIGNMENT { + + take: + ch_fasta + + main: + def ch_out_alignment = Channel.empty() + def ch_out_versions = Channel.empty() + + ch_fasta + .branch { + meta, fasta, tool -> + mafft: tool == "mafft" + return [ meta, fasta ] + kalign_align: tool == "kalign_align" + return [ meta, fasta ] + famsa_align: tool == "famsa_align" + return [ meta, 
fasta ] + muscle5_super5: tool == "muscle5_super5" + return [ meta, fasta ] + magus_align: tool == "magus_align" + return [ meta, fasta ] + clustalo_align: tool == "clustalo_align" + return [ meta, fasta ] + tcoffee_align: tool == "tcoffee_align" + return [ meta, fasta ] + learnmsa_align: tool == "learnmsa_align" + return [ meta, fasta ] + } + .set { ch_fasta_branch } + + MAFFT( ch_fasta_branch.mafft ) + ch_out_alignment = ch_out_alignment.mix(MAFFT.out.alignment) + ch_out_versions = ch_out_versions.mix(MAFFT.out.versions) + + KALIGN_ALIGN( ch_fasta_branch.kalign_align ) + ch_out_alignment = ch_out_alignment.mix(KALIGN_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(KALIGN_ALIGN.out.versions) + + FAMSA_ALIGN( ch_fasta_branch.famsa_align ) + ch_out_alignment = ch_out_alignment.mix(FAMSA_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(FAMSA_ALIGN.out.versions) + + MUSCLE5_SUPER5( ch_fasta_branch.muscle5_super5 ) + ch_out_alignment = ch_out_alignment.mix(MUSCLE5_SUPER5.out.alignment) + ch_out_versions = ch_out_versions.mix(MUSCLE5_SUPER5.out.versions) + + MAGUS_ALIGN( ch_fasta_branch.magus_align ) + ch_out_alignment = ch_out_alignment.mix(MAGUS_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(MAGUS_ALIGN.out.versions) + + CLUSTALO_ALIGN( ch_fasta_branch.clustalo_align ) + ch_out_alignment = ch_out_alignment.mix(CLUSTALO_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(CLUSTALO_ALIGN.out.versions) + + TCOFFEE_ALIGN( ch_fasta_branch.tcoffee_align ) + ch_out_alignment = ch_out_alignment.mix(TCOFFEE_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(TCOFFEE_ALIGN.out.versions) + + LEARNMSA_ALIGN( ch_fasta_branch.learnmsa_align ) + ch_out_alignment = ch_out_alignment.mix(LEARNMSA_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(LEARNMSA_ALIGN.out.versions) + + + + emit: + alignment = ch_out_alignment + versions = ch_out_versions + +} + diff --git a/subworkflows/mirpedrol/msa_alignment/meta.yml 
b/subworkflows/mirpedrol/msa_alignment/meta.yml new file mode 100644 index 00000000..4f373460 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/meta.yml @@ -0,0 +1,66 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_alignment" +description: Perform multiple sequence alignment +keywords: ["alignment", "msa", "align"] +components: + [ + "mafft", + "kalign/align", + "famsa/align", + "muscle5/super5", + "magus/align", + "clustalo/align", + "tcoffee/align", + "learnmsa/align", + ] + +input: + - ch_fasta: + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'sample1', single_end:false ]` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + - tool: + description: The name of the tool to run + type: string + +output: + - alignment: + description: Output channel alignment + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - "*.aln.gz": + description: Alignment file, in FASTA format. 
+ ontologies: + - edam: http://edamontology.org/format_1984 + pattern: "*.aln.gz" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test new file mode 100644 index 00000000..4140ce03 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test @@ -0,0 +1,204 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_ALIGNMENT" + script "../main.nf" + workflow "MSA_ALIGNMENT" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_alignment" + tag "mafft" + tag "kalign/align" + tag "famsa/align" + tag "muscle5/super5" + tag "magus/align" + tag "clustalo/align" + tag "tcoffee/align" + tag "learnmsa/align" + + + test("run mafft") { + + when { + workflow { + """ + input[0] = Channel.of( [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) + , 'mafft' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("mafft_alignment")}, + { assert snapshot(workflow.out.versions).match("mafft_versions")} + ) + } + } + + test("run kalign/align") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + , 'kalign_align' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("kalign_align_alignment")}, + { assert snapshot(workflow.out.versions).match("kalign_align_versions")} + ) + } + } + + test("run famsa/align") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + 
file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + , 'famsa_align' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("famsa_align_alignment")}, + { assert snapshot(workflow.out.versions).match("famsa_align_versions") } + ) + } + } + + test("run muscle5/super5") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + , 'muscle5_super5' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("muscle5_super5_alignment") }, + { assert snapshot(workflow.out.versions).match("muscle5_super5_versions") } + ) + } + } + + test("run magus/align") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + , 'magus_align' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert snapshot(workflow.out.versions).match("magus_align_versions") } + ) + } + } + + test("run clustalo/align") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], 
checkIfExists: true) + , 'clustalo_align' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("clustalo_align_alignment")}, + { assert snapshot(workflow.out.versions).match("clustalo_align_versions") } + ) + } + } + + test("run tcoffee/align") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + , 'tcoffee_align' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("tcoffee_align_alignment")}, + { assert snapshot(workflow.out.versions).match("tcoffee_align_versions") } + ) + } + } + + test("run learnmsa/align") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + , 'learnmsa_align' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1") }, + { assert snapshot(workflow.out.versions).match("learnmsa_align_versions") } + ) + } + } + + +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap new file mode 100644 index 00000000..06e9cf67 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap @@ -0,0 +1,201 @@ +{ + "kalign_align_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:09:44.903862641" + }, + "learnmsa_align_versions": { + "content": [ + [ + "versions.yml:md5,475f61f05c9729887f723221b87de01d" + ] + ], + "meta": { + "nf-test": "0.9.1", + 
"nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:16:10.257788333" + }, + "clustalo_align_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:11:39.016384316" + }, + "kalign_align_versions": { + "content": [ + [ + "versions.yml:md5,e7d33c95bb5d69e8573c8ad4eb2aca2c" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:09:44.918523969" + }, + "clustalo_align_versions": { + "content": [ + [ + "versions.yml:md5,75356bf56559adcb33a9c93aba830309" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:11:39.033642445" + }, + "tcoffee_align_versions": { + "content": [ + [ + "versions.yml:md5,c5208e86b43e8c973c39c2bd8ca2932a" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:12:46.142334018" + }, + "mafft_versions": { + "content": [ + [ + "versions.yml:md5,ac417224510279b05e804f041d82304d" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:09:24.697891518" + }, + "tcoffee_align_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,bd1db08ad04514cc6d1334598c1a6ef0" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:12:46.12691554" + }, + "famsa_align_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:10:17.412909358" + }, + "famsa_align_versions": { + "content": [ + [ + "versions.yml:md5,c74ca8b91c442fc4ea29219ee1b724fd" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:10:17.423586217" + }, + 
"muscle5_super5_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:10:32.694344" + }, + "magus_align_versions": { + "content": [ + [ + "versions.yml:md5,9258e7c6deb7c3d816ba75cf111e09a8" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:11:08.349010115" + }, + "mafft_alignment": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:09:24.67999956" + }, + "muscle5_super5_versions": { + "content": [ + [ + "versions.yml:md5,48ec1b7cf99109e8495f3bc00d67a1eb" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:10:32.708162989" + } +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_guidetree/main.nf b/subworkflows/mirpedrol/msa_guidetree/main.nf new file mode 100644 index 00000000..dfde7d91 --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/main.nf @@ -0,0 +1,46 @@ +include { FAMSA_GUIDETREE } from '../../../modules/mirpedrol/famsa/guidetree/main' +include { MAGUS_GUIDETREE } from '../../../modules/mirpedrol/magus/guidetree/main' +include { CLUSTALO_GUIDETREE } from '../../../modules/mirpedrol/clustalo/guidetree/main' + + +workflow MSA_GUIDETREE { + + take: + ch_fasta + + main: + def ch_out_tree = Channel.empty() + def ch_out_versions = Channel.empty() + + ch_fasta + .branch { + meta, fasta, tool -> + famsa_guidetree: tool == "famsa_guidetree" + return [ meta, fasta ] + magus_guidetree: tool == "magus_guidetree" + return [ meta, fasta ] + clustalo_guidetree: tool == "clustalo_guidetree" + return [ meta, fasta ] + } + .set { ch_fasta_branch } + + FAMSA_GUIDETREE( ch_fasta_branch.famsa_guidetree ) + 
ch_out_tree = ch_out_tree.mix(FAMSA_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(FAMSA_GUIDETREE.out.versions) + + MAGUS_GUIDETREE( ch_fasta_branch.magus_guidetree ) + ch_out_tree = ch_out_tree.mix(MAGUS_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(MAGUS_GUIDETREE.out.versions) + + CLUSTALO_GUIDETREE( ch_fasta_branch.clustalo_guidetree ) + ch_out_tree = ch_out_tree.mix(CLUSTALO_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(CLUSTALO_GUIDETREE.out.versions) + + + + emit: + tree = ch_out_tree + versions = ch_out_versions + +} + diff --git a/subworkflows/mirpedrol/msa_guidetree/meta.yml b/subworkflows/mirpedrol/msa_guidetree/meta.yml new file mode 100644 index 00000000..94eeea7c --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_guidetree" +description: Compute a guide tree for multiple sequence alignment +keywords: ["align", "guide tree", "guidetree", "msa"] +components: ["famsa/guidetree", "magus/guidetree", "clustalo/guidetree"] + +input: + - ch_fasta: + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + - tool: + description: The name of the tool to run + type: string + +output: + - tree: + description: Output channel tree + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. 
`[ id:'test']` + + " + type: map + - "*.dnd": + description: Guide tree file in Newick format + pattern: "*.{dnd}" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test new file mode 100644 index 00000000..6e86f084 --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test @@ -0,0 +1,89 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_GUIDETREE" + script "../main.nf" + workflow "MSA_GUIDETREE" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_guidetree" + tag "famsa/guidetree" + tag "magus/guidetree" + tag "clustalo/guidetree" + + + test("run famsa/guidetree") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + , 'famsa_guidetree' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.tree).match("famsa_guidetree_tree")}, + { assert snapshot(workflow.out.versions).match("famsa_guidetree_versions") } + ) + } + } + + test("run magus/guidetree") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + , 'magus_guidetree' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + //{ assert snapshot(workflow.out.tree).match("magus_guidetree_tree")}, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(workflow.out.tree[0][1]).getText().contains("1apf") }, + { assert 
path(workflow.out.tree[0][1]).getText().contains("1ahl") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1atx") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1sh1") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1bds") }, + { assert snapshot(workflow.out.versions).match("magus_guidetree_versions") } + ) + } + } + + test("run clustalo/guidetree") { + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + , 'clustalo_guidetree' + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.tree).match("clustalo_guidetree_tree")}, + { assert snapshot(workflow.out.versions).match("clustalo_guidetree_versions") } + ) + } + } + + +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..bcc0df09 --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "sarscov2 - famsa/guidetree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dnd:md5,f3ef8b16a7a16cb4548942ebf2e7bad6" + ] + ], + "1": [ + "versions.yml:md5,723b3358beebc97847b4681f562bcea0" + ], + "guidetree": [ + [ + { + "id": "test" + }, + "test.dnd:md5,f3ef8b16a7a16cb4548942ebf2e7bad6" + ] + ], + "versions": [ + "versions.yml:md5,723b3358beebc97847b4681f562bcea0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T13:12:32.7754812" + }, + "sarscov2 - clustalo/guidetree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dnd:md5,5428bad500a0a0bd985744bec1a12a70" + ] + ], + "1": [ + "versions.yml:md5,b1ee3efbf09bc7cf7b4970916a00fddc" + ], + "guidetree": [ + [ + { + "id": "test" + }, + "test.dnd:md5,5428bad500a0a0bd985744bec1a12a70" + ] + ], 
+ "versions": [ + "versions.yml:md5,b1ee3efbf09bc7cf7b4970916a00fddc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T13:12:06.216087789" + }, + "versions1": { + "content": [ + [ + "versions.yml:md5,75333144e16039f25cae8e933f30d003" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T13:13:24.823000398" + } +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_treealign/main.nf b/subworkflows/mirpedrol/msa_treealign/main.nf new file mode 100644 index 00000000..059a6ead --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/main.nf @@ -0,0 +1,67 @@ +include { FAMSA_TREEALIGN } from '../../../modules/mirpedrol/famsa/treealign/main' +include { MAGUS_TREEALIGN } from '../../../modules/mirpedrol/magus/treealign/main' +include { CLUSTALO_TREEALIGN } from '../../../modules/mirpedrol/clustalo/treealign/main' +include { TCOFFEE_TREEALIGN } from '../../../modules/mirpedrol/tcoffee/treealign/main' + + +workflow MSA_TREEALIGN { + + take: + ch_fasta + ch_tree + + main: + def ch_out_alignment = Channel.empty() + def ch_out_versions = Channel.empty() + + ch_fasta + .branch { + meta, fasta, tool -> + famsa_treealign: tool == "famsa_treealign" + return [ meta, fasta ] + magus_treealign: tool == "magus_treealign" + return [ meta, fasta ] + clustalo_treealign: tool == "clustalo_treealign" + return [ meta, fasta ] + tcoffee_treealign: tool == "tcoffee_treealign" + return [ meta, fasta ] + } + .set { ch_fasta_branch } + ch_tree + .branch { + meta, tree, tool -> + famsa_treealign: tool == "famsa_treealign" + return [ meta, tree ] + magus_treealign: tool == "magus_treealign" + return [ meta, tree ] + clustalo_treealign: tool == "clustalo_treealign" + return [ meta, tree ] + tcoffee_treealign: tool == "tcoffee_treealign" + return [ meta, tree ] + } + .set { ch_tree_branch } + + FAMSA_TREEALIGN( ch_fasta_branch.famsa_treealign, ch_tree_branch.famsa_treealign ) + 
ch_out_alignment = ch_out_alignment.mix(FAMSA_TREEALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(FAMSA_TREEALIGN.out.versions) + + MAGUS_TREEALIGN( ch_fasta_branch.magus_treealign, ch_tree_branch.magus_treealign ) + ch_out_alignment = ch_out_alignment.mix(MAGUS_TREEALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(MAGUS_TREEALIGN.out.versions) + + CLUSTALO_TREEALIGN( ch_fasta_branch.clustalo_treealign, ch_tree_branch.clustalo_treealign ) + ch_out_alignment = ch_out_alignment.mix(CLUSTALO_TREEALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(CLUSTALO_TREEALIGN.out.versions) + + TCOFFEE_TREEALIGN( ch_fasta_branch.tcoffee_treealign, ch_tree_branch.tcoffee_treealign ) + ch_out_alignment = ch_out_alignment.mix(TCOFFEE_TREEALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(TCOFFEE_TREEALIGN.out.versions) + + + + emit: + alignment = ch_out_alignment + versions = ch_out_versions + +} + diff --git a/subworkflows/mirpedrol/msa_treealign/meta.yml b/subworkflows/mirpedrol/msa_treealign/meta.yml new file mode 100644 index 00000000..4731d1ea --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/meta.yml @@ -0,0 +1,73 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_treealign" +description: Perform multiple sequence alignment from a provided guide tree +keywords: ["alignment", "treealignment", "msa"] +components: ["famsa/treealign", "magus/treealign", "clustalo/treealign", "tcoffee/treealign"] + +input: + - ch_fasta: + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. 
`[ id:'test']` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + - tool: + description: The name of the tool to run + type: string + - ch_tree: + description: "Channel containing: meta, fasta" + structure: + - meta2: + description: "Groovy Map containing tree information + + e.g. `[ id:'test_tree']` + + " + type: map + - tree: + description: Input guide tree in Newick format + pattern: "*.{dnd}" + type: file + - tool: + description: The name of the tool to run + type: string + +output: + - alignment: + description: Output channel alignment + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - "*.aln.gz": + description: Alignment file, in gzipped fasta format + ontologies: + - edam: http://edamontology.org/format_1984 + pattern: "*.aln.gz" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test new file mode 100644 index 00000000..fcb2a244 --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test @@ -0,0 +1,173 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_TREEALIGN" + script "../main.nf" + workflow "MSA_TREEALIGN" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_treealign" + tag "famsa/treealign" + tag "magus/treealign" + tag "clustalo/treealign" + tag "tcoffee/treealign" + tag "famsa/guidetree" + tag "magus/guidetree" + tag "clustalo/guidetree" + + + test("run famsa/treealign") { + + setup { + run("FAMSA_GUIDETREE") { + script "../../../../modules/mirpedrol/famsa/guidetree/main.nf" + process { + """ + 
input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ]) + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + , 'famsa_treealign' + ]) + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree, 'famsa_treealign']} + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("famsa_treealign_alignment")}, + { assert snapshot(workflow.out.versions).match("famsa_treealign_versions") } + ) + } + } + + test("run magus/treealign") { + + setup { + run("MAGUS_GUIDETREE") { + script "../../../../modules/mirpedrol/magus/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + , 'magus_treealign' + ]) + input[1] = MAGUS_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree, 'magus_treealign']} + """ + } + } + + then { + assertAll( + { assert workflow.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert 
path(workflow.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert snapshot(workflow.out.versions).match("magus_treealign_versions") } + ) + } + } + + test("run clustalo/treealign") { + + setup { + + run("CLUSTALO_GUIDETREE") { + script "../../../../modules/mirpedrol/clustalo/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + , 'clustalo_treealign' + ]) + input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree, 'clustalo_treealign']} + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("clustalo_treealign_alignment")}, + { assert snapshot(workflow.out.versions).match("clustalo_treealign_versions") } + ) + } + } + + test("run tcoffee/treealign") { + + setup { + + run("FAMSA_GUIDETREE") { + script "../../../../modules/mirpedrol/famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of( [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + , 'tcoffee_treealign' + ]) + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree, 'tcoffee_treealign']} + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("tcoffee_treealign_alignment")}, + { assert snapshot(workflow.out.versions).match("tcoffee_treealign_versions") } + ) + } + } + + +} \ No newline at end of file diff --git 
a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..c223c0ef --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap @@ -0,0 +1,200 @@ +{ + "clustalo_treealign_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-22T16:01:01.611023951" + }, + "magus_treealign_versions": { + "content": [ + [ + "versions.yml:md5,32409a4801ce34648c31cef62c4bb298" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-22T16:00:05.747842102" + }, + "clustalo_treealign_versions": { + "content": [ + [ + "versions.yml:md5,7d9dc598369d063294d8ff5b62d02e2f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-22T16:01:01.661726627" + }, + "magus_versions": { + "content": [ + [ + "versions.yml:md5,6bd0fc2decf3dd8e3b43a4d5b9c0cc58" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:31.000053" + }, + "tcoffee_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,93bc8adfcd88f7913718eacc13da8e4a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:57.317953" + }, + "tcoffee_treealign_versions": { + "content": [ + [ + "versions.yml:md5,c9000e613bf71e55f37a77eacd606fcf" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-22T16:01:18.493259432" + }, + "famsa_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:17.914586" + }, + "famsa_versions": { + "content": [ + [ + 
"versions.yml:md5,3002c8ce2c4f1b9a4b084b61efc0c2b4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:17.922286" + }, + "clustalo_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:52.483352" + }, + "clustalo_versions": { + "content": [ + [ + "versions.yml:md5,2c9f1ab1c3e8a431546fda2ef0170713" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:52.490603" + }, + "tcoffee_versions": { + "content": [ + [ + "versions.yml:md5,c6bb7f0fcab261972bd722de048f36b1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:57.324541" + }, + "famsa_treealign_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-22T15:59:47.187845271" + }, + "tcoffee_treealign_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,93bc8adfcd88f7913718eacc13da8e4a" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-22T16:01:18.433917307" + }, + "famsa_treealign_versions": { + "content": [ + [ + "versions.yml:md5,d62584b93ac23e3e3089a9c50f48e4e8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-22T15:59:47.275076014" + } +} \ No newline at end of file From bc5a5e0efc9242531632d43df25c59b0b86b78af Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 23 Jan 2025 16:52:31 +0100 Subject: [PATCH 3/5] first try to use classes - not tested --- README.md | 4 +- assets/multiqc_config.yml | 6 +- assets/schema_tools.json | 30 +++-- assets/toolsheet.csv | 2 +- bin/merge_scores.py | 2 +- bin/shiny_app/shiny_app.py 
| 4 +- conf/modules.config | 70 +++++++----- docs/usage.md | 18 +-- modules.json | 105 ++++++++++++++++++ .../main.nf | 6 +- subworkflows/local/visualization.nf | 2 +- workflows/multiplesequencealign.nf | 91 +++++++++++++-- 12 files changed, 270 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 5e3e7dd9..02679237 100644 --- a/README.md +++ b/README.md @@ -63,14 +63,14 @@ Each row represents a set of sequences (in this case the seatoxin and toxin prot The toolsheet specifies **which combination of tools will be deployed and benchmark in the pipeline**. Each line of the toolsheet defines a combination of guide tree and multiple sequence aligner to run with the respective arguments to be used. -The only required field is `aligner`. The fields `tree`, `args_tree` and `args_aligner` are optional and can be left empty. +The only required field is `aligner`. The fields `tree`, `args_guidetree` and `args_aligner` are optional and can be left empty. It should look at follows: `toolsheet.csv`: ```csv -tree,args_tree,aligner,args_aligner, +tree,args_guidetree,aligner,args_aligner, FAMSA, -gt upgma -medoidtree, FAMSA, , ,TCOFFEE, FAMSA,,REGRESSIVE, diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 98965b3c..3b0035f2 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -117,11 +117,11 @@ custom_table_header_config: hidden: False namespace: "Alignment" scale: "Paired" - args_tree: + args_guidetree: description: "Arguments used to build the tree." hidden: True namespace: "Alignment" - args_tree_clean: + args_guidetree_clean: description: "Arguments used to build the tree." 
hidden: True namespace: "Alignment" @@ -143,7 +143,7 @@ table_columns_placement: summary_stats: fasta: 90 tree: 150 - args_tree: 170 + args_guidetree: 170 aligner: 200 args_aligner: 220 n_sequences: 250 diff --git a/assets/schema_tools.json b/assets/schema_tools.json index f2e5c121..c1584822 100644 --- a/assets/schema_tools.json +++ b/assets/schema_tools.json @@ -7,31 +7,43 @@ "items": { "type": "object", "properties": { - "tree": { + "guidetree": { "type": "string", "pattern": "^\\S+$", "errorMessage": "tree name cannot contain spaces", - "meta": ["tree"], + "meta": ["guidetree"], "default": "" }, - "args_tree": { + "args_guidetree": { "type": "string", - "meta": ["args_tree"], + "meta": ["args_guidetree"], "default": "" }, - "aligner": { + "treealign": { "type": "string", - "meta": ["aligner"], + "meta": ["treealign"], "pattern": "^\\S+$", "errorMessage": "align name must be provided and cannot contain spaces", "default": "" }, - "args_aligner": { + "args_treealign": { "type": "string", - "meta": ["args_aligner"], + "meta": ["args_treealign"], + "default": "" + }, + "alignment": { + "type": "string", + "meta": ["alignment"], + "pattern": "^\\S+$", + "errorMessage": "align name must be provided and cannot contain spaces", + "default": "" + }, + "args_alignment": { + "type": "string", + "meta": ["args_alignment"], "default": "" } }, - "required": ["aligner"] + "oneOf": [{ "required": ["alignment"] }, { "required": ["guidetree", "treealign"] }] } } diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv index ac21c853..c52ba6aa 100644 --- a/assets/toolsheet.csv +++ b/assets/toolsheet.csv @@ -1,3 +1,3 @@ -tree,args_tree,aligner,args_aligner +tree,args_guidetree,aligner,args_aligner FAMSA,,FAMSA, ,,MAFFT,--dpparttree diff --git a/bin/merge_scores.py b/bin/merge_scores.py index 399dc848..5e4f8c63 100755 --- a/bin/merge_scores.py +++ b/bin/merge_scores.py @@ -5,7 +5,7 @@ import sys import pandas as pd -merging_cols = ["id", "tree", "args_tree", "aligner", 
"args_aligner"] +merging_cols = ["id", "tree", "args_guidetree", "aligner", "args_aligner"] scores_files = sys.argv[2:] outfile = sys.argv[1] diff --git a/bin/shiny_app/shiny_app.py b/bin/shiny_app/shiny_app.py index d33d7ecd..d14954d1 100644 --- a/bin/shiny_app/shiny_app.py +++ b/bin/shiny_app/shiny_app.py @@ -24,10 +24,10 @@ def merge_tree_args(row): if str(row["tree"]) == "DEFAULT": return "None" - elif str(row["args_tree"]) == "default": + elif str(row["args_guidetree"]) == "default": return str(row["tree"]) + " ()" else: - return str(row["tree"]) + " (" + str(row["args_tree"]) + ")" + return str(row["tree"]) + " (" + str(row["args_guidetree"]) + ")" inputfile["tree_args"] = inputfile.apply(merge_tree_args, axis=1) diff --git a/conf/modules.config b/conf/modules.config index b026fd56..906ff014 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -64,18 +64,18 @@ } // - // Tree building + // Tree building (guidetree) // - withName: "CLUSTALO_GUIDETREE|FAMSA_GUIDETREE" { + withName: "CLUSTALO_GUIDETREE|FAMSA_GUIDETREE|MAGUS_GUIDETREE" { tag = { [ "${meta.id}", - meta.args_tree ? "args: ${meta.args_tree}" : "" + meta.args_guidetree ? "args: ${meta.args_guidetree}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}" } - ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}" } + ext.args = { "${meta.args_guidetree}" == "null" ? '' : "${meta.args_guidetree}" } publishDir = [ path: { "${params.outdir}/trees/${meta.id}" }, mode: params.publish_dir_mode, @@ -83,6 +83,26 @@ ] } + // + // Alignment from a tree (treealign) + // + + withName: "CLUSTALO_TREEALIGN|FAMSA_TREEALIGN|MAGUS_TREEALIGN|TCOFFEE_TREEALIGN"{ + tag = { + [ + "${meta.id}", + meta.args_treealign ? 
"args: ${meta.args_treealign}" : "" + ].join(' ').trim() + } + ext.prefix = { "${meta.id}_${meta.treealign}-args-${meta.args_treealign_clean}_${meta.guidetree}-args-${meta.args_guidetree_clean}" } + ext.args = { "${meta.args_treealign}" == "null" ? '' : "${meta.args_treealign}" } + publishDir = [ + path: { "${params.outdir}/alignment/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // // Alignment // @@ -90,24 +110,20 @@ withName: "CREATE_TCOFFEETEMPLATE" { ext.prefix = { "${meta.id}" } } - withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|FOLDMASON_EASYMSA|KALIGN_ALIGN|LEARNMSA_ALIGN|MAFFT_ALIGN|MAGUS_ALIGN|MUSCLE5_SUPER5|TCOFFEE_REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN|UPP_ALIGN" { + withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|FOLDMASON_EASYMSA|KALIGN_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|TCOFFEE_REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN|UPP_ALIGN" { tag = { [ "${meta.id}", - meta.tree ? "tree: ${meta.tree}" : "", - meta.args_tree ? "argstree: ${meta.args_tree}" : "", - meta.args_aligner ? "args: ${meta.args_aligner}" : "" + meta.args_alignment ? "args: ${meta.args_alignment}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } - ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } - if(params.skip_compression){ - publishDir = [ - path: { "${params.outdir}/alignment/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + ext.prefix = { "${meta.id}_${meta.alignment}-args-${meta.args_alignment_clean}" } + ext.args = { "${meta.args_alignment}" == "null" ? '' : "${meta.args_alignment}" } + publishDir = [ + path: { "${params.outdir}/alignment/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } withName: "MTMALIGN_ALIGN" { @@ -115,11 +131,11 @@ [ "${meta.id}", meta.tree ? "tree: ${meta.tree}" : "", - meta.args_tree ? "argstree: ${meta.args_tree}" : "", + meta.args_guidetree ? "argstree: ${meta.args_guidetree}" : "", meta.args_aligner ? "args: ${meta.args_aligner}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}" } ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } if(params.skip_compression){ publishDir = [ @@ -174,21 +190,21 @@ // withName: 'PARSE_IRMSD' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_irmsd" } } withName: 'TCOFFEE_ALNCOMPARE_SP' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_sp" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_sp" } ext.args = "-compare_mode sp" } withName: 'TCOFFEE_ALNCOMPARE_TC' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tc" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_tc" } ext.args = "-compare_mode tc" } withName: 'TCOFFEE_IRMSD' { - ext.prefix = { 
"${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_irmsd" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -198,7 +214,7 @@ } withName: "CALC_GAPS" { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_gaps" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_gaps" } } withName: "CONCAT_IRMSD" { @@ -222,7 +238,7 @@ } withName: 'TCOFFEE_TCS' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tcs" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_tcs" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -274,7 +290,7 @@ // Visualization // withName: 'FOLDMASON_MSA2LDDTREPORT' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}" } publishDir = [ path: { "${params.outdir}/visualization" }, mode: params.publish_dir_mode, diff --git a/docs/usage.md b/docs/usage.md index f7f82ed4..e0a1fd29 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,14 +131,14 @@ Each line of the toolsheet defines a combination of guide tree and 
multiple sequ A typical toolsheet should look at follows: ```csv title="toolsheet.csv" -tree,args_tree,aligner,args_aligner, +tree,args_guidetree,aligner,args_aligner, FAMSA, -gt upgma -medoidtree, FAMSA, , ,TCOFFEE, FAMSA,,REGRESSIVE, ``` :::note -Each of the trees and aligners are available as standalones. You can leave `args_tree` and `args_aligner` empty if you are cool with the default settings of each method. Alternatively, you can leave `args_tree` empty to use the default guide tree with each aligner. +Each of the trees and aligners are available as standalones. You can leave `args_guidetree` and `args_aligner` empty if you are cool with the default settings of each method. Alternatively, you can leave `args_guidetree` empty to use the default guide tree with each aligner. ::: :::note @@ -147,18 +147,18 @@ use the exact spelling as listed above in [align](#3-align) and [guide trees](#2 `tree` is the tool used to build the tree (optional). -Arguments to the tree tool can be provided using `args_tree`. Please refer to each tool's documentation (optional). +Arguments to the tree tool can be provided using `args_guidetree`. Please refer to each tool's documentation (optional). The `aligner` column contains the tool to run the alignment (optional). Finally, the arguments to the aligner tool can be set by using the `args_aligner` column (optional). -| Column | Description | -| -------------- | -------------------------------------------------------------------------------- | -| `tree` | Optional. Tool used to build the tree. | -| `args_tree` | Optional. Arguments to the tree tool. Please refer to each tool's documentation. | -| `aligner` | Required. Tool to run the alignment. Available options listed above. | -| `args_aligner` | Optional. Arguments to the alignment tool. | +| Column | Description | +| ---------------- | -------------------------------------------------------------------------------- | +| `tree` | Optional. Tool used to build the tree. 
| +| `args_guidetree` | Optional. Arguments to the tree tool. Please refer to each tool's documentation. | +| `aligner` | Required. Tool to run the alignment. Available options listed above. | +| `args_aligner` | Optional. Arguments to the alignment tool. | ## Running the pipeline diff --git a/modules.json b/modules.json index 1c00c223..5fe6d524 100644 --- a/modules.json +++ b/modules.json @@ -2,6 +2,106 @@ "name": "nf-core/multiplesequencealign", "homePage": "https://github.com/nf-core/multiplesequencealign", "repos": { + "https://github.com/mirpedrol/class-modules.git": { + "modules": { + "mirpedrol": { + "clustalo/align": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_alignment"] + }, + "clustalo/guidetree": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_guidetree"] + }, + "clustalo/treealign": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_treealign"] + }, + "famsa/align": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_alignment"] + }, + "famsa/guidetree": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_guidetree"] + }, + "famsa/treealign": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_treealign"] + }, + "kalign/align": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_alignment"] + }, + "learnmsa/align": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_alignment"] + }, + "mafft": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_alignment"] + }, + "magus/align": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": 
["msa_alignment"] + }, + "magus/guidetree": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_guidetree"] + }, + "magus/treealign": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_treealign"] + }, + "muscle5/super5": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_alignment"] + }, + "tcoffee/align": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_alignment"] + }, + "tcoffee/treealign": { + "branch": "main", + "git_sha": "82876573667a4b4536c590e1f757094cfc8bed3e", + "installed_by": ["msa_treealign"] + } + } + }, + "subworkflows": { + "mirpedrol": { + "msa_alignment": { + "branch": "main", + "git_sha": "e249a43bef30684f31d2628035bb61b3cfcf5a73", + "installed_by": ["subworkflows"] + }, + "msa_guidetree": { + "branch": "main", + "git_sha": "e249a43bef30684f31d2628035bb61b3cfcf5a73", + "installed_by": ["subworkflows"] + }, + "msa_treealign": { + "branch": "main", + "git_sha": "e249a43bef30684f31d2628035bb61b3cfcf5a73", + "installed_by": ["subworkflows"] + } + } + } + }, "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { @@ -41,6 +141,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "fastqc": { + "branch": "master", + "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", + "installed_by": ["modules"] + }, "foldmason/createdb": { "branch": "master", "git_sha": "0270c0fbbbb09456d7823605e4285c4a2c5bbf40", diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index da9bce4f..fb70ccac 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -76,8 +76,8 @@ workflow PIPELINE_INITIALISATION { 
def align_map = [:] tree_map["tree"] = Utils.clean_tree(meta_clone["tree"]) - tree_map["args_tree"] = meta_clone["args_tree"] - tree_map["args_tree_clean"] = Utils.cleanArgs(meta_clone.args_tree) + tree_map["args_guidetree"] = meta_clone["args_guidetree"] + tree_map["args_guidetree_clean"] = Utils.cleanArgs(meta_clone.args_guidetree) align_map["aligner"] = meta_clone["aligner"] align_map["args_aligner"] = Utils.check_required_args(meta_clone["aligner"], meta_clone["args_aligner"]) @@ -620,7 +620,7 @@ def merge_summary_and_traces(summary_file, trace_dir_path, outFileName, shinyOut // ------------------- def mergedData = [] data.each { row -> - def treeMatch = trace_file.traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_tree_clean == row.args_tree_clean} + def treeMatch = trace_file.traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_guidetree_clean == row.args_guidetree_clean} def alignMatch = trace_file.traceAlign.find { it.id == row.id && it.aligner == row.aligner && it.args_aligner_clean == row.args_aligner_clean} def mergedRow = row + (treeMatch ?: [:]) + (alignMatch ?: [:]) mergedData << mergedRow diff --git a/subworkflows/local/visualization.nf b/subworkflows/local/visualization.nf index dfc046d4..0616beb2 100644 --- a/subworkflows/local/visualization.nf +++ b/subworkflows/local/visualization.nf @@ -18,7 +18,7 @@ workflow VISUALIZATION { // split the msa meta to be able to merge with the tree meta ch_msa .map { - meta, file -> [ meta.subMap([ "id", "tree", "args_tree", "args_tree_clean" ]), meta, file ] + meta, file -> [ meta.subMap([ "id", "tree", "args_guidetree", "args_guidetree_clean" ]), meta, file ] } .join(ch_trees, by: [0], remainder:true ) .filter { diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index da7e1a76..0ed623ef 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -19,7 +19,6 @@ include { methodsDescriptionText } from 
'../subworkflows/local/utils_nfcore_mult // SUBWORKFLOW: Local subworkflows // include { STATS } from '../subworkflows/local/stats' -include { ALIGN } from '../subworkflows/local/align' include { EVALUATE } from '../subworkflows/local/evaluate' include { TEMPLATES } from '../subworkflows/local/templates' include { PREPROCESS } from '../subworkflows/local/preprocess' @@ -46,6 +45,16 @@ include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/m include { PIGZ_COMPRESS } from '../modules/nf-core/pigz/compress/main' include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT CLASS-MODULES MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { MSA_ALIGNMENT } from '../subworkflows/mirpedrol/msa_alignment/main' +include { MSA_GUIDETREE } from '../subworkflows/mirpedrol/msa_guidetree/main' +include { MSA_TREEALIGN } from '../subworkflows/mirpedrol/msa_treealign/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -206,20 +215,78 @@ workflow MULTIPLESEQUENCEALIGN { stats_summary = stats_summary.mix(STATS.out.stats_summary) } + ch_tools + .multiMap { + it -> + guidetree: [it[0], it[1]] + alignment: [it[2], it[3]] + } + .set { ch_tools_split } + + ch_seqs + .combine(ch_tools) + // Add tools and arguments to the meta + .multiMap { + meta, fasta, guidetree, args_guidetree, treealign, args_treealign, alignment, args_alignment -> + guidetree: [ meta + ["guidetree":guidetree, "args_guidetree":args_guidetree], fasta, guidetree] + alignment: [ meta + ["alignment":alignment, "args_alignment":args_alignment], fasta, alignment] + } + .set { ch_fasta_tools } + + ch_fasta_tools.guidetree + .filter{ it -> it[0].guidetree } + .unique() + .set { ch_fasta_guidetree } + ch_fasta_tools.alignment + .filter{ it -> 
it[0].alignment } + .unique() + .set { ch_fasta_alignment } + + // + // Compute tree + // + MSA_GUIDETREE (ch_fasta_guidetree) + ch_versions = ch_versions.mix(MSA_GUIDETREE.out.versions) + + ch_seqs + .combine(MSA_GUIDETREE.out.tree, by:0) // combine by meta ID + .map { meta, fasta, tree -> [ meta.guidetree, meta, fasta, tree ] } + .combine(ch_tools, by: 0) // combine by guidetree + .map { + guidetree, meta, fasta, tree, args_guidetree, treealign, args_treealign, alignment, args_alignment -> + [meta + ["treealign":treealign, "args_treealign":args_treealign], fasta, tree, treealign] + } + .multiMap { + meta, fasta, tree, treealign -> + fastas: [ meta, fasta, treealign ] + trees: [ meta, tree, treealign ] + } + .set { ch_tree_treealign } + + ch_alignment_output = Channel.empty() + + // + // Align with a given tree + // + MSA_TREEALIGN ( + ch_tree_treealign.fastas, + ch_tree_treealign.trees + ) + ch_versions = ch_versions.mix(MSA_TREEALIGN.out.versions) + ch_alignment_output = ch_alignment_output.mix(MSA_TREEALIGN.out.alignment) + + // // Align // compress_during_align = !(params.skip_compression || (!params.skip_eval || params.build_consensus)) - ALIGN ( - ch_seqs, - ch_tools, - ch_optional_data_template, - compress_during_align - ) - ch_versions = ch_versions.mix(ALIGN.out.versions) + + MSA_ALIGNMENT ( ch_fasta_alignment ) + ch_versions = ch_versions.mix(MSA_ALIGNMENT.out.versions) + ch_alignment_output = ch_alignment_output.mix(MSA_ALIGNMENT.out.alignment) if (!params.skip_compression && !compress_during_align) { - PIGZ_COMPRESS (ALIGN.out.msa) + PIGZ_COMPRESS (ch_alignment_output) ch_versions = ch_versions.mix(PIGZ_COMPRESS.out.versions) } @@ -227,7 +294,7 @@ workflow MULTIPLESEQUENCEALIGN { // Evaluate the quality of the alignment // if (!params.skip_eval) { - EVALUATE (ALIGN.out.msa, ch_refs, ch_optional_data_template) + EVALUATE (ch_alignment_output, ch_refs, ch_optional_data_template) ch_versions = ch_versions.mix(EVALUATE.out.versions) evaluation_summary = 
evaluation_summary.mix(EVALUATE.out.eval_summary) } @@ -262,8 +329,8 @@ workflow MULTIPLESEQUENCEALIGN { if (!params.skip_visualisation) { VISUALIZATION ( - ALIGN.out.msa, - ALIGN.out.trees, + ch_alignment_output, + MSA_GUIDETREE.out.tree, ch_optional_data ) } From 78568285fda07e02e5237197e946a053e7863105 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Fri, 24 Jan 2025 11:37:01 +0100 Subject: [PATCH 4/5] add test toolsheet to test with classes --- assets/test_toolsheet.csv | 21 +++++++++++++++++++++ conf/test.config | 2 +- conf/test_full.config | 2 +- conf/test_parameters.config | 2 +- 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 assets/test_toolsheet.csv diff --git a/assets/test_toolsheet.csv b/assets/test_toolsheet.csv new file mode 100644 index 00000000..a34fa0b6 --- /dev/null +++ b/assets/test_toolsheet.csv @@ -0,0 +1,21 @@ +guidetree,args_guidetree,treealign,args_treealign,alignment,args_alignment +,,,,CLUSTALO, +,,,,FAMSA, +,,,,FOLDMASON, +,,,,KALIGN, +,,,,LEARNMSA, +,,,,MAGUS, +,,,,MAFFT, +,,,,MAFFT, --dpparttree +,,,,MUSCLE5, +,,,,MTMALIGN, +,,,,REGRESSIVE, +,,,,REGRESSIVE,-reg_nseq 3 +,,,,TCOFFEE, +,,,,UPP, +,,,,3DCOFFEE, +,,,,3DCOFFEE,-method TMalign_pair +FAMSA,-gt upgma -medoidtree,FAMSA, +FAMSA,,MAGUS, +CLUSTALO,,REGRESSIVE, +MAFFT,,FOLDMASON, diff --git a/conf/test.config b/conf/test.config index ee9cf66c..e69c0a76 100644 --- a/conf/test.config +++ b/conf/test.config @@ -36,5 +36,5 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.1/samplesheet_test_af2.csv' - tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' + tools = "${projectDir}/assets/test_toolsheet.csv" } diff --git a/conf/test_full.config b/conf/test_full.config index 2ee5ec7e..4c0fa67f 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -37,5 +37,5 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + 
'multiplesequencealign/samplesheet/v1.1/samplesheet_full.csv' - tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' + tools = "${projectDir}/assets/test_toolsheet.csv" } diff --git a/conf/test_parameters.config b/conf/test_parameters.config index 51e37ae0..70062694 100644 --- a/conf/test_parameters.config +++ b/conf/test_parameters.config @@ -26,5 +26,5 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.1/samplesheet_test_af2.csv' - tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' + tools = "${projectDir}/assets/test_toolsheet.csv" } From 2b5e18c3ea2d37ae75fbd83b8458f5b9ff68f410 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Fri, 24 Jan 2025 12:28:42 +0100 Subject: [PATCH 5/5] fix test toolsheet and ch_tools --- assets/test_toolsheet.csv | 28 +++++------ .../main.nf | 22 +++++---- workflows/multiplesequencealign.nf | 46 ++++++++++++------- 3 files changed, 56 insertions(+), 40 deletions(-) diff --git a/assets/test_toolsheet.csv b/assets/test_toolsheet.csv index a34fa0b6..9ba5a1f0 100644 --- a/assets/test_toolsheet.csv +++ b/assets/test_toolsheet.csv @@ -1,21 +1,21 @@ guidetree,args_guidetree,treealign,args_treealign,alignment,args_alignment -,,,,CLUSTALO, -,,,,FAMSA, -,,,,FOLDMASON, -,,,,KALIGN, -,,,,LEARNMSA, -,,,,MAGUS, -,,,,MAFFT, -,,,,MAFFT, --dpparttree -,,,,MUSCLE5, +,,,,clustalo_align, +,,,,famsa_align, +,,,,foldmason_align, +,,,,kalign_align, +,,,,learnmsa_align, +,,,,magus_align, +,,,,mafft, +,,,,mafft, --dpparttree +,,,,muscle5_super5, ,,,,MTMALIGN, ,,,,REGRESSIVE, ,,,,REGRESSIVE,-reg_nseq 3 -,,,,TCOFFEE, +,,,,tcoffee_align, ,,,,UPP, ,,,,3DCOFFEE, ,,,,3DCOFFEE,-method TMalign_pair -FAMSA,-gt upgma -medoidtree,FAMSA, -FAMSA,,MAGUS, -CLUSTALO,,REGRESSIVE, -MAFFT,,FOLDMASON, +famsa_guidetree,-gt upgma -medoidtree,famsa_treealign, +famsa_guidetree,,magus_treealign, +clustalo_align,,REGRESSIVE, 
+mafft,,FOLDMASON, diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index fb70ccac..43b0fbfb 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -75,13 +75,17 @@ workflow PIPELINE_INITIALISATION { def tree_map = [:] def align_map = [:] - tree_map["tree"] = Utils.clean_tree(meta_clone["tree"]) + tree_map["guidetree"] = Utils.clean_tree(meta_clone["guidetree"]) tree_map["args_guidetree"] = meta_clone["args_guidetree"] tree_map["args_guidetree_clean"] = Utils.cleanArgs(meta_clone.args_guidetree) - align_map["aligner"] = meta_clone["aligner"] - align_map["args_aligner"] = Utils.check_required_args(meta_clone["aligner"], meta_clone["args_aligner"]) - align_map["args_aligner_clean"] = Utils.cleanArgs(meta_clone.args_aligner) + tree_map["treealign"] = Utils.clean_tree(meta_clone["treealign"]) + tree_map["args_treealign"] = meta_clone["args_treealign"] + tree_map["args_treealign_clean"] = Utils.cleanArgs(meta_clone.args_treealign) + + align_map["alignment"] = meta_clone["alignment"] + align_map["args_alignment"] = Utils.check_required_args(meta_clone["alignment"], meta_clone["args_alignment"]) + align_map["args_alignment_clean"] = Utils.cleanArgs(meta_clone.args_alignment) [ tree_map, align_map ] }.unique() @@ -140,11 +144,11 @@ workflow PIPELINE_COMPLETION { def summary_file = "${outdir}/summary/complete_summary_stats_eval.csv" def summary_file_with_traces = "${outdir}/summary/complete_summary_stats_eval_times.csv" def trace_dir_path = "${outdir}/pipeline_info/" - if (shiny_trace_mode) { - merge_summary_and_traces(summary_file, trace_dir_path, summary_file_with_traces, "${shiny_dir_path}/complete_summary_stats_eval_times.csv") - }else{ - merge_summary_and_traces(summary_file, trace_dir_path, summary_file_with_traces, "") - } + //if (shiny_trace_mode) { + 
// merge_summary_and_traces(summary_file, trace_dir_path, summary_file_with_traces, "${shiny_dir_path}/complete_summary_stats_eval_times.csv") + //}else{ + // merge_summary_and_traces(summary_file, trace_dir_path, summary_file_with_traces, "") + //} } workflow.onError { diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 0ed623ef..f5aa5877 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -65,7 +65,7 @@ workflow MULTIPLESEQUENCEALIGN { take: ch_input // channel: [ meta, path(sequence.fasta), path(reference.fasta), path(dependency_files.tar.gz), path(templates.txt) ] - ch_tools // channel: [ val(guide_tree_tool), val(args_guide_tree_tool), val(alignment_tool), val(args_alignment_tool) ] + ch_tools // channel: [ meta_guidetree_treealign, meta_alignment ] main: ch_multiqc_files = Channel.empty() @@ -215,21 +215,13 @@ workflow MULTIPLESEQUENCEALIGN { stats_summary = stats_summary.mix(STATS.out.stats_summary) } - ch_tools - .multiMap { - it -> - guidetree: [it[0], it[1]] - alignment: [it[2], it[3]] - } - .set { ch_tools_split } - ch_seqs .combine(ch_tools) // Add tools and arguments to the meta .multiMap { - meta, fasta, guidetree, args_guidetree, treealign, args_treealign, alignment, args_alignment -> - guidetree: [ meta + ["guidetree":guidetree, "args_guidetree":args_guidetree], fasta, guidetree] - alignment: [ meta + ["alignment":alignment, "args_alignment":args_alignment], fasta, alignment] + meta, fasta, meta_guidetree_treealign, meta_alignment -> + guidetree: [ meta + ["guidetree":meta_guidetree_treealign.guidetree, "args_guidetree":meta_guidetree_treealign.args_guidetree, "args_guidetree_clean":meta_guidetree_treealign.args_guidetree_clean], fasta, meta_guidetree_treealign.guidetree] + alignment: [ meta + ["alignment":meta_alignment.alignment, "args_alignment":meta_alignment.args_alignment, "args_alignment_clean":meta_alignment.args_alignment_clean], fasta, meta_alignment.alignment] 
} .set { ch_fasta_tools } @@ -242,6 +234,9 @@ workflow MULTIPLESEQUENCEALIGN { .unique() .set { ch_fasta_alignment } + ch_fasta_guidetree.dump( tag: 'ch_fasta_guidetree' ) + ch_fasta_alignment.dump( tag: 'ch_fasta_alignment' ) + // // Compute tree // @@ -249,12 +244,29 @@ workflow MULTIPLESEQUENCEALIGN { ch_versions = ch_versions.mix(MSA_GUIDETREE.out.versions) ch_seqs - .combine(MSA_GUIDETREE.out.tree, by:0) // combine by meta ID - .map { meta, fasta, tree -> [ meta.guidetree, meta, fasta, tree ] } - .combine(ch_tools, by: 0) // combine by guidetree + .map { meta, fasta -> + [ meta.id, meta, fasta ] + } + .combine( + MSA_GUIDETREE.out.tree + .map { meta, tree -> + [ meta.id, meta, tree ] + } + , by:0 + ) // combine by meta ID + .map { meta_id, meta_fasta, fasta, meta_tree, tree -> + [ meta_tree.guidetree, meta_tree, fasta, tree ] + } + .combine( + ch_tools + .map { meta_guidetree_treealign, meta_alignment -> + [ meta_guidetree_treealign.guidetree, meta_guidetree_treealign ] + } + , by: 0 + ) // combine by guidetree .map { - guidetree, meta, fasta, tree, args_guidetree, treealign, args_treealign, alignment, args_alignment -> - [meta + ["treealign":treealign, "args_treealign":args_treealign], fasta, tree, treealign] + guidetree, meta, fasta, tree, meta_guidetree_treealign -> + [meta + ["treealign":meta_guidetree_treealign.treealign, "args_treealign":meta_guidetree_treealign.args_treealign, "args_treealign_clean":meta_guidetree_treealign.args_treealign_clean], fasta, tree, meta_guidetree_treealign.treealign] } .multiMap { meta, fasta, tree, treealign ->