diff --git a/modules/nf-core/gprofiler2/gost/main.nf b/modules/nf-core/gprofiler2/gost/main.nf index 8428f514b74..5e8055f7d55 100644 --- a/modules/nf-core/gprofiler2/gost/main.nf +++ b/modules/nf-core/gprofiler2/gost/main.nf @@ -9,8 +9,8 @@ process GPROFILER2_GOST { input: tuple val(meta), path(de_file) - path(gmt_file) - path(background_file) + tuple val(meta2), path(gmt_file) + tuple val(meta3), path(background_file) output: tuple val(meta), path("*.gprofiler2.all_enriched_pathways.tsv") , emit: all_enrich diff --git a/modules/nf-core/gprofiler2/gost/meta.yml b/modules/nf-core/gprofiler2/gost/meta.yml index 33c9bff24b3..2ec9318ba81 100644 --- a/modules/nf-core/gprofiler2/gost/meta.yml +++ b/modules/nf-core/gprofiler2/gost/meta.yml @@ -27,12 +27,18 @@ input: pattern: "*.{csv,tsv}" description: | CSV or TSV-format tabular file with differential analysis outputs - - - gmt_file: + - - meta2: + type: map + description: Groovy map + - gmt_file: type: file pattern: "*.gmt" description: | Path to a GMT file downloaded from g:profiler that should be queried instead of the online databases - - - background_file: + - - meta3: + type: map + description: Groovy map + - background_file: type: file pattern: "*.{csv,tsv,txt}" description: | diff --git a/modules/nf-core/gprofiler2/gost/tests/main.nf.test b/modules/nf-core/gprofiler2/gost/tests/main.nf.test index 28e9e751395..e2528ef2fe0 100644 --- a/modules/nf-core/gprofiler2/gost/tests/main.nf.test +++ b/modules/nf-core/gprofiler2/gost/tests/main.nf.test @@ -55,8 +55,14 @@ nextflow_process { ['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/Condition_genotype_WT_KO.deseq2.results_filtered.tsv", checkIfExists: true) ] - input[1] = file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true) - input[2] = 
file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true) + input[1] = [ + ['id': 'test'], + file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true) + ] + input[2] = [ + ['id': 'test'], + file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true) + ] """ } } @@ -66,9 +72,9 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.all_enrich, - process.out.plot_png, process.out.sub_enrich, - process.out.sub_plot, + file(process.out.plot_png[0][1]).name, //assert unstable file + process.out.sub_plot[0][1].collect{ file(it).name }, //assert unstable file process.out.filtered_gmt, process.out.session_info.collect{ meta,session_info -> file(session_info).name }, //assert unstable file process.out.versions, @@ -94,8 +100,14 @@ nextflow_process { ['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/Condition_genotype_WT_KO.deseq2.results_filtered.tsv", checkIfExists: true) ] - input[1] = file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true) - input[2] = file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true) + input[1] = [ + ['id': 'test'], + file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true) + ] + input[2] = [ + ['id': 'test'], + file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true) + ] """ } } diff --git a/modules/nf-core/gprofiler2/gost/tests/main.nf.test.snap 
b/modules/nf-core/gprofiler2/gost/tests/main.nf.test.snap index 0a052d5cca3..eec8d87cc62 100644 --- a/modules/nf-core/gprofiler2/gost/tests/main.nf.test.snap +++ b/modules/nf-core/gprofiler2/gost/tests/main.nf.test.snap @@ -13,18 +13,6 @@ "Condition_genotype_WT_KO.gprofiler2.all_enriched_pathways.tsv:md5,1134a02ca061c463bcbff277eefbfb19" ] ], - [ - [ - { - "id": "Condition_genotype_WT_KO", - "variable": "Condition genotype", - "reference": "WT", - "target": "KO", - "blocking": "batch" - }, - "Condition_genotype_WT_KO.gprofiler2.gostplot.png:md5,4b83d1bcf7dc9dbf6cef3d545e440c5b" - ] - ], [ [ { @@ -47,27 +35,17 @@ ] ] ], + "Condition_genotype_WT_KO.gprofiler2.gostplot.png", [ - [ - { - "id": "Condition_genotype_WT_KO", - "variable": "Condition genotype", - "reference": "WT", - "target": "KO", - "blocking": "batch" - }, - [ - "Condition_genotype_WT_KO.gprofiler2.GO:BP.sub_enriched_pathways.png:md5,d89498267e985adf0ad1266e2deb9f48", - "Condition_genotype_WT_KO.gprofiler2.GO:CC.sub_enriched_pathways.png:md5,e04cdd51b200671613254d021d3af242", - "Condition_genotype_WT_KO.gprofiler2.GO:MF.sub_enriched_pathways.png:md5,33ea0652d78111978677acde0fe7f807", - "Condition_genotype_WT_KO.gprofiler2.HP.sub_enriched_pathways.png:md5,6c040ac4baba73ae5637b00650e6aea1", - "Condition_genotype_WT_KO.gprofiler2.KEGG.sub_enriched_pathways.png:md5,fbd232c4eeced95ceda60b43a02dbe1f", - "Condition_genotype_WT_KO.gprofiler2.MIRNA.sub_enriched_pathways.png:md5,956880d3bf4852a06b0ffaaaba565732", - "Condition_genotype_WT_KO.gprofiler2.REAC.sub_enriched_pathways.png:md5,0e8f9217d275668986771dc7fede3170", - "Condition_genotype_WT_KO.gprofiler2.TF.sub_enriched_pathways.png:md5,0697164bc87e95e6508db966df94e01e", - "Condition_genotype_WT_KO.gprofiler2.WP.sub_enriched_pathways.png:md5,09976762c7541ff9e5009e8763986845" - ] - ] + "Condition_genotype_WT_KO.gprofiler2.GO:BP.sub_enriched_pathways.png", + "Condition_genotype_WT_KO.gprofiler2.GO:CC.sub_enriched_pathways.png", + 
"Condition_genotype_WT_KO.gprofiler2.GO:MF.sub_enriched_pathways.png", + "Condition_genotype_WT_KO.gprofiler2.HP.sub_enriched_pathways.png", + "Condition_genotype_WT_KO.gprofiler2.KEGG.sub_enriched_pathways.png", + "Condition_genotype_WT_KO.gprofiler2.MIRNA.sub_enriched_pathways.png", + "Condition_genotype_WT_KO.gprofiler2.REAC.sub_enriched_pathways.png", + "Condition_genotype_WT_KO.gprofiler2.TF.sub_enriched_pathways.png", + "Condition_genotype_WT_KO.gprofiler2.WP.sub_enriched_pathways.png" ], [ @@ -89,7 +67,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-01-09T13:43:18.555455129" + "timestamp": "2025-01-21T11:29:54.746689985" }, "stub": { "content": [ @@ -298,6 +276,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-01-09T13:43:36.462475057" + "timestamp": "2025-01-21T11:31:33.394855046" } } \ No newline at end of file diff --git a/modules/nf-core/gsea/gsea/main.nf b/modules/nf-core/gsea/gsea/main.nf index 2cc542dc989..494b8a5d9e4 100644 --- a/modules/nf-core/gsea/gsea/main.nf +++ b/modules/nf-core/gsea/gsea/main.nf @@ -10,7 +10,7 @@ process GSEA_GSEA { input: tuple val(meta), path(gct), path(cls), path(gene_sets) tuple val(reference), val(target) - path(chip) // Optional identifier mapping file + tuple val(meta2), path(chip) // Optional identifier mapping file output: tuple val(meta), path("*.rpt") , emit: rpt diff --git a/modules/nf-core/gsea/gsea/meta.yml b/modules/nf-core/gsea/gsea/meta.yml index 937d3a6ac4b..41a22941ccd 100644 --- a/modules/nf-core/gsea/gsea/meta.yml +++ b/modules/nf-core/gsea/gsea/meta.yml @@ -40,7 +40,10 @@ input: description: | String indicating which of the classes in the cls file should be used as the target level of the comparison. 
- - - chip: + - - meta2: + type: map + description: Groovy map + - chip: type: file description: | optional Broad-style chip file mapping identifiers in gct to diff --git a/modules/nf-core/gsea/gsea/tests/main.nf.test b/modules/nf-core/gsea/gsea/tests/main.nf.test index 869cf0ca0ef..1f97e97aec0 100644 --- a/modules/nf-core/gsea/gsea/tests/main.nf.test +++ b/modules/nf-core/gsea/gsea/tests/main.nf.test @@ -17,7 +17,10 @@ nextflow_process { """ input[0] = [['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_treatment_Control_Treated.gct", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_genotype_WT_KO.cls", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists:true)] input[1] = ['WT', 'KO'] - input[2] = file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true) + input[2] = [ + ['id': 'test'], + file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true) + ] """ } } @@ -63,7 +66,10 @@ nextflow_process { """ input[0] = [['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_treatment_Control_Treated.gct", checkIfExists:true), 
file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_genotype_WT_KO.cls", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists:true)] input[1] = ['WT', 'KO'] - input[2] = file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true) + input[2] = [ + ['id': 'test'], + file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true) + ] """ } } diff --git a/subworkflows/nf-core/differential_functional_enrichment/main.nf b/subworkflows/nf-core/differential_functional_enrichment/main.nf new file mode 100644 index 00000000000..ff18aedc0f6 --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/main.nf @@ -0,0 +1,149 @@ + +// +// Perform enrichment analysis +// +include { GPROFILER2_GOST } from "../../../modules/nf-core/gprofiler2/gost/main.nf" +include { CUSTOM_TABULARTOGSEAGCT } from '../../../modules/nf-core/custom/tabulartogseagct/main.nf' +include { CUSTOM_TABULARTOGSEACLS } from '../../../modules/nf-core/custom/tabulartogseacls/main.nf' +include { CUSTOM_TABULARTOGSEACHIP } from '../../../modules/nf-core/custom/tabulartogseachip/main.nf' +include { GSEA_GSEA } from '../../../modules/nf-core/gsea/gsea/main.nf' +include { PROPR_GREA } from "../../../modules/nf-core/propr/grea/main.nf" + +// Combine meta maps, including merging non-identical values of shared keys (e.g. 'id') +def mergeMaps(meta, meta2){ + (meta + meta2).collectEntries { k, v -> + meta[k] && meta[k] != v ? 
[k, "${meta[k]}_${v}"] : [k, v] + } +} + +workflow DIFFERENTIAL_FUNCTIONAL_ENRICHMENT { + take: + // input data for functional analysis + // Note that genesets and background are optional depending on the method. + // Please set to [] if not provided, eg: [meta, input, [], [], method] + ch_input // [ meta_input, input file, genesets file, background file, method to run ] + + // other - for the moment these files are only needed for GSEA + // as it is the only one that takes expression data as input + // if in the future this setting is changed, this section could be removed + ch_contrasts // [ meta_contrast, contrast_variable, reference, target ] + ch_samplesheet // [ meta_exp, samples sheet ] + ch_featuresheet // [ meta_exp, features sheet, features id, features symbol ] + + main: + + ch_versions = Channel.empty() + + // Add method information into meta map of ch_input + // This information is used later to determine which method to run for each input + // Also, reorganize the structure to match them with the modules' input organization + + ch_input = ch_input + .multiMap { + meta_input, file, genesets, background, analysis_method -> + def meta_new = meta_input + [ 'method': analysis_method ] + input: + [ meta_new, file ] + genesets: + [ meta_new, genesets ] // NOTE here we assume that the modules will not make use of meta_genesets and meta_background + background: + [ meta_new, background ] + } + + // In the case of GSEA, it needs additional files coming from other channels that other methods don't use + // here we define the input channel for the GSEA section + + def criteria = multiMapCriteria { meta_input, input, genesets, meta_exp, samplesheet, featuresheet, features_id, features_symbol, meta_contrasts, variable, reference, target -> + def meta_contrasts_new = meta_contrasts + [ 'variable': variable, 'reference': reference, 'target': target ] // make sure variable, reference, target are in the meta + def meta_all = mergeMaps(meta_contrasts_new, meta_input) + 
input: + [ meta_all, input ] + genesets: + [ meta_all, genesets ] + contrasts_and_samples: + [ meta_all, samplesheet ] + features: + [ meta_exp, featuresheet ] + features_cols: + [ features_id, features_symbol ] + } + ch_preinput_for_gsea = ch_input.input + .join(ch_input.genesets) + .filter{ it[0].method == 'gsea' } + .combine(ch_samplesheet.join(ch_featuresheet)) + .combine(ch_contrasts) + .multiMap(criteria) + + // ---------------------------------------------------- + // Perform enrichment analysis with gprofiler2 + // ---------------------------------------------------- + + GPROFILER2_GOST( + ch_input.input.filter{ it[0].method == 'gprofiler2' }, + ch_input.genesets.filter{ it[0].method == 'gprofiler2'}, + ch_input.background.filter{ it[0].method == 'gprofiler2'} + ) + + // ---------------------------------------------------- + // Perform enrichment analysis with GSEA + // ---------------------------------------------------- + + // NOTE that GCT input can be more than 1, if they come from different tools (eg. limma, deseq2). + // CLS input can be as many as combinations of input x contrasts + // Whereas features can be only one file. 
+ + CUSTOM_TABULARTOGSEAGCT(ch_preinput_for_gsea.input) + + CUSTOM_TABULARTOGSEACLS(ch_preinput_for_gsea.contrasts_and_samples) + + CUSTOM_TABULARTOGSEACHIP( + ch_preinput_for_gsea.features.first(), + ch_preinput_for_gsea.features_cols.first() + ) + + ch_input_for_gsea = CUSTOM_TABULARTOGSEAGCT.out.gct + .join(CUSTOM_TABULARTOGSEACLS.out.cls) + .join( ch_preinput_for_gsea.genesets ) + + GSEA_GSEA( + ch_input_for_gsea, + ch_input_for_gsea.map{ tuple(it[0].reference, it[0].target) }, + CUSTOM_TABULARTOGSEACHIP.out.chip.first() + ) + + // ---------------------------------------------------- + // Perform enrichment analysis with GREA + // ---------------------------------------------------- + + PROPR_GREA( + ch_input.input.filter{ it[0].method == 'grea' }, + ch_input.genesets.filter{ it[0].method == 'grea' } + ) + + // collect versions info + ch_versions = ch_versions + .mix(GPROFILER2_GOST.out.versions) + .mix(CUSTOM_TABULARTOGSEAGCT.out.versions) + .mix(CUSTOM_TABULARTOGSEACLS.out.versions) + .mix(CUSTOM_TABULARTOGSEACHIP.out.versions) + .mix(GSEA_GSEA.out.versions) + .mix(PROPR_GREA.out.versions) + + emit: + // here we emit the outputs that will be useful afterwards in the + // nf-core/differentialabundance pipeline + + // gprofiler2-specific outputs + gprofiler2_all_enrich = GPROFILER2_GOST.out.all_enrich + gprofiler2_sub_enrich = GPROFILER2_GOST.out.sub_enrich + gprofiler2_plot_html = GPROFILER2_GOST.out.plot_html + + // gsea-specific outputs + gsea_report = GSEA_GSEA.out.report_tsvs_ref.join(GSEA_GSEA.out.report_tsvs_target) + + // grea-specific outputs + grea_results = PROPR_GREA.out.results + + // tool versions + versions = ch_versions +} diff --git a/subworkflows/nf-core/differential_functional_enrichment/meta.yml b/subworkflows/nf-core/differential_functional_enrichment/meta.yml new file mode 100644 index 00000000000..e296e3fe54e --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/meta.yml @@ -0,0 +1,154 @@ +name: 
"differential_functional_enrichment" +description: Run functional analysis on differential abundance analysis output +keywords: + - functional analysis + - functional enrichment + - differential + - over-representation analysis +components: + - gprofiler2/gost + - custom/tabulartogseagct + - custom/tabulartogseacls + - custom/tabulartogseachip + - gsea/gsea + - propr/grea +input: + - ch_input: + description: Channel with the input data for functional analysis. + structure: + - meta_input: + type: map + description: Metadata map + - input: + type: file + description: | + Input file. This should be the DE statistics obtained from the DE modules, + or the normalized abundance matrix (in the case of running GSEA). + - genesets: + type: file + description: | + Gene sets database. Currently all methods support GMT format. + - background: + type: file + description: | + Background features for functional analysis. + For the moment, it is only required for gprofiler2. + - analysis_method: + type: value + description: Analysis method (gprofiler2, gsea, or grea) + - ch_contrasts: + description: Channel with contrast information + structure: + - meta_contrast: + type: map + description: Contrast metadata map + - contrast_variable: + type: value + description: Contrast variable + - reference: + type: value + description: Reference level + - target: + type: value + description: Target level + - ch_samplesheet: + description: Channel with sample information + structure: + - meta_exp: + type: map + description: Experiment metadata map + - samplesheet: + type: file + description: Sample information file + pattern: "*.{csv,tsv}" + - ch_featuresheet: + description: Channel with features information + structure: + - meta_exp: + type: map + description: Experiment metadata map + - features: + type: file + description: Features information file + pattern: "*.{csv,tsv}" + - features_id: + type: value + description: Features id column + - features_symbol: + type: value + description: Features 
symbol column + +output: + - versions: + type: file + description: | + File 
containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" + - gprofiler2_all_enrich: + description: Channel containing the main enrichment table output from gprofiler2 + structure: + - meta: + type: map + description: Metadata map + - all_enrich: + type: file + description: Table listing all enriched pathways that were found by gprofiler2. + It can be empty if none are found. + pattern: "*.gprofiler2.all_enriched_pathways.tsv" + - gprofiler2_sub_enrich: + description: Channel containing the secondary enrichment table output from gprofiler2. + structure: + - meta: + type: map + description: Metadata map + - sub_enrich: + type: file + description: | + Table listing enriched pathways that were found from one particular source. + Note that it will only be created if any were found. + pattern: "*.gprofiler2.*.sub_enriched_pathways.tsv" + - gprofiler2_plot_html: + description: Channel containing the html report generated from gprofiler2. + structure: + - meta: + type: map + description: Metadata map + - plot_html: + type: file + description: | + HTML file with an interactive Manhattan plot of all enriched pathways. + Note that this file will only be generated if enriched pathways were found. + pattern: "*.gprofiler2.gostplot.html" + - gsea_report: + description: Channel containing all the output from GSEA needed for further reporting. + structure: + - meta: + type: map + description: Metadata map + - reports_ref: + type: file + description: Main TSV results report file for the reference group. + pattern: "*gsea_report_for_${reference}.tsv" + - reports_target: + type: file + description: Main TSV results report file for the target group. + pattern: "*gsea_report_for_${target}.tsv" + - grea_results: + description: | + Channel containing the output from GREA. + structure: + - meta: + type: map + description: Metadata map + - results: + type: file + description: Main TSV results file. 
+ pattern: "*.grea.tsv" +authors: + - "@suzannejin" + - "@bjlang" + - "@caraiz2001" +maintainers: + - "@suzannejin" + - "@pinin4fjords" diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/all.config b/subworkflows/nf-core/differential_functional_enrichment/tests/all.config new file mode 100644 index 00000000000..7f092f0d879 --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/all.config @@ -0,0 +1,64 @@ +process { + + cpus = 1 + + withName: 'DESEQ2_DIFFERENTIAL' { + ext.args = { [ + "--round_digits 5", + "--vs_method rlog", + (meta.blocking == null) ? "" : "--blocking_variables $meta.blocking" + ].join(' ').trim() } + ext.prefix = { "${meta.id}_${meta.method}" } + } + + withName: 'DESEQ2_NORM' { + ext.prefix = { "${meta.id}_${meta.method}_norm --round_digits 5" } + } + + withName: 'LIMMA_DIFFERENTIAL' { + ext.args = { [ + meta.blocking ? "--blocking_variables $meta.blocking" : "", // omit the flag when the contrast has no blocking factor (null or empty) + "--sample_id_col sample", + "--probe_id_col gene_id", + "--use_voom TRUE" + ].join(' ').trim() } + ext.prefix = { "${meta.id}_${meta.method}_voom" } + } + + withName: 'LIMMA_NORM' { + ext.prefix = { "${meta.id}_${meta.method}_voom_norm" } + } + + withName: "PROPR_PROPD"{ + ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"} + } + + withName: 'GPROFILER2_GOST' { + ext.args = [ + "--significant true", + "--measure_underrepresentation false", + "--correction_method gSCS", + "--evcodes false", + "--pval_threshold 0.05", + "--domain_scope annotated", + "--min_diff 1", + "--round_digits 4", + "--palette_name Blues", + "--de_id_column gene_id", + "--organism mmusculus" + ].join(' ').trim() + } + + withName: 'CUSTOM_TABULARTOGSEACLS' { + ext.args = { [ "variable": "$meta.variable" ] } + } + + withName: 'GSEA_GSEA' { + ext.args = { "-rnd_seed 10" } + ext.prefix = { "${meta.id}.${gene_sets.baseName}." 
} + } + + withName: "PROPR_GREA"{ + ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"} + } +} diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/deseq2_gprofiler2.config b/subworkflows/nf-core/differential_functional_enrichment/tests/deseq2_gprofiler2.config new file mode 100644 index 00000000000..458bb6dda14 --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/deseq2_gprofiler2.config @@ -0,0 +1,30 @@ +process { + withName: 'DESEQ2_DIFFERENTIAL' { + ext.args = { [ + "--round_digits 5", + "--vs_method rlog", + (meta.blocking == null) ? "" : "--blocking_variables $meta.blocking" + ].join(' ').trim() } + ext.prefix = { "${meta.id}_${meta.method}" } + } + + withName: 'DESEQ2_NORM' { + ext.prefix = { "${meta.id}_${meta.method}_norm --round_digits 5" } + } + + withName: 'GPROFILER2_GOST' { + ext.args = [ + "--significant true", + "--measure_underrepresentation false", + "--correction_method gSCS", + "--evcodes false", + "--pval_threshold 0.05", + "--domain_scope annotated", + "--min_diff 1", + "--round_digits 4", + "--palette_name Blues", + "--de_id_column gene_id", + "--organism mmusculus" + ].join(' ').trim() + } +} diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/deseq2_gsea.config b/subworkflows/nf-core/differential_functional_enrichment/tests/deseq2_gsea.config new file mode 100644 index 00000000000..4ec648e6cb8 --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/deseq2_gsea.config @@ -0,0 +1,23 @@ +process { + withName: 'DESEQ2_DIFFERENTIAL' { + ext.args = { [ + "--round_digits 5", + "--vs_method rlog", + (meta.blocking == null) ? 
"" : "--blocking_variables $meta.blocking" + ].join(' ').trim() } + ext.prefix = { "${meta.id}_${meta.method}" } + } + + withName: 'DESEQ2_NORM' { + ext.prefix = { "${meta.id}_${meta.method}_norm --round_digits 5" } + } + + withName: 'CUSTOM_TABULARTOGSEACLS' { + ext.args = { [ "variable": "$meta.variable" ] } + } + + withName: 'GSEA_GSEA' { + ext.args = { "-rnd_seed 10" } + ext.prefix = { "${meta.id}.${gene_sets.baseName}." } + } +} diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/gprofiler2.config b/subworkflows/nf-core/differential_functional_enrichment/tests/gprofiler2.config new file mode 100644 index 00000000000..1fe3a1c9b48 --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/gprofiler2.config @@ -0,0 +1,17 @@ +process { + withName: 'GPROFILER2_GOST' { + ext.args = [ + "--significant true", + "--measure_underrepresentation false", + "--correction_method gSCS", + "--evcodes false", + "--pval_threshold 0.05", + "--domain_scope annotated", + "--min_diff 1", + "--round_digits 4", + "--palette_name Blues", + "--de_id_column gene_id", + "--organism mmusculus" + ].join(' ').trim() + } +} diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test b/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test new file mode 100644 index 00000000000..e1138d4f79e --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test @@ -0,0 +1,494 @@ +nextflow_workflow { + + name "Test Subworkflow DIFFERENTIAL_FUNCTIONAL_ENRICHMENT" + script "../main.nf" + workflow "DIFFERENTIAL_FUNCTIONAL_ENRICHMENT" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/differential_functional_enrichment" + tag "gprofiler2" + tag "gprofiler2/gost" + tag "custom" + tag "custom/tabulartogseagct" + tag "custom/tabulartogseacls" + tag "custom/tabulartogseachip" + tag "gsea" + tag "gsea/gsea" + tag "propr" + tag "propr/grea" + + test("test gprofiler2 - mouse") 
{ + tag 'gprofiler2_basic' + config './gprofiler2.config' + // NOTE that this generates the same outputs 
as the module GPROFILER2_GOST test + + when { + workflow { + """ + ch_input = Channel.of([ + ['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], + file("https://github.com/nf-core/test-datasets/raw/refs/heads/differentialabundance/modules_testdata/Condition_genotype_WT_KO.deseq2.results_filtered.tsv", checkIfExists: true), + [], + file("https://github.com/nf-core/test-datasets/raw/refs/heads/differentialabundance/modules_testdata/study.filtered.tsv", checkIfExists: true), + 'gprofiler2' + ]) + + input[0] = ch_input + input[1] = Channel.of([[], [], [], []]) + input[2] = Channel.of([[], []]) + input[3] = Channel.of([[], [], [], []]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.gprofiler2_all_enrich, + workflow.out.gprofiler2_sub_enrich, + workflow.out.gprofiler2_plot_html.collect{ meta, html -> file(html).name }, //assert unstable file, + path(workflow.out.versions.get(0)).yaml, + workflow.out.versions + ).match()} + ) + } + } + + test("deseq2 + gprofiler2 - mouse") { + tag "deseq2+gprofiler2" + config "./deseq2_gprofiler2.config" + + setup { + run("ABUNDANCE_DIFFERENTIAL_FILTER") { + script "../../abundance_differential_filter/main.nf" + workflow { + """ + // Define test data + def testData = [ + expression_test_data_dir: params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/', + contrasts_file: 'SRP254919.contrasts.csv', + abundance_file: 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', + samplesheet_file: 'SRP254919.samplesheet.csv' + ] + + // Define inputs + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.samplesheet_file) + ]) + ch_transcript_lengths = Channel.of([ [], [] ]) + ch_control_features = Channel.of([ [], [] ]) + ch_contrasts = Channel.fromPath(file(testData.expression_test_data_dir + testData.contrasts_file)) + .splitCsv ( header:true, sep:',' ) + 
.map{ + tuple(it, it.variable, it.reference, it.target) + } + ch_input = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.abundance_file), + 'deseq2', // analysis method + 1.5, // FC threshold + 0.05 // padj threshold + ]) + + input[0] = ch_input + input[1] = ch_samplesheet + input[2] = ch_transcript_lengths + input[3] = ch_control_features + input[4] = ch_contrasts + """ + } + } + } + + when { + workflow { + """ + ch_input = ABUNDANCE_DIFFERENTIAL_FILTER.out.results_genewise_filtered + .map { meta, results -> + def meta_new = meta - ['method': meta.method] + meta_new.method_de = meta.method + [meta_new, results, [], [], 'gprofiler2'] + } + + input[0] = ch_input + input[1] = Channel.of([[], [], [], []]) + input[2] = Channel.of([[], []]) + input[3] = Channel.of([[], [], [], []]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.gprofiler2_all_enrich, + workflow.out.gprofiler2_sub_enrich, + workflow.out.gprofiler2_plot_html.collect{ meta, html -> file(html).name }, //assert unstable file + path(workflow.out.versions.get(0)).yaml, + workflow.out.versions + ).match()}, + ) + } + } + + test("deseq2 + gsea - mouse") { + tag "deseq2+gsea" + config "./deseq2_gsea.config" + + setup { + run("ABUNDANCE_DIFFERENTIAL_FILTER") { + script "../../abundance_differential_filter/main.nf" + workflow { + """ + // Define test data + def testData = [ + expression_test_data_dir: params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/', + contrasts_file: 'SRP254919.contrasts.csv', + abundance_file: 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', + samplesheet_file: 'SRP254919.samplesheet.csv' + ] + + // Define inputs + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.samplesheet_file) + ]) + ch_transcript_lengths = Channel.of([ [], [] ]) + ch_control_features = Channel.of([ [], [] ]) + ch_contrasts = 
Channel.fromPath(file(testData.expression_test_data_dir + testData.contrasts_file)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + ch_input = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.abundance_file), + 'deseq2', // analysis method + 1.5, // FC threshold + 0.05 // padj threshold + ]) + + input[0] = ch_input + input[1] = ch_samplesheet + input[2] = ch_transcript_lengths + input[3] = ch_control_features + input[4] = ch_contrasts + """ + } + } + } + + when { + workflow { + """ + // Define test data + def test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/' + def testData = [ + contrasts_file: test_data_dir + 'rnaseq_expression/SRP254919.contrasts.csv', + abundance_file: test_data_dir + 'rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv', + samplesheet_file: test_data_dir + 'rnaseq_expression/SRP254919.samplesheet.csv', + genesets_file: test_data_dir + 'gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' + ] + + // define input channels + ch_input = ABUNDANCE_DIFFERENTIAL_FILTER.out.normalised_matrix + .combine(Channel.fromPath(testData.genesets_file)) + .map { meta, matrix, genesets -> + def meta_new = meta - ['method': meta.method] + meta_new.method_de = meta.method + [meta_new, matrix, genesets, [], 'gsea'] + } + ch_contrasts = Channel.fromPath(file(testData.contrasts_file)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(testData.samplesheet_file) + ]) + ch_featuresheet = Channel.of([ + [ id:'test' ], + file(testData.abundance_file), + 'gene_id', + 'gene_name' + ]) + + input[0] = ch_input + input[1] = ch_contrasts + input[2] = ch_samplesheet + input[3] = ch_featuresheet + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.gsea_report, + path(workflow.out.versions.get(0)).yaml, + 
workflow.out.versions + ).match()} + ) + } + } + + test("propd + grea - mouse") { + tag "propd+grea" + config "./propd_grea.config" + + setup { + run("ABUNDANCE_DIFFERENTIAL_FILTER") { + script "../../abundance_differential_filter/main.nf" + workflow { + """ + // Define test data + def testData = [ + expression_test_data_dir: params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/', + contrasts_file: 'SRP254919.contrasts.csv', + abundance_file: 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', + samplesheet_file: 'SRP254919.samplesheet.csv' + ] + + // Define inputs + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.samplesheet_file) + ]) + ch_transcript_lengths = Channel.of([ [], [] ]) + ch_control_features = Channel.of([ [], [] ]) + ch_contrasts = Channel.fromPath(file(testData.expression_test_data_dir + testData.contrasts_file)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + ch_input = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.abundance_file), + 'propd', // analysis method + 1.5, // FC threshold + 100 // stat (weighted connectivity) threshold + ]) + + input[0] = ch_input + input[1] = ch_samplesheet + input[2] = ch_transcript_lengths + input[3] = ch_control_features + input[4] = ch_contrasts + """ + } + } + } + + when { + workflow { + """ + ch_input = ABUNDANCE_DIFFERENTIAL_FILTER.out.adjacency + .combine(Channel.fromPath(params.modules_testdata_base_path + 'genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt')) + .map { meta, results, genesets -> + def meta_new = meta - ['method': meta.method] + meta_new.method_de = meta.method + [meta_new, results, genesets, [], 'grea'] + } + + input[0] = ch_input + input[1] = Channel.of([[], [], [], []]) // contrasts placeholder (GSEA-specific input, left empty) + input[2] = Channel.of([[], []]) // samplesheet placeholder (GSEA-specific input, left empty) + input[3] = Channel.of([[], [], [], []]) // featuresheet placeholder (GSEA-specific input, left empty) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(
+ workflow.out.grea_results, + path(workflow.out.versions.get(0)).yaml, + workflow.out.versions + ).match()}, + ) + } + } + + test("deseq2|limmavoom|propd + gprofiler2|gsea|grea - mouse") { + tag 'all' + config './all.config' + + setup { + run("ABUNDANCE_DIFFERENTIAL_FILTER") { + script "../../abundance_differential_filter/main.nf" + workflow { + """ + // Define test data + def testData = [ + expression_test_data_dir: params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/', + contrasts_file: 'SRP254919.contrasts.csv', + abundance_file: 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', + samplesheet_file: 'SRP254919.samplesheet.csv' + ] + + // Define inputs + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.samplesheet_file) + ]) + ch_transcript_lengths = Channel.of([ [], [] ]) + ch_control_features = Channel.of([ [], [] ]) + ch_contrasts = Channel.fromPath(file(testData.expression_test_data_dir + testData.contrasts_file)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + ch_input = Channel.of( + [ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.abundance_file), + 'deseq2', // analysis method + 1.5, // FC threshold + 0.05 // stat (adjusted p-value) threshold + ], + [ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.abundance_file), + 'limma', // analysis method + 1.5, // FC threshold + 0.05 // stat (adjusted p-value) threshold + ], + [ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.abundance_file), + 'propd', // analysis method + 1.5, // FC threshold + 100 // stat (weighted connectivity) threshold + ] + ) + + input[0] = ch_input + input[1] = ch_samplesheet + input[2] = ch_transcript_lengths + input[3] = ch_control_features + input[4] = ch_contrasts + """ + } + } + } + + when { + workflow { + """ + // Define test data + def test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/' + def testData = [ + contrasts_file:
test_data_dir + 'rnaseq_expression/SRP254919.contrasts.csv', + abundance_file: test_data_dir + 'rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv', + samplesheet_file: test_data_dir + 'rnaseq_expression/SRP254919.samplesheet.csv', + genesets_file: test_data_dir + 'gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' + ] + + // define input channels: tag each differential analysis output with the enrichment method to run on it + ch_input_for_gprofiler2 = ABUNDANCE_DIFFERENTIAL_FILTER.out.results_genewise_filtered + .map { meta, results -> + def meta_new = meta - ['method': meta.method] + meta_new.method_de = meta.method + [meta_new, results, 'gprofiler2'] + } + ch_input_for_gsea = ABUNDANCE_DIFFERENTIAL_FILTER.out.normalised_matrix + .map { meta, results -> + def meta_new = meta - ['method': meta.method] + meta_new.method_de = meta.method + [meta_new, results, 'gsea'] + } + ch_input_for_grea = ABUNDANCE_DIFFERENTIAL_FILTER.out.adjacency + .map { meta, results -> + def meta_new = meta - ['method': meta.method] + meta_new.method_de = meta.method + [meta_new, results, 'grea'] + } + ch_input = ch_input_for_gprofiler2 + .mix(ch_input_for_gsea) + .mix(ch_input_for_grea) + .combine(Channel.fromPath(testData.genesets_file)) + .map { meta, input_file, method, genesets -> + [meta, input_file, genesets, [], method] // method moves last; 4th slot (presumably the background file) stays empty + } + + // define gsea-specific inputs + ch_contrasts = Channel.fromPath(file(testData.contrasts_file)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(testData.samplesheet_file) + ]) + ch_featuresheet = Channel.of([ + [ id:'test' ], + file(testData.abundance_file), + 'gene_id', + 'gene_name' + ]) + + input[0] = ch_input + input[1] = ch_contrasts + input[2] = ch_samplesheet + input[3] = ch_featuresheet + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.gprofiler2_all_enrich, + workflow.out.gprofiler2_sub_enrich, + workflow.out.gprofiler2_plot_html.collect{ meta, html ->
file(html).name }, // snapshot only the file name: the HTML content is unstable + workflow.out.gsea_report, + workflow.out.grea_results, + workflow.out.versions + ).match()}, + ) + } + + } + + test("stub") { + tag 'gprofiler2_basic' + config './gprofiler2.config' + options '-stub' + + when { + workflow { + """ + ch_input = Channel.of([ + ['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], + file("https://github.com/nf-core/test-datasets/raw/refs/heads/differentialabundance/modules_testdata/Condition_genotype_WT_KO.deseq2.results_filtered.tsv", checkIfExists: true), + [], + file("https://github.com/nf-core/test-datasets/raw/refs/heads/differentialabundance/modules_testdata/study.filtered.tsv", checkIfExists: true), + 'gprofiler2' + ]) + + input[0] = ch_input + input[1] = Channel.of([[], [], [], []]) // contrasts placeholder (GSEA-specific input, left empty) + input[2] = Channel.of([[], []]) // samplesheet placeholder (GSEA-specific input, left empty) + input[3] = Channel.of([[], [], [], []]) // featuresheet placeholder (GSEA-specific input, left empty) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + +} diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test.snap b/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test.snap new file mode 100644 index 00000000000..5591114c959 --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test.snap @@ -0,0 +1,600 @@ +{ + "deseq2|limmavoom|propd + gprofiler2|gsea|grea - mouse": { + "content": [ + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6__test.gprofiler2.all_enriched_pathways.tsv:md5,d586d92fb6cc3e41d493d14c4d9a7d92" + ], + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "fc_threshold": 1.5, +
"stat_threshold": 0.05, + "method_de": "limma", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6__test.gprofiler2.all_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "fc_threshold": 1.5, + "stat_threshold": 100, + "method_de": "propd", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6__test.gprofiler2.all_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6_sample_number_test.gprofiler2.all_enriched_pathways.tsv:md5,fa880c70d06f56d8dad0ac8078e6233c" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "limma", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6_sample_number_test.gprofiler2.all_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "fc_threshold": 1.5, + "stat_threshold": 100, + "method_de": "propd", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6_sample_number_test.gprofiler2.all_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + "method": "gprofiler2" + }, + [ + 
"treatment_mCherry_hND6__test.gprofiler2.GO:BP.sub_enriched_pathways.tsv:md5,4c95059438af872253aa95a5ff127fca", + "treatment_mCherry_hND6__test.gprofiler2.GO:CC.sub_enriched_pathways.tsv:md5,b4e4009e067f723694efc1a46a609e2b", + "treatment_mCherry_hND6__test.gprofiler2.GO:MF.sub_enriched_pathways.tsv:md5,ece72606ce6d1d037cfd5f5b198233a8", + "treatment_mCherry_hND6__test.gprofiler2.REAC.sub_enriched_pathways.tsv:md5,dc7b6f67903e25076ab95a7e1bb39bbe" + ] + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + "method": "gprofiler2" + }, + [ + "treatment_mCherry_hND6_sample_number_test.gprofiler2.GO:BP.sub_enriched_pathways.tsv:md5,dc27af1c941636f8b03e8c8724773725", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.GO:CC.sub_enriched_pathways.tsv:md5,09b7770d59d2efa8a58f300a16a41ae7", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.GO:MF.sub_enriched_pathways.tsv:md5,a261d658ca1b1ec0511e55da734d7810", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.KEGG.sub_enriched_pathways.tsv:md5,3cbdbbb1c5b232ab73d35d4a0953d19c", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.REAC.sub_enriched_pathways.tsv:md5,8472cb5792bf7ec044bb95df356ad47e" + ] + ] + ], + [ + "treatment_mCherry_hND6__test.gprofiler2.gostplot.html", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.gostplot.html" + ], + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "method_de": "deseq2", + "method": "gsea" + }, + "treatment_mCherry_hND6__test.mh.all.v2022.1.Mm.symbols.gsea_report_for_mCherry.tsv:md5,ce8792382ae299749445767ff16aaecc", + "treatment_mCherry_hND6__test.mh.all.v2022.1.Mm.symbols.gsea_report_for_hND6.tsv:md5,354a961c7e1417db2bf1f7e8d00c54f0" + ], + [ + { + "id": 
"treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "method_de": "limma", + "method": "gsea" + }, + "treatment_mCherry_hND6__test.mh.all.v2022.1.Mm.symbols.gsea_report_for_mCherry.tsv:md5,ce8792382ae299749445767ff16aaecc", + "treatment_mCherry_hND6__test.mh.all.v2022.1.Mm.symbols.gsea_report_for_hND6.tsv:md5,a4dcc9581c9c63d35ef32ee0df882074" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "method_de": "deseq2", + "method": "gsea" + }, + "treatment_mCherry_hND6_sample_number_test.mh.all.v2022.1.Mm.symbols.gsea_report_for_mCherry.tsv:md5,ce8792382ae299749445767ff16aaecc", + "treatment_mCherry_hND6_sample_number_test.mh.all.v2022.1.Mm.symbols.gsea_report_for_hND6.tsv:md5,354a961c7e1417db2bf1f7e8d00c54f0" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "method_de": "limma", + "method": "gsea" + }, + "treatment_mCherry_hND6_sample_number_test.mh.all.v2022.1.Mm.symbols.gsea_report_for_mCherry.tsv:md5,ce8792382ae299749445767ff16aaecc", + "treatment_mCherry_hND6_sample_number_test.mh.all.v2022.1.Mm.symbols.gsea_report_for_hND6.tsv:md5,a4dcc9581c9c63d35ef32ee0df882074" + ] + ], + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "method_de": "propd", + "method": "grea" + }, + "treatment_mCherry_hND6__test.grea.tsv:md5,786faeccf39926d2f7c980ef549a2697" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "method_de": "propd", + "method": "grea" + }, + "treatment_mCherry_hND6_sample_number_test.grea.tsv:md5,786faeccf39926d2f7c980ef549a2697" + ] + ], + [ 
+ "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", + "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", + "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", + "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", + "versions.yml:md5,7861e3047b941b86ef50124168a13b51", + "versions.yml:md5,7861e3047b941b86ef50124168a13b51", + "versions.yml:md5,7861e3047b941b86ef50124168a13b51", + "versions.yml:md5,7861e3047b941b86ef50124168a13b51", + "versions.yml:md5,7861e3047b941b86ef50124168a13b51", + "versions.yml:md5,7861e3047b941b86ef50124168a13b51", + "versions.yml:md5,cd9cd1563a983e586b15fd2276da8bfb", + "versions.yml:md5,cd9cd1563a983e586b15fd2276da8bfb", + "versions.yml:md5,f2db818ec8143f64399247548098b643", + "versions.yml:md5,f3be00003ea786284231f77c03c92eed", + "versions.yml:md5,f3be00003ea786284231f77c03c92eed", + "versions.yml:md5,f3be00003ea786284231f77c03c92eed", + "versions.yml:md5,f3be00003ea786284231f77c03c92eed", + "versions.yml:md5,f823a4e0a4d8744b89aff1391ca74e3a", + "versions.yml:md5,f823a4e0a4d8744b89aff1391ca74e3a", + "versions.yml:md5,f823a4e0a4d8744b89aff1391ca74e3a", + "versions.yml:md5,f823a4e0a4d8744b89aff1391ca74e3a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T15:12:40.203142527" + }, + "propd + grea - mouse": { + "content": [ + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "method_de": "propd", + "method": "grea" + }, + "treatment_mCherry_hND6__test.grea.tsv:md5,786faeccf39926d2f7c980ef549a2697" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "method_de": "propd", + "method": "grea" + }, + "treatment_mCherry_hND6_sample_number_test.grea.tsv:md5,786faeccf39926d2f7c980ef549a2697" + ] + ], + { + "DIFFERENTIAL_FUNCTIONAL_ENRICHMENT:PROPR_GREA": { + "r-propr": "5.1.5" + } + }, + 
[ + "versions.yml:md5,cd9cd1563a983e586b15fd2276da8bfb", + "versions.yml:md5,cd9cd1563a983e586b15fd2276da8bfb" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T15:10:22.944909501" + }, + "deseq2 + gprofiler2 - mouse": { + "content": [ + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6__test.gprofiler2.all_enriched_pathways.tsv:md5,d586d92fb6cc3e41d493d14c4d9a7d92" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + "method": "gprofiler2" + }, + "treatment_mCherry_hND6_sample_number_test.gprofiler2.all_enriched_pathways.tsv:md5,fa880c70d06f56d8dad0ac8078e6233c" + ] + ], + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + "method": "gprofiler2" + }, + [ + "treatment_mCherry_hND6__test.gprofiler2.GO:BP.sub_enriched_pathways.tsv:md5,4c95059438af872253aa95a5ff127fca", + "treatment_mCherry_hND6__test.gprofiler2.GO:CC.sub_enriched_pathways.tsv:md5,b4e4009e067f723694efc1a46a609e2b", + "treatment_mCherry_hND6__test.gprofiler2.GO:MF.sub_enriched_pathways.tsv:md5,ece72606ce6d1d037cfd5f5b198233a8", + "treatment_mCherry_hND6__test.gprofiler2.REAC.sub_enriched_pathways.tsv:md5,dc7b6f67903e25076ab95a7e1bb39bbe" + ] + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "fc_threshold": 1.5, + "stat_threshold": 0.05, + "method_de": "deseq2", + 
"method": "gprofiler2" + }, + [ + "treatment_mCherry_hND6_sample_number_test.gprofiler2.GO:BP.sub_enriched_pathways.tsv:md5,dc27af1c941636f8b03e8c8724773725", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.GO:CC.sub_enriched_pathways.tsv:md5,09b7770d59d2efa8a58f300a16a41ae7", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.GO:MF.sub_enriched_pathways.tsv:md5,a261d658ca1b1ec0511e55da734d7810", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.KEGG.sub_enriched_pathways.tsv:md5,3cbdbbb1c5b232ab73d35d4a0953d19c", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.REAC.sub_enriched_pathways.tsv:md5,8472cb5792bf7ec044bb95df356ad47e" + ] + ] + ], + [ + "treatment_mCherry_hND6__test.gprofiler2.gostplot.html", + "treatment_mCherry_hND6_sample_number_test.gprofiler2.gostplot.html" + ], + { + "DIFFERENTIAL_FUNCTIONAL_ENRICHMENT:GPROFILER2_GOST": { + "r-base": "4.3.3", + "r-ggplot2": "3.4.3", + "r-gprofiler2": "0.2.2" + } + }, + [ + "versions.yml:md5,7861e3047b941b86ef50124168a13b51", + "versions.yml:md5,7861e3047b941b86ef50124168a13b51" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T15:08:35.939943561" + }, + "test gprofiler2 - mouse": { + "content": [ + [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + "Condition_genotype_WT_KO.gprofiler2.all_enriched_pathways.tsv:md5,1134a02ca061c463bcbff277eefbfb19" + ] + ], + [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + [ + "Condition_genotype_WT_KO.gprofiler2.GO:BP.sub_enriched_pathways.tsv:md5,d527b94cdb160070bcaa0bfb0cecf914", + "Condition_genotype_WT_KO.gprofiler2.GO:CC.sub_enriched_pathways.tsv:md5,ef418c3f06d50446317928e37ec8ddfb", + 
"Condition_genotype_WT_KO.gprofiler2.GO:MF.sub_enriched_pathways.tsv:md5,e460d4917feb0b64d334a528f59e0731", + "Condition_genotype_WT_KO.gprofiler2.HP.sub_enriched_pathways.tsv:md5,865d8f092503552831c51d775a98c6eb", + "Condition_genotype_WT_KO.gprofiler2.KEGG.sub_enriched_pathways.tsv:md5,413724002abe683f376ea914d4f21ade", + "Condition_genotype_WT_KO.gprofiler2.MIRNA.sub_enriched_pathways.tsv:md5,fce81051d7af955ddb2925ba2da9ff57", + "Condition_genotype_WT_KO.gprofiler2.REAC.sub_enriched_pathways.tsv:md5,e89e1876698ea644671a0720c85f4dbb", + "Condition_genotype_WT_KO.gprofiler2.TF.sub_enriched_pathways.tsv:md5,e272217ec7fcf01ea463ab8bcc8335cf", + "Condition_genotype_WT_KO.gprofiler2.WP.sub_enriched_pathways.tsv:md5,fdd1efa836d85bb127e933e925290cba" + ] + ] + ], + [ + "Condition_genotype_WT_KO.gprofiler2.gostplot.html" + ], + { + "DIFFERENTIAL_FUNCTIONAL_ENRICHMENT:GPROFILER2_GOST": { + "r-base": "4.3.3", + "r-ggplot2": "3.4.3", + "r-gprofiler2": "0.2.2" + } + }, + [ + "versions.yml:md5,7861e3047b941b86ef50124168a13b51" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T15:07:42.734133807" + }, + "deseq2 + gsea - mouse": { + "content": [ + [ + [ + { + "id": "treatment_mCherry_hND6__test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "", + "method_de": "deseq2", + "method": "gsea" + }, + "treatment_mCherry_hND6__test.mh.all.v2022.1.Mm.symbols.gsea_report_for_mCherry.tsv:md5,ce8792382ae299749445767ff16aaecc", + "treatment_mCherry_hND6__test.mh.all.v2022.1.Mm.symbols.gsea_report_for_hND6.tsv:md5,354a961c7e1417db2bf1f7e8d00c54f0" + ], + [ + { + "id": "treatment_mCherry_hND6_sample_number_test", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "sample_number", + "method_de": "deseq2", + "method": "gsea" + }, + "treatment_mCherry_hND6_sample_number_test.mh.all.v2022.1.Mm.symbols.gsea_report_for_mCherry.tsv:md5,ce8792382ae299749445767ff16aaecc", + 
"treatment_mCherry_hND6_sample_number_test.mh.all.v2022.1.Mm.symbols.gsea_report_for_hND6.tsv:md5,354a961c7e1417db2bf1f7e8d00c54f0" + ] + ], + { + "DIFFERENTIAL_FUNCTIONAL_ENRICHMENT:GSEA_GSEA": { + "gsea": "4.3.2" + } + }, + [ + "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", + "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", + "versions.yml:md5,f2db818ec8143f64399247548098b643", + "versions.yml:md5,f3be00003ea786284231f77c03c92eed", + "versions.yml:md5,f3be00003ea786284231f77c03c92eed", + "versions.yml:md5,f823a4e0a4d8744b89aff1391ca74e3a", + "versions.yml:md5,f823a4e0a4d8744b89aff1391ca74e3a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T15:09:55.875311067" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + "Condition_genotype_WT_KO.gprofiler2.all_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + "Condition_genotype_WT_KO.gprofiler2.*.sub_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + "Condition_genotype_WT_KO.gprofiler2.gostplot.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,7861e3047b941b86ef50124168a13b51" + ], + "gprofiler2_all_enrich": [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + 
"Condition_genotype_WT_KO.gprofiler2.all_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gprofiler2_plot_html": [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + "Condition_genotype_WT_KO.gprofiler2.gostplot.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gprofiler2_sub_enrich": [ + [ + { + "id": "Condition_genotype_WT_KO", + "variable": "Condition genotype", + "reference": "WT", + "target": "KO", + "blocking": "batch", + "method": "gprofiler2" + }, + "Condition_genotype_WT_KO.gprofiler2.*.sub_enriched_pathways.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "grea_results": [ + + ], + "gsea_report": [ + + ], + "versions": [ + "versions.yml:md5,7861e3047b941b86ef50124168a13b51" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-24T17:38:14.74535" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/propd_grea.config b/subworkflows/nf-core/differential_functional_enrichment/tests/propd_grea.config new file mode 100644 index 00000000000..c1a28351eb7 --- /dev/null +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/propd_grea.config @@ -0,0 +1,15 @@ +process { + // Run every process on a single core so that results are reproducible. + // NOTE: the GREA module relies on parallelization to run its permutation tests. + // The permutations are done within each node, so set.seed does not give reproducible results + // when nodes start and finish in a different order from one run to the next. + cpus = 1 + + withName: "PROPR_PROPD"{ + ext.args = { "--round_digits 5 --save_adjacency true --features_id_col gene_name"} + } + withName: "PROPR_GREA"{ + ext.args = { "--permutation 10 --set_min 10 --seed 123 --round_digits 5"} + } +} +