Add support for generating taxprofiler/funcscan input samplesheets for preprocessed FASTQs/FASTAs #688

Draft · wants to merge 19 commits into base: dev
7 changes: 5 additions & 2 deletions docs/output.md
@@ -26,6 +26,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

Note that when specifying the parameter `--coassemble_group`, the corresponding output filenames/directories of the assembly or downstream processes will use the group ID (more precisely, the term `group-[group_id]`) instead of the sample ID.

The pipeline can also generate input samplesheets for downstream pipelines.
These are stored in `<outdir>/downstream_samplesheets`.

## Quality control

These steps trim away the adapter sequences present in input reads, trim away low-quality bases, and discard reads that are too short.
@@ -766,8 +769,8 @@ The pipeline can also generate input files for the following downstream pipeline
<summary>Output files</summary>

- `downstream_samplesheets/`
- `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembled contig FASTA files produced by nf-core/mag (MEGAHIT, SPAdes, SPAdesHybrid)
- `taxprofiler.csv`: Partially filled out nf-core/taxprofiler csv with paths to preprocessed reads (adapter trimmed, host removed etc.) `.fastq.gz`
- `taxprofiler.csv`: Partially filled out nf-core/taxprofiler `--input` csv with paths to the preprocessed reads (adapter trimmed, host removed, etc.) in `.fastq.gz` format, i.e. the direct input into MEGAHIT, SPAdes, and SPAdesHybrid.
- `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembled contig FASTA files produced by nf-core/mag (i.e. the direct output from MEGAHIT, SPAdes, and SPAdesHybrid, not bins). Illustrative examples of both files are shown below.

</details>
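For illustration only, the generated files might look roughly as follows. The column names are assumptions based on the documented `--input` formats of nf-core/funcscan and nf-core/taxprofiler, and the paths are placeholders rather than real nf-core/mag output paths:

```csv
# funcscan.csv (illustrative)
sample,fasta
sample1,/path/to/outdir/Assembly/MEGAHIT/MEGAHIT-sample1.contigs.fa.gz

# taxprofiler.csv (illustrative; columns such as run_accession/instrument_platform may need completing by the user)
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
sample1,,,/path/to/outdir/sample1_trimmed_1.fastq.gz,/path/to/outdir/sample1_trimmed_2.fastq.gz,
```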

5 changes: 3 additions & 2 deletions nextflow_schema.json
@@ -96,9 +96,10 @@
},
"generate_pipeline_samplesheets": {
"type": "string",
"default": "funcscan,taxprofiler",
"description": "Specify which pipeline to generate a samplesheet for.",
"fa_icon": "fas fa-toolbox"
"help": "Note that the nf-core/funcscan samplesheet will only include paths to raw assemblies, not bins\n\nThe nf-core/taxprofiler samplesheet will include of paths the pre-processed reads that are used are used as input for _de novo_ assembly.",
"fa_icon": "fas fa-toolbox",
"pattern": "^(taxprofiler|funcscan)(?:,(taxprofiler|funcscan)){0,1}"
}
}
},
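As a usage sketch, with the parameter names as used in this PR and purely illustrative values, generating both samplesheets would amount to something like:

```groovy
// Illustrative only: enable downstream samplesheet generation for both pipelines.
// generate_downstream_samplesheets acts as the on/off switch validated during
// pipeline initialisation; generate_pipeline_samplesheets must match the schema
// pattern above (one pipeline name, or both comma-separated).
params {
    generate_downstream_samplesheets = true
    generate_pipeline_samplesheets   = 'taxprofiler,funcscan'
}
```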
24 changes: 12 additions & 12 deletions subworkflows/local/generate_downstream_samplesheets/main.nf
Contributor:

It looks like @jfy133 used only one workflow, which will selectively generate samplesheets based on params.generate_pipeline_samplesheets. Do you think it would be best to keep that consistent?
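For reference, a minimal sketch of that single-workflow pattern (channel contents and exact logic are assumptions; the real implementation lives in the collapsed `GENERATE_DOWNSTREAM_SAMPLESHEETS` hunk below) could look like:

```groovy
// Hypothetical sketch: one dispatching workflow decides which samplesheets to
// write based solely on params.generate_pipeline_samplesheets.
workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
    take:
    ch_reads        // preprocessed reads, e.g. [ meta, [ fastq_1, fastq_2 ] ]
    ch_assemblies   // assembled contigs, e.g. [ meta, fasta ]

    main:
    def pipelines = params.generate_pipeline_samplesheets.tokenize(',')

    if (pipelines.contains('taxprofiler')) {
        SAMPLESHEET_TAXPROFILER(ch_reads)
    }
    if (pipelines.contains('funcscan')) {
        SAMPLESHEET_FUNCSCAN(ch_assemblies)
    }
}
```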

Contributor:

Also, since FastQ files are being pulled from the publishDir, it might be a good idea to include options that override user inputs for params.publish_dir_mode (so that it is always 'copy' if a samplesheet is generated) and params.save_clipped_reads, params.save_phixremoved_reads, etc., so that the preprocessed FastQ files are published to the params.outdir if a downstream samplesheet is generated.

@@ -7,8 +7,7 @@ workflow SAMPLESHEET_TAXPROFILER {
ch_reads

main:
format = 'csv' // most common format in nf-core
format_sep = ','
format = 'csv'

def fastq_rel_path = '/'
if (params.bbnorm) {
@@ -36,7 +35,7 @@
}
.tap{ ch_colnames }

channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'taxprofiler', format, format_sep)
channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/mag", format)

}

@@ -45,8 +44,7 @@ workflow SAMPLESHEET_FUNCSCAN {
ch_assemblies

main:
format = 'csv' // most common format in nf-core
format_sep = ','
format = 'csv'

ch_list_for_samplesheet = ch_assemblies
Member:

Next thing which I don't think will be so complicated is to add another input channel for bins, and here make an if/else statement if they want to send just the raw assemblies (all contigs) or binned contigs to the samplesheet.

It will need another pipeline level parameter too though, --generate_samplesheet_funcscan_seqtype or something.

.map {
@@ -57,8 +55,7 @@
}
.tap{ ch_colnames }

channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'funcscan', format, format_sep)

channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/funcscan", format)
}

workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
@@ -78,14 +75,17 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
}
}

// Constructs the header string and then the strings of each row, and writes them to a samplesheet file via collectFile
def channelToSamplesheet(ch_header, ch_list_for_samplesheet, outdir_subdir, pipeline, format, format_sep) {
def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format]

def ch_header = ch_list_for_samplesheet

ch_header
.first()
.map{ it.keySet().join(format_sep) }
.concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) })
.map { it.keySet().join(format_sep) }
.concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })
.collectFile(
name:"${params.outdir}/${outdir_subdir}/${pipeline}.${format}",
name: "${path}.${format}",
newLine: true,
sort: false
)
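As a self-contained illustration of the refactored helper above (the demo channel, values, and output path are assumptions, not part of this PR), the header row comes from the first map's keys and every map's values become one data row:

```groovy
// Minimal runnable sketch of the channelToSamplesheet pattern.
def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
    def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format]

    ch_list_for_samplesheet
        .first()
        .map { it.keySet().join(format_sep) }                                   // header from the first map's keys
        .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })   // one row per map
        .collectFile(name: "${path}.${format}", newLine: true, sort: false)
}

workflow {
    // Key order defines column order (Groovy map literals preserve insertion order).
    ch_demo = Channel.of(
        [sample: 'sampleA', fastq_1: '/abs/path/A_R1.fastq.gz', fastq_2: '/abs/path/A_R2.fastq.gz'],
        [sample: 'sampleB', fastq_1: '/abs/path/B_R1.fastq.gz', fastq_2: '/abs/path/B_R2.fastq.gz']
    )
    channelToSamplesheet(ch_demo, 'results/downstream_samplesheets/demo', 'csv')
}
```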

This file was deleted.

5 changes: 5 additions & 0 deletions subworkflows/local/utils_nfcore_mag_pipeline/main.nf
@@ -118,6 +118,11 @@ workflow PIPELINE_INITIALISATION {
//
validateInputParameters(
hybrid

// Validate samplesheet generation parameters
if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) {
error('[nf-core/createtaxdb] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets`! Check input.')
Author:

nf-core/mag ?

}
)

// Validate PRE-ASSEMBLED CONTIG input when supplied