improve conda directive, add profiles

theislab · Nov 4, 2023 · 3534e20 · 3534e20
1 parent ea82311
commit 3534e20
Show file tree

Hide file tree

Showing 51 changed files with 716 additions and 639 deletions.
diff --git a/bin/HTODemux-visualisation.R b/bin/HTODemux-visualisation.R
@@ -9,30 +9,30 @@ parser <- ArgumentParser("Parameters for HTODemux Visualisation")
 parser$add_argument("--hashtagPath",help="folder where rds object was saved from the first part of HTODemux")
 parser$add_argument("--assay",help="Name of the Hashtag assay HTO by default", default = "HTO")
 #Output graphs - Ridge Plot
-parser$add_argument("--ridgePlot", help = "Generates a ridge plot from the results, True to generate", default = "TRUE")
+parser$add_argument("--ridgePlot", help = "Generates a ridge plot from the results, True to generate", default = "True")
 parser$add_argument("--ridgeNCol", help = "Number of columns for ridgePlot", default = 3, type = "integer")
 
 #Output graphs - Scatter Feature
-parser$add_argument("--featureScatter",help = "Generates a ridge plot from the results, True to generate", default = "TRUE")
+parser$add_argument("--featureScatter",help = "Generates a ridge plot from the results, True to generate", default = "True")
 parser$add_argument("--scatterFeat1", help = "Feature 1 for Feature Scatter Plot", default = "hto_HTO-A")
 parser$add_argument("--scatterFeat2", help = "Feature 2 for Feature Scatter Plot", default = "hto_HTO-B")
 
 #Output graphs - Violin Plot
-parser$add_argument("--vlnPlot", help = "Generates a violin plot from the results, True to generate", default = "TRUE")
+parser$add_argument("--vlnPlot", help = "Generates a violin plot from the results, True to generate", default = "True")
 parser$add_argument("--vlnFeatures", help = "Features to plot (gene expression, metrics, PC scores, anything that can be retreived by FetchData)", default = "nCount_RNA")
 parser$add_argument("--vlnLog", help = "plot the feature axis on log scale", action = "store_true")
 
 #Output graphs - tSNE
-parser$add_argument("--tSNE", help = "Generate a two dimensional tSNE embedding for HTOs", default = "TRUE")
+parser$add_argument("--tSNE", help = "Generate a two dimensional tSNE embedding for HTOs", default = "True")
 parser$add_argument("--tSNEIdents", help = "What should we remove from the object (we have Singlet,Doublet and Negative)", default = "Negative")
-parser$add_argument("--tSNEInvert", action = "store_true") # TRUE
-parser$add_argument("--tSNEVerbose", action = "store_true") # FALSE
-parser$add_argument("--tSNEApprox", action = "store_true") # FALSE
+parser$add_argument("--tSNEInvert", action = "store_true")
+parser$add_argument("--tSNEVerbose", action = "store_true")
+parser$add_argument("--tSNEApprox", action = "store_true")
 parser$add_argument("--tSNEDimMax", help = "max number of donors ",type = "integer", default = 1)
 parser$add_argument("--tSNEPerplexity", help = "value for perplexity", type = "integer",  default = 100)
 
 #Output graphs - Heatmap
-parser$add_argument("--heatMap", help = "Generate a Heatmap", default = "FALSE")
+parser$add_argument("--heatMap", help = "Generate a Heatmap", default = "False")
 parser$add_argument("--heatMapNcells", help ="value for number of cells", type = "integer",  default = 500)
 parser$add_argument("--outputdir", help='Output directory')
 
@@ -56,24 +56,24 @@ hashtag <-readRDS(hash_file)
 
 # Ridge Plot
 # Group cells based on the max HTO signal
-if (args$ridgePlot == "TRUE") {
+if (args$ridgePlot == "True") {
   Idents(hashtag) <- paste0(args$assay, "_maxID")
   RidgePlot(hashtag, assay = args$assay, features = rownames(hashtag[[args$assay]]), ncol = args$ridgeNCol)
   ggsave(paste0(args$outputdir, '/ridge.jpeg'), device = 'jpeg', dpi = 500) # height = 10, width = 10
 }
 
-if (args$featureScatter == "TRUE") {
+if (args$featureScatter == "True") {
   FeatureScatter(hashtag, feature1 = args$scatterFeat1, feature2 = args$scatterFeat2)
   ggsave(paste0(args$outputdir, '/featureScatter.jpeg'), device = 'jpeg',dpi = 500)
 }
 
-if (args$vlnPlot == "TRUE") {
+if (args$vlnPlot == "True") {
   Idents(hashtag) <- paste0(args$assay, "_classification.global")
   VlnPlot(hashtag, features = args$vlnFeatures, pt.size = 0.1, log = args$vlnLog)
   ggsave(paste0(args$outputdir, '/violinPlot.jpeg'), device = 'jpeg', dpi = 500)
 }
 
-if (args$tSNE == "TRUE") {
+if (args$tSNE == "True") {
   hashtag.subset <- subset(hashtag, idents = args$tSNEIdents, invert = args$tSNEInvert)
   DefaultAssay(hashtag.subset) <- args$assay
   hashtag.subset <- ScaleData(hashtag.subset, features = rownames(hashtag.subset),
@@ -84,7 +84,7 @@ if (args$tSNE == "TRUE") {
   ggsave(paste0(args$outputdir, '/tSNE.jpeg'), device = 'jpeg', dpi = 500)
 }
 
-if (args$heatMap == "TRUE") {
+if (args$heatMap == "True") {
   HTOHeatmap(hashtag, assay = args$assay, ncells = args$heatMapNcells)
   ggsave(paste0(args$outputdir, '/heatMap.jpeg'), device = 'jpeg', dpi = 500)
 }

diff --git a/bin/demuxem.py b/bin/demuxem.py
@@ -16,7 +16,7 @@
 parser.add_argument('--alpha_noise', help='The Dirichlet prior concenration parameter on the background noise.', type=float, default=1.0)
 parser.add_argument('--tol', help='Threshold used for the EM convergence.', type=float, default=1e-6)
 parser.add_argument('--n_threads', help='Number of threads to use. Must be a positive integer.', type=int, default=1)
-parser.add_argument('--filter_demuxem', help='Use the filter for RNA, true or false', default='true')
+parser.add_argument('--filter_demuxem', help='Use the filter for RNA, True or False', default='True')
 parser.add_argument('--generateGenderPlot', help='Generate violin plots using gender-specific genes (e.g. Xist). <gene> is a comma-separated list of gene names.', default='')
 parser.add_argument('--objectOutDemuxem', help='Output name of demultiplexing results. All outputs will use it as the prefix.', default="demuxem_res")
 parser.add_argument('--outputdir', help='Output directory')

diff --git a/docs/source/general.md b/docs/source/general.md
@@ -13,72 +13,82 @@ The mode of the pipeline is set by `params.mode`. hadge provides 4 modes in tota
 
 ## **Pipeline configuration**
 
+The pipeline provides some pre-defined profiles. The standard profile is used by default when no profile is specified, where the pipeline is run locally and all processes annotated with the big_mem label are assigned 4 cpus and 16 GB of memory.
+
+```
+profiles{
+    standard {
+        process {
+            executor = 'local'
+            withLabel: big_mem {
+                cpus = 4
+                memory = 16.GB
+            }
+            withLabel: small_mem {
+                cpus = 2
+                memory = 8.GB
+            }
+        }
+
+    }
+```
+
 ### Conda environments:
 
-We provide a `environment.yml` file for each process. But you can also use local Conda environments to run a process:
+By using the `-profile conda` option, the pipeline executes each process within a Conda environment specified in the conda directive. Alternatively, you have the flexibility to add a new profile in the `nextflow.config` file, allowing you to use local Conda environments for running processes.
 
 ```
-// dont forget to enable conda
-conda.enable = true
-process {
-    // Use Conda environment files
-    withName:scSplit {
-        conda = './conda/scsplit.yml'
-    }
-    // Use Conda package names
-    withName:cellSNP {
-        conda = 'bioconda::cellsnp-lite'
-    }
-    // Use existing Conda environments
-    withName:summary {
-        conda = '/path/to/an/existing/env/directory'
+profiles{
+    my_conda_profile {
+        // don't forget to enable Conda
+        conda.enabled = true
+        process {
+            // Use Conda environment files
+            withName:scSplit {
+                conda = './conda/scsplit.yml'
+            }
+            // Use Conda package names
+            withName:cellSNP {
+                conda = 'bioconda::cellsnp-lite'
+            }
+            // Use existing Conda environments
+            withName:summary {
+                conda = '/path/to/an/existing/env/directory'
+            }
+        }
     }
 }
-
 ```
 
 ### Containers:
 
 Nextflow also supports a variety of container runtimes, e.g. Docker. To specify a different Docker image for each process:
 
 ```
-process {
-    withName:foo {
-        container = 'image_name_1'
-    }
-    withName:bar {
-        container = 'image_name_2'
+profiles{
+    my_docker_profile {
+        // don't forget to enable Docker
+        docker.enabled = true
+        process {
+            withName:foo {
+                container = 'image_name_1'
+            }
+            withName:bar {
+                container = 'image_name_2'
+            }
+        }
     }
 }
-// do not forget to enable docker
 
-docker.enabled = true
 
 ```
 
 ### Executor and resource specifications:
 
-- The pipeline can be run either locally or on an HPC. You can set the executor by running the pipeline with `-profile standard` or `-profile cluster`. Of course, you can add other profiles if you want.
+- The pipeline can also be run on an HPC. You can set the executor by running the pipeline with `-profile cluster`.
 - Feel free to add other configurations, e.g. the number of CPUs, the memory allocation, etc. If you are new to the Nextflow framework, please visit the [Nextflow page](https://www.nextflow.io/docs/latest/config.html#).
-- As default, the pipeline is run locally with the standard profile, where all processes annotated with the big_mem label are assigned 4 cpus and 16 Gb of memory.
 
 ```
-profiles{
-    standard {
-        process {
-            executor = 'local'
-            withLabel: big_mem {
-                cpus = 4
-                memory = 16.GB
-            }
-            withLabel: small_mem {
-                cpus = 2
-                memory = 8.GB
-            }
-        }
-
-    }
-
     cluster {
         process {
             executor = 'slurm'
@@ -97,6 +107,14 @@ profiles{
 
 ```
 
+### Customized profiles
+
+Configuration files can contain the definition of one or more profiles. Multiple configuration profiles can be specified by separating the profile names with a comma (no whitespace), for example:
+
+```
+nextflow run main.nf -profile standard,conda
+```
+
 ## **Advanced usecases**
 
 ### **Running on multiple samples**