diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 289cdfee..031487da 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -3,3 +3,4 @@
 **/shu_bmrc** @lquayle88
 **/utd_ganymede** @edmundmiller @alyssa-ab
 **/unsw_katana** @jscgh
+**/seadragon** @jiawku
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 299f3e5e..cb315423 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -137,6 +137,7 @@ jobs:
           - "sahmri"
           - "sanger"
           - "scw"
+          - "seadragon"
           - "seawulf"
           - "seg_globe"
           - "self_hosted_runner"
diff --git a/README.md b/README.md
index 5b1dfd10..b214207a 100644
--- a/README.md
+++ b/README.md
@@ -186,6 +186,7 @@ Currently documentation is available for the following systems:
 - [ROSLIN](docs/roslin.md)
 - [SAGE BIONETWORKS](docs/sage.md)
 - [SANGER](docs/sanger.md)
+- [SEADRAGON](docs/seadragon.md)
 - [SEATTLECHILDRENS](docs/seattlechildrens.md)
 - [SEAWULF](docs/seawulf.md)
 - [SEG_GLOBE](docs/seg_globe.md)
diff --git a/conf/seadragon.config b/conf/seadragon.config
new file mode 100644
index 00000000..e92de492
--- /dev/null
+++ b/conf/seadragon.config
@@ -0,0 +1,107 @@
+// Profile config names for nf-core/configs
+params {
+    config_profile_description = 'University of Texas MD Anderson Cancer Center HPC cluster profile provided by nf-core/configs'
+    config_profile_contact = 'Jiawei Gu'
+    config_profile_contact_github = '@jiawku'
+    config_profile_contact_email = 'jiawei.gu@mdanderson.org'
+    config_profile_url = 'https://hpcweb.mdanderson.edu/'
+}
+
+env {
+    SINGULARITY_CACHEDIR="/home/$USER/.singularity/cache"
+    APPTAINER_TMPDIR="/home/$USER/.apptainer/tmp"
+    APPTAINER_CACHEDIR="/home/$USER/.apptainer/cache"
+}
+
+singularity {
+    enabled = true
+    envWhitelist='APPTAINERENV_NXF_TASK_WORKDIR,APPTAINERENV_NXF_DEBUG,APPTAINERENV_LD_LIBRARY_PATH,SINGULARITY_BINDPATH,LD_LIBRARY_PATH,TMPDIR,SINGULARITY_TMPDIR'
+    autoMounts = true
+    runOptions = '-B ${TMPDIR:-/tmp}'
+    cacheDir = "/home/$USER/.singularity/cache"
+}
+
+def membership = "groups".execute().text
+
+// Map a task's resource request onto the smallest matching LSF queue
+def select_queue = { memory, cpu, walltime ->
+    // Cdragon queues
+    if (memory <= 168.GB && cpu <= 28) {
+        if (walltime <= 3.h) return 'short'
+        if (walltime <= 24.h) return 'medium'
+        if (walltime <= 240.h) return 'long'
+    }
+
+    // Edragon E40 queues
+    if (memory <= 475.GB && cpu <= 40) {
+        if (walltime <= 3.h) return 'e40short'
+        if (walltime <= 24.h) return 'e40medium'
+        if (walltime <= 240.h) return 'e40long'
+    }
+
+    // Edragon E80 queues
+    if (memory <= 950.GB && cpu <= 80) {
+        if (walltime <= 3.h) return 'e80short'
+        if (walltime <= 24.h) return 'e80medium'
+        if (walltime <= 240.h) return 'e80long'
+    }
+
+    // High-memory queues
+    if (memory <= 1900.GB && cpu <= 35) {
+        if (walltime <= 240.h) return 'highmem'
+    }
+    if (memory <= 2900.GB && cpu <= 24) {
+        if (walltime <= 240.h) return 'vhighmem'
+    }
+
+    throw new IllegalArgumentException("No matching queue for memory=${memory}, cpu=${cpu}, walltime=${walltime}")
+}
+
+// Submit up to 100 concurrent jobs, poll the queue status every 2 minutes,
+// and submit at most 20 jobs per minute
+executor {
+    name = 'lsf'
+    queueSize = 100
+    perJobMemLimit = true
+    queueStatInterval = '2 min'
+    submitRateLimit = '20/1min'
+    jobName = { "${task.process.split(':').last()}" }
+}
+
+process {
+    resourceLimits = [
+        memory: 2900.GB, // Max memory based on vhighmem nodes
+        cpus: 80,        // Max CPUs based on E80 nodes
+        time: 240.h      // Max walltime of the long queues
+    ]
+
+    executor = 'lsf'
+
+    memory = { task.memory ?: params.default_memory }
+    cpus = { task.cpus ?: params.default_cpus }
+    time = { task.time ?: params.default_time }
+
+    maxRetries = 3
+    afterScript = 'sleep 10' // Prevent abrupt re-submissions after retries
+
+    queue = { select_queue(task.memory, task.cpus, task.time) } // Route each task via select_queue above
+
+    withLabel:process_gpu {
+        cpus = { 40 }       // Use Gdragon nodes
+        memory = { 168.GB } // Max memory for GPU nodes
+        queue = 'gpu,gpu-medium' // GPU-specific queues
+    }
+}
+
+params {
+    default_memory = 8.GB // Fallbacks for tasks that request no resources
+    default_cpus = 1
+    default_time = 4.h
+    max_memory = 2900.GB // Maximum memory based on vhighmem nodes
+    max_cpus = 80        // Maximum CPUs based on E80 nodes
+    max_time = 240.h     // Maximum runtime of the long queues
+    igenomes_base = '/rsrch3/scratch/reflib/REFLIB_data/AWS-iGenomes'
+}
+
+cleanup = true
diff --git a/docs/seadragon.md b/docs/seadragon.md
new file mode 100644
index 00000000..f694f21f
--- /dev/null
+++ b/docs/seadragon.md
@@ -0,0 +1,31 @@
+# nf-core/configs: Seadragon Configuration
+
+To use, run a pipeline with `-profile seadragon`. This will download and launch the [`seadragon.config`](../conf/seadragon.config), which has been pre-configured for the Seadragon HPC environment at The University of Texas MD Anderson Cancer Center. With this profile, container images with all required software are pulled and converted to Singularity images before job execution.
+
+## Module Requirements
+
+To run a pipeline on Seadragon, you will need to load the following module:
+
+```bash
+module load Nextflow
+```
+
+## iGenomes Resource
+
+A local copy of the iGenomes resource is available on Seadragon at `/rsrch3/scratch/reflib/REFLIB_data/AWS-iGenomes/`. You can reference it with the `--genome` parameter in any nf-core pipeline that supports iGenomes. This ensures that all genome-specific references are correctly configured and optimized for the cluster.
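+
+## Queue Selection
+
+Jobs are routed to an LSF queue automatically, based on the memory, CPUs, and walltime each task requests (see `select_queue` in [`seadragon.config`](../conf/seadragon.config)). If a task needs one of the larger node types, raise its resource requests in a custom config passed with `-c`. The snippet below is an illustrative sketch: `BIG_ALIGN` is a hypothetical process name and `custom.config` an arbitrary file name, so substitute your own.
+
+```groovy
+// custom.config
+process {
+    withName: 'BIG_ALIGN' {   // hypothetical process name
+        cpus   = 40           // within the Edragon E40 limit of 40 CPUs
+        memory = 400.GB       // <= 475 GB, so the task stays on the e40* queues
+        time   = 100.h        // > 24 h and <= 240 h, so select_queue returns 'e40long'
+    }
+}
+```
+
+Launch with both configs, for example: `nextflow run <pipeline> -profile seadragon -c custom.config`.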
+
+## Notes
+
+- **Data Storage**: All intermediate files are written to the `work/` directory inside the job's launch directory. Because these files can consume significant space, they are deleted automatically when a run completes successfully; if a pipeline run fails, they are kept so the run can be resumed. You can preserve all intermediate files by using `-profile debug,seadragon`.
+- **User Access**: Ensure that you have an active account to use Seadragon. If unsure, contact the HPC support team at The University of Texas MD Anderson Cancer Center.
+- **Job Submission**: Nextflow jobs must be submitted from the Seadragon login nodes. If in doubt, refer to the cluster documentation or contact support.
+
+## Example Command
+
+```bash
+nextflow run nf-core/rnaseq --reads '*_R{1,2}.fastq.gz' --genome GRCh38 -profile seadragon
+```
+
+## Further Information
+
+For more details about the Seadragon cluster, visit the Seadragon HPC webpage: [https://hpcweb.mdanderson.edu/](https://hpcweb.mdanderson.edu/)
diff --git a/nfcore_custom.config b/nfcore_custom.config
index e53138ff..a12864b5 100644
--- a/nfcore_custom.config
+++ b/nfcore_custom.config
@@ -322,6 +322,9 @@ profiles {
     scw {
         includeConfig "${params.custom_config_base}/conf/scw.config"
     }
+    seadragon {
+        includeConfig "${params.custom_config_base}/conf/seadragon.config"
+    }
     seawulf {
         includeConfig "${params.custom_config_base}/conf/seawulf.config"
     }