
Commit eda67d8

Merge pull request #876 from ctastad/master
first commit - minerva hpc profile for icahn school of medicine at mo…
2 parents 1de0154 + 86bf6aa commit eda67d8

5 files changed

Lines changed: 294 additions & 0 deletions


.github/workflows/main.yml

Lines changed: 1 addition & 0 deletions
@@ -123,6 +123,7 @@ jobs:
       - "mjolnir_globe"
       - "mpcdf"
       - "mpcdf_viper"
+      - "mssm"
       - "munin"
       - "nci_gadi"
       - "nu_genomics"

README.md

Lines changed: 1 addition & 0 deletions
@@ -174,6 +174,7 @@ Currently documentation is available for the following systems:
 - [MJOLNIR_GLOBE](docs/mjolnir_globe.md)
 - [MPCDF](docs/mpcdf.md)
 - [MPCDF_VIPER](docs/mpcdf_viper.md)
+- [MSSM](docs/mssm.md)
 - [MUNIN](docs/munin.md)
 - [NCI GADI](docs/nci_gadi.md)
 - [NU_GENOMICS](docs/nu_genomics.md)

conf/mssm.config

Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config for Minerva HPC at Icahn School of Medicine at Mount Sinai
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Author:      Christopher Tastad - Judy Cho Lab
    Contact:     christopher.tastad@mssm.edu
    HPC Support: hpchelp@hpc.mssm.edu

    IMPORTANT: Before running this pipeline, set the MINERVA_ALLOCATION environment
    variable in your master submission script:

        export MINERVA_ALLOCATION="acc_YOUR-PROJECT-NAME"

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Global default params
params {
    config_profile_description = 'Minerva HPC at Icahn School of Medicine at Mount Sinai'
    config_profile_contact     = 'Christopher Tastad (@ctastad)'
    config_profile_url         = 'https://labs.icahn.mssm.edu/minervalab/'

    // Cluster-specific parameters
    minerva_allocation = System.getenv('MINERVA_ALLOCATION') ?: 'default_allocation'
    max_cpus   = 64
    max_memory = 1.5.TB
    max_time   = '336.h'
}

// Singularity environment configuration
env {
    SINGULARITY_CACHEDIR      = "/sc/arion/work/${System.getenv('USER')}/singularity_cache"
    SINGULARITY_TMPDIR        = "/sc/arion/work/${System.getenv('USER')}/singularity_tmp"
    SINGULARITY_LOCALCACHEDIR = "/sc/arion/work/${System.getenv('USER')}/singularity_cache"
    SINGULARITY_PULLFOLDER    = "/sc/arion/work/${System.getenv('USER')}/singularity_cache/pull"
    SINGULARITY_DISABLE_CACHE = "no"
}

singularity {
    enabled     = true
    autoMounts  = true
    cacheDir    = "/sc/arion/work/${System.getenv('USER')}/singularity_cache"
    pullTimeout = '120 min'

    // Pass proxy settings to container
    envWhitelist = ['http_proxy', 'https_proxy', 'all_proxy', 'no_proxy']
}

// LSF executor configuration
executor {
    name = 'lsf'
    submitRateLimit = '2 sec'
    // Specific LSF settings for proper memory handling
    perJobMemLimit = false
    perTaskReserve = true
}

// Process configuration
process {
    executor = 'lsf'
    resourceLimits = [
        cpus:   64,
        memory: 1.5.TB,
        time:   336.h
    ]

    // Dynamic queue selection based on job requirements
    queue = {
        if (task.time > 144.h) {
            return 'long'
        } else if (task.label && task.label.toString().contains('gpu') && task.time <= 30.min) {
            return 'gpuexpress'
        } else if (task.label && task.label.toString().contains('gpu')) {
            return 'gpu'
        } else if (task.time <= 12.h && task.cpus <= 8) {
            return 'express'
        } else {
            return 'premium'
        }
    }

    // Cluster options with proper memory handling
    clusterOptions = {
        def options = "-P ${System.getenv('MINERVA_ALLOCATION') ?: params.minerva_allocation}"

        // Handle memory requests - ensure consistency between -M and rusage
        if (task.memory) {
            def mem = task.memory.toMega()
            options += " -M ${mem}"
        }

        // Add GPU-specific options
        if (task.label && task.label.toString().contains('gpu')) {
            def gpuNum = task.label.toString().contains('high_gpu') ? 2 : 1
            options += " -gpu num=${gpuNum}"
        }

        return options
    }

    // Add GPU awareness to the container runtime
    withLabel: 'gpu|.*gpu.*' {
        containerOptions = '--nv'
    }

    // ERROR HANDLING CONFIGURATION
    // Default dynamic error strategy for most processes
    errorStrategy = {
        if (task.exitStatus in [130, 137, 140] && task.attempt <= 3)
            return 'retry'
        else if (task.exitStatus in (131..145) && task.attempt <= 1)
            return 'retry'
        else
            return 'finish'
    }
    maxRetries = 3

    // Special error handling labels (these override the dynamic strategy above)
    withLabel:error_ignore {
        errorStrategy = 'ignore'
        maxRetries = 0
    }

    withLabel:error_retry {
        errorStrategy = 'retry'
        maxRetries = 2
    }

    // Run all process scripts under strict bash settings
    shell = ['/bin/bash', '-euo', 'pipefail']
}

docs/mssm.md

Lines changed: 153 additions & 0 deletions
@@ -0,0 +1,153 @@
# nf-core/configs: MSSM Configuration

This Nextflow profile has been configured to run nf-core maintained pipelines on the **Minerva HPC** cluster at the **Icahn School of Medicine at Mount Sinai**. All testing has been done with pipelines that follow the DSL2 framework.

Run the pipeline with `-profile mssm`. This will download and launch [`mssm.config`](../conf/mssm.config), which has been pre-configured with a setup suitable for the Minerva HPC cluster. Using this profile, a container image containing all of the required software will be downloaded and converted to a Singularity image before execution of the pipeline. While this is typically governed by the configuration and execution framework, some manual adjustments will be needed at times; see [Custom Configuration](#custom-configuration) below.

## Contact Information

**HPC Support:** hpchelp@hpc.mssm.edu
**Author:** Christopher Tastad - Judy Cho Lab
**Email:** christopher.tastad@mssm.edu

## Required Environment Setup

Before running any nf-core pipeline on Minerva, you **must set the MINERVA_ALLOCATION environment variable** in your submission script:

```bash
export MINERVA_ALLOCATION="acc_YOUR-PROJECT-NAME"
```

## Module Requirements

To run nf-core pipelines on Minerva, you need Nextflow and Singularity. Experience has shown that the local Nextflow module can cause difficulties with execution, so the conda environment installation described by nf-core is preferred.

```bash
ml java
ml anaconda3
ml singularity-ce

# Activate Nextflow from conda environment
source /hpc/packages/minerva-centos7/anaconda3/2018.12/etc/profile.d/conda.sh
conda init bash
conda activate nextflow
```
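
A quick sanity check (a minimal sketch; the reported version numbers will vary) confirms that both tools resolve on your path before you submit anything:

```bash
# Confirm the environment is usable before submitting jobs
nextflow -version        # should report the conda-installed Nextflow
singularity --version    # should report the singularity-ce module
```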

## Proxy Configuration

Minerva requires proxy settings to download containers from remote sources:

```bash
export http_proxy=http://172.28.7.1:3128
export https_proxy=http://172.28.7.1:3128
export all_proxy=http://172.28.7.1:3128
export no_proxy=localhost,*.chimera.hpc.mssm.edu,172.28.0.0/16
```
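
To verify the proxy is working, one quick check is to request headers from a container host; this sketch assumes `curl` is available and uses depot.galaxyproject.org (a common source of nf-core Singularity images) as the test target:

```bash
# curl honors the http(s)_proxy variables exported above;
# getting an HTTP status line back means the proxy is passing traffic
curl -sI https://depot.galaxyproject.org | head -n 1
```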

## Configuration Details

This profile includes:

- **LSF executor** configuration optimized for Minerva
- **Dynamic queue selection** based on job requirements (see the queue check sketch below):
  - `express` queue for short jobs (≤12h, ≤8 CPUs)
  - `premium` queue for standard jobs
  - `long` queue for jobs >144h
  - `gpu` and `gpuexpress` queues for GPU workloads
- **Singularity container** support with proper cache directories
- **Error handling** strategies with automatic retries
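
To confirm which queue the dynamic selection actually assigned to a task, you can query LSF directly. A minimal sketch, assuming a reasonably recent LSF release with `bjobs -o` format support:

```bash
# List your running/pending task jobs together with their assigned queues
bjobs -u $USER -o "jobid job_name queue stat" | head
```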

## Example Submission Script

```bash
#!/bin/bash
#BSUB -J nfcore-pipeline-job
#BSUB -P acc_YOUR-PROJECT-NAME
#BSUB -W 48:00
#BSUB -q premium
#BSUB -n 2
#BSUB -R rusage[mem=8GB]
#BSUB -R span[hosts=1]
#BSUB -o output_%J.stdout
#BSUB -eo error_%J.stderr
#BSUB -L /bin/bash

PROJ_DIR=/path/to/project/
NFC_PIPE=nf-core/rnaseq
NFC_VER=3.18.0
NFC_PROFILE=mssm,singularity
SAMPLESHEET=$PROJ_DIR/samplesheet.csv
OUTDIR=$PROJ_DIR
GENOME=GRCh38

# Required environment variable
export MINERVA_ALLOCATION='acc_YOUR-PROJECT-NAME'

# Proxy settings
export http_proxy=http://172.28.7.1:3128
export https_proxy=http://172.28.7.1:3128
export all_proxy=http://172.28.7.1:3128
export no_proxy=localhost,*.chimera.hpc.mssm.edu,172.28.0.0/16

# Load modules
ml java
ml anaconda3
ml singularity-ce

# Set up Nextflow environment
source /hpc/packages/minerva-centos7/anaconda3/2018.12/etc/profile.d/conda.sh
conda init bash
conda activate nextflow

cd $PROJ_DIR

# Run pipeline
nextflow run $NFC_PIPE \
    -r $NFC_VER \
    -profile $NFC_PROFILE \
    -w /sc/arion/scratch/${USER}/work \
    -c $PROJ_DIR/custom.config \
    --input $SAMPLESHEET \
    --outdir $OUTDIR \
    --genome $GENOME
```
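
The script is submitted to LSF with `bsub`; the file name here is just an example:

```bash
# Submit the script above and watch its progress
bsub < run_nfcore.lsf
bjobs -J nfcore-pipeline-job
```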

## Custom Configuration

Users can supplement the base configuration by creating a `custom.config` file. Many processes require minor modifications to address specific parameters of a dataset or condition. Given the generalized nature of the main config profile and the diversity of process requirements, needing this flexibility is common.

```nextflow
process {
    withName: 'PICARD_MARKDUPLICATES' {
        ext.suffix = 'bam' // Explicitly set the suffix to avoid using getExtension()
    }

    withName: 'STRINGTIE_STRINGTIE' {
        memory = '24.GB' // Increase to at least 2-3x the default
    }
}
```
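
To see how such overrides merge into the final settings before launching, `nextflow config` can print the resolved configuration; a sketch, assuming the pipeline has already been pulled with `nextflow pull nf-core/rnaseq`:

```bash
# Print the fully resolved configuration, including custom.config overrides
nextflow -c custom.config config nf-core/rnaseq -profile mssm | less
```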

## Troubleshooting

### Common Issues

- **Singularity cache errors**: If you encounter errors related to Singularity caching, check your scratch or work space allocation, and clean up the cache directory if needed. A common issue relates to the Singularity `pullTimeout` setting: large remote images may exceed it and benefit from being pulled into the cache manually (see the sketch below).
- **Memory issues**: Some processes may require more memory than the default allocation. Use a custom config to increase memory for specific processes.
- **LSF job submission failures**: Ensure your MINERVA_ALLOCATION variable is set correctly and that you have sufficient allocation hours remaining.
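
A minimal sketch of such a manual pre-pull into the cache directory used by this profile; the image URI is a hypothetical example, so substitute the one reported in the pipeline's pull error:

```bash
# Pull the image into the profile's cache ahead of time so Nextflow finds it there
export SINGULARITY_CACHEDIR="/sc/arion/work/${USER}/singularity_cache"
singularity pull --dir "$SINGULARITY_CACHEDIR" \
    docker://quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0
```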

## Tested Versions

This configuration has been tested with:

- Nextflow: 24.10.3
- Singularity-ce: 4.1.1
- nf-core pipelines: DSL2 compatible (2022-2025)

:::note
You will need an account and allocation on the Minerva HPC cluster to run nf-core pipelines. For accounts and allocation requests, contact your lab administrator or hpchelp@hpc.mssm.edu.
:::

:::note
All jobs will be submitted to the cluster via the LSF scheduler. For technical assistance with the HPC environment, contact hpchelp@hpc.mssm.edu.
:::

nfcore_custom.config

Lines changed: 3 additions & 0 deletions
@@ -277,6 +277,9 @@ profiles {
     mpcdf_viper {
         includeConfig "${params.custom_config_base}/conf/mpcdf_viper.config"
     }
+    mssm {
+        includeConfig "${params.custom_config_base}/conf/mssm.config"
+    }
     munin {
         includeConfig "${params.custom_config_base}/conf/munin.config"
     }
