
Commit 13fe5a5

Merge branch 'master' into wustl_htcf
2 parents 9b79afa + 53b74a0 commit 13fe5a5

10 files changed: 457 additions & 6 deletions

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
@@ -12,3 +12,4 @@
 **/crg** @joseespinosa
 **/iris** @nikhil
 **/mahuika** @jen-reeve
+**/purdue_** @aseetharam

.github/workflows/main.yml

Lines changed: 3 additions & 0 deletions
@@ -169,6 +169,9 @@ jobs:
 - "pdc_kth"
 - "phoenix"
 - "psmn"
+- "purdue_bell"
+- "purdue_gautschi"
+- "purdue_negishi"
 - "qmul_apocrita"
 - "rki"
 - "rosalind"

conf/bi.config

Lines changed: 4 additions & 6 deletions
@@ -7,11 +7,9 @@ params {
     config_profile_description = 'Boehringer Ingelheim internal profile provided by nf-core/configs.'
     config_profile_contact = 'Alexander Peltzer (@apeltzer)'
     config_profile_url = 'https://www.boehringer-ingelheim.com/'
+    bi_globalConfig = System.getenv('NXF_GLOBAL_CONFIG') ?:
+        System.err.println("WARNING: For bi.config requires NXF_GLOBAL_CONFIG env var to be set. Point it to global.config file if you want to use this profile.")
 }
 
-params.bi_globalConfig = System.getenv('NXF_GLOBAL_CONFIG')
-if (params.bi_globalConfig == null) {
-    System.err.println("WARNING: For bi.config requires NXF_GLOBAL_CONFIG env var to be set. Point it to global.config file if you want to use this profile.")
-} else {
-    includeConfig params.bi_globalConfig
-}
+// Include the global config if set
+includeConfig(params.bi_globalConfig ?: '/dev/null')
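For readers unfamiliar with the idiom: `System.err.println(...)` returns `null`, so when `NXF_GLOBAL_CONFIG` is unset the first Elvis operator prints the warning and leaves `bi_globalConfig` as `null`, and the second Elvis then makes `includeConfig` read `/dev/null`, a no-op. A minimal standalone sketch of the same null-safe include (the `MY_SITE_CONFIG` variable is illustrative, not part of bi.config):

```nextflow
// Sketch of the optional-include pattern used above; MY_SITE_CONFIG is hypothetical.
params.site_config = System.getenv('MY_SITE_CONFIG') ?:
    System.err.println('WARNING: MY_SITE_CONFIG not set; continuing without it.')

// println() returned null, so this resolves to /dev/null (a no-op include) when unset.
includeConfig(params.site_config ?: '/dev/null')
```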

conf/purdue_bell.config

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
// nf-core/configs: Purdue RCAC Bell cluster profile
// Bell: AMD EPYC 7662 (Rome), 128 cores / 256 GB per cpu node; 1 TB highmem nodes
// https://www.rcac.purdue.edu/compute/bell

params {
    config_profile_description = 'Purdue RCAC Bell cluster profile (CPU-only nf-core pipelines).'
    config_profile_contact = 'Arun Seetharam (@aseetharam)'
    config_profile_url = 'https://www.rcac.purdue.edu/compute/bell'

    // Shared iGenomes mirror (identical path on Bell, Negishi, Gautschi)
    igenomes_base = '/depot/itap/datasets/igenomes'

    // REQUIRED. Run `slist` on Bell to list your accounts.
    cluster_account = null

    // Opt-in: route jobs that fit within standby limits (<= 4 h, <= 256 GB)
    // through the 4 h standby QoS. Long or high-memory jobs stay on normal QoS.
    use_standby = false
}

// Tell nf-core schema validation to ignore our custom params
validation {
    ignoreParams = ['cluster_account', 'use_standby']
}

process {
    executor = 'slurm'

    // Global ceiling: largest available node (highmem: 1 TB, 128 cores, 14 d).
    // Covers all partitions; per-task routing below picks the right one.
    resourceLimits = [
        cpus  : 128,
        memory: 1000.GB,
        time  : 336.h
    ]

    // Dynamic partition routing:
    // highmem (1 TB, 24 h, >= 65 cores required by Slurm policy) when task.memory > 256 GB
    // cpu (256 GB, 14 d) otherwise
    queue = { task.memory > 256.GB ? 'highmem' : 'cpu' }
    clusterOptions = {
        if (!params.cluster_account) {
            System.err.println("ERROR: purdue_bell profile requires --cluster_account=<slurm_account>.")
            System.err.println("  Run 'slist' on a Bell login node to list your accounts.")
            System.exit(1)
        }
        // standby QoS has a 4 h walltime cap and does not apply to highmem.
        def standby = params.use_standby && task.memory <= 256.GB && task.time <= 4.h
        "--account=${params.cluster_account}" + (standby ? ' --qos=standby' : '')
    }
}

executor {
    queueSize = 50
    pollInterval = '30 sec'
    queueStatInterval = '5 min'
    submitRateLimit = '10 sec'
}

apptainer {
    enabled = true
    autoMounts = true
    cacheDir = "${System.getenv('RCAC_SCRATCH') ?: System.getenv('SCRATCH') ?: System.getProperty('user.home')}/.apptainer/cache"
}

conf/purdue_gautschi.config

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
// nf-core/configs: Purdue RCAC Gautschi cluster profile
// Gautschi: AMD EPYC 9654 (Genoa), 192 cores / 384 GB per cpu node; 1.5 TB highmem nodes;
// NVIDIA L40 (smallgpu) and H100 (ai) GPU partitions
// https://www.rcac.purdue.edu/knowledge/gautschi

params {
    config_profile_description = 'Purdue RCAC Gautschi cluster profile (CPU + NVIDIA GPU nf-core pipelines).'
    config_profile_contact = 'Arun Seetharam (@aseetharam)'
    config_profile_url = 'https://www.rcac.purdue.edu/knowledge/gautschi'

    // Shared iGenomes mirror (identical path on Bell, Negishi, Gautschi)
    igenomes_base = '/depot/itap/datasets/igenomes'

    // REQUIRED. Run `slist` on Gautschi to list your accounts.
    cluster_account = null

    // Opt-in: route CPU jobs that fit within standby limits (<= 4 h, <= 384 GB)
    // through the 4 h standby QoS. Long, high-memory, and GPU jobs stay on normal QoS.
    use_standby = false

    // GPU partition for process_gpu label: 'smallgpu' (2x L40) or 'ai' (8x H100)
    gpu_partition = 'smallgpu'
}

// Tell nf-core schema validation to ignore our custom params
validation {
    ignoreParams = ['cluster_account', 'use_standby', 'gpu_partition']
}

process {
    executor = 'slurm'

    // Global ceiling: largest available node (highmem: 1.5 TB, 192 cores, 14 d).
    // Covers all partitions; per-task routing below picks the right one.
    resourceLimits = [
        cpus  : 192,
        memory: 1500.GB,
        time  : 336.h
    ]

    // Dynamic partition routing:
    // highmem (1.5 TB, 24 h, >= 49 cores required by Slurm policy) when task.memory > 384 GB
    // cpu (384 GB, 14 d) otherwise
    queue = { task.memory > 384.GB ? 'highmem' : 'cpu' }
    clusterOptions = {
        if (!params.cluster_account) {
            System.err.println("ERROR: purdue_gautschi profile requires --cluster_account=<slurm_account>.")
            System.err.println("  Run 'slist' on a Gautschi login node to list your accounts.")
            System.exit(1)
        }
        // standby QoS has a 4 h walltime cap and does not apply to highmem or GPU partitions.
        def standby = params.use_standby && task.memory <= 384.GB && task.time <= 4.h
        "--account=${params.cluster_account}" + (standby ? ' --qos=standby' : '')
    }

    // GPU jobs. Default to smallgpu (L40); override with --gpu_partition=ai for H100.
    // GPU count is derived from the task's `accelerator.request` so multi-GPU
    // workflows (e.g. parabricks) work without profile changes.
    withLabel: process_gpu {
        queue = {
            if (!(params.gpu_partition in ['smallgpu', 'ai'])) {
                System.err.println("ERROR: purdue_gautschi params.gpu_partition must be 'smallgpu' or 'ai' (got '${params.gpu_partition}').")
                System.exit(1)
            }
            params.gpu_partition
        }
        clusterOptions = {
            if (!params.cluster_account) {
                System.err.println("ERROR: purdue_gautschi profile requires --cluster_account=<slurm_account>.")
                System.err.println("  Run 'slist' on a Gautschi login node to list your accounts.")
                System.exit(1)
            }
            def gpus = task.accelerator?.request ?: 1
            "--account=${params.cluster_account} --gres=gpu:${gpus}"
        }
    }
}

executor {
    queueSize = 50
    pollInterval = '30 sec'
    queueStatInterval = '5 min'
    submitRateLimit = '10 sec'
}

apptainer {
    enabled = true
    autoMounts = true
    cacheDir = "${System.getenv('RCAC_SCRATCH') ?: System.getenv('SCRATCH') ?: System.getProperty('user.home')}/.apptainer/cache"
}
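To illustrate how the `--gres` derivation above plays out, here is a hedged sketch of a pipeline-side process this profile would route to the GPU partition; the process name and command are hypothetical, and only the `process_gpu` label and the `accelerator` directive matter:

```nextflow
// Illustrative only (not part of this commit): a process carrying the
// 'process_gpu' label with an explicit accelerator request. The withLabel
// block above would submit it with --gres=gpu:2 on params.gpu_partition.
process DEMO_GPU_STEP {
    label 'process_gpu'
    accelerator 2

    script:
    """
    nvidia-smi
    """
}
```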

conf/purdue_negishi.config

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
// nf-core/configs: Purdue RCAC Negishi cluster profile
// Negishi: AMD EPYC 7763 (Milan), 128 cores / 256 GB per cpu node; 1 TB highmem nodes
// https://www.rcac.purdue.edu/knowledge/negishi/gateway

params {
    config_profile_description = 'Purdue RCAC Negishi cluster profile (CPU-only nf-core pipelines).'
    config_profile_contact = 'Arun Seetharam (@aseetharam)'
    config_profile_url = 'https://www.rcac.purdue.edu/knowledge/negishi/gateway'

    // Shared iGenomes mirror (identical path on Bell, Negishi, Gautschi)
    igenomes_base = '/depot/itap/datasets/igenomes'

    // REQUIRED. Run `slist` on Negishi to list your accounts.
    cluster_account = null

    // Opt-in: route jobs that fit within standby limits (<= 4 h, <= 256 GB)
    // through the 4 h standby QoS. Long or high-memory jobs stay on normal QoS.
    use_standby = false
}

// Tell nf-core schema validation to ignore our custom params
validation {
    ignoreParams = ['cluster_account', 'use_standby']
}

process {
    executor = 'slurm'

    // Global ceiling: largest available node (highmem: 1 TB, 128 cores, 14 d).
    // Covers all partitions; per-task routing below picks the right one.
    resourceLimits = [
        cpus  : 128,
        memory: 1000.GB,
        time  : 336.h
    ]

    // Dynamic partition routing:
    // highmem (1 TB, 24 h, >= 65 cores required by Slurm policy) when task.memory > 256 GB
    // cpu (256 GB, 14 d) otherwise
    queue = { task.memory > 256.GB ? 'highmem' : 'cpu' }
    clusterOptions = {
        if (!params.cluster_account) {
            System.err.println("ERROR: purdue_negishi profile requires --cluster_account=<slurm_account>.")
            System.err.println("  Run 'slist' on a Negishi login node to list your accounts.")
            System.exit(1)
        }
        // standby QoS has a 4 h walltime cap and does not apply to highmem.
        def standby = params.use_standby && task.memory <= 256.GB && task.time <= 4.h
        "--account=${params.cluster_account}" + (standby ? ' --qos=standby' : '')
    }
}

executor {
    queueSize = 50
    pollInterval = '30 sec'
    queueStatInterval = '5 min'
    submitRateLimit = '10 sec'
}

apptainer {
    enabled = true
    autoMounts = true
    cacheDir = "${System.getenv('RCAC_SCRATCH') ?: System.getenv('SCRATCH') ?: System.getProperty('user.home')}/.apptainer/cache"
}

docs/purdue_bell.md

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# nf-core/configs: Purdue RCAC Bell

The `purdue_bell` profile configures nf-core pipelines to run on the Bell cluster operated by the Rosen Center for Advanced Computing (RCAC) at Purdue University.

Bell is an AMD EPYC 7662 (Rome) cluster with 128 cores and 256 GB RAM per standard node, plus 1 TB highmem nodes. See the [RCAC Bell user guide](https://www.rcac.purdue.edu/compute/bell) for hardware and policy details.

## Prerequisites

```bash
module purge
module load nextflow
```

The `nextflow` module pulls in a compatible JDK (`openjdk/17.0.2_8` is available on Bell). Apptainer is system-wide; `/usr/bin/singularity` is a symlink to `apptainer`.

## Required parameter: `--cluster_account`

RCAC Slurm jobs must specify an account. List yours with `slist`, then pass it to Nextflow:

```bash
slist

nextflow run nf-core/<pipeline> \
  -profile purdue_bell \
  --cluster_account <your_account> \
  --input samplesheet.csv \
  --outdir results
```

The profile will refuse to submit jobs if `--cluster_account` is unset.

## Partition routing

The profile routes each task dynamically based on its memory request:

| Memory request | Partition | Walltime cap | Notes                                                   |
| -------------- | --------- | ------------ | ------------------------------------------------------- |
| `<= 256 GB`    | `cpu`     | 14 d         | Default for most pipeline steps                         |
| `> 256 GB`     | `highmem` | 24 h         | Slurm requires `>= 65 cores` per job on this partition  |

If a pipeline step requests more than 256 GB RAM but fewer than 65 cores, Slurm will reject the submission. Raise the step's CPU request in a pipeline-level config, or lower its memory request if the real need is below 256 GB.
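As a sketch, such an override can live in a small custom config passed with `-c custom.config` (the `BIG_MEM_STEP` process name below is a placeholder for the real name reported in the pipeline's log or trace file):

```nextflow
// custom.config — illustrative only; adjust the name and numbers to the actual step.
process {
    withName: 'BIG_MEM_STEP' {
        cpus   = 65       // meets the highmem partition's >= 65-core policy
        memory = 320.GB   // > 256 GB, so the profile routes it to highmem
    }
}
```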
GPU partitions on Bell are AMD MI50 (`gpu`) and MI60 (`multigpu`), both ROCm-based. They are **not exposed** by this profile because nf-core GPU pipelines are CUDA-only.

## Standby queue (optional)

Bell offers a 4 h standby QoS with higher throughput for short jobs:

```bash
nextflow run ... -profile purdue_bell --use_standby true ...
```

Jobs are routed through standby only when they fit within the QoS limits (<= 4 h walltime, <= 256 GB memory). Longer or larger steps automatically fall back to the normal QoS.

## Reference data

A shared iGenomes mirror is mounted at `/depot/itap/datasets/igenomes` and the profile sets `params.igenomes_base` accordingly. Use the standard nf-core `--genome` keys (e.g. `--genome GRCh38`) in supported pipelines.

To use your own reference instead, pass the relevant pipeline parameters explicitly (`--fasta`, `--gtf`, etc.).
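As a sketch, those overrides can also be collected in a small config supplied with `-c` (the paths below are placeholders for your own files):

```nextflow
// refs.config — illustrative only; point these at your own reference files.
params {
    fasta = '/depot/<your_group>/references/genome.fa'
    gtf   = '/depot/<your_group>/references/genome.gtf'
}
```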
## Container cache and work directory

```bash
export NXF_SINGULARITY_CACHEDIR=$RCAC_SCRATCH/.apptainer/cache
nextflow run ... -w $RCAC_SCRATCH/nextflow-work ...
```

## Contact

- Arun Seetharam, [@aseetharam](https://github.com/aseetharam), <aseethar@purdue.edu>
- [RCAC support](https://www.rcac.purdue.edu/about/contact)
