
Commit 13fe5a5

Merge branch 'master' into wustl_htcf
2 parents 9b79afa + 53b74a0 commit 13fe5a5

10 files changed: 457 additions & 6 deletions

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
@@ -12,3 +12,4 @@
 **/crg** @joseespinosa
 **/iris** @nikhil
 **/mahuika** @jen-reeve
+**/purdue_** @aseetharam

.github/workflows/main.yml

Lines changed: 3 additions & 0 deletions
@@ -169,6 +169,9 @@ jobs:
 - "pdc_kth"
 - "phoenix"
 - "psmn"
+- "purdue_bell"
+- "purdue_gautschi"
+- "purdue_negishi"
 - "qmul_apocrita"
 - "rki"
 - "rosalind"

conf/bi.config

Lines changed: 4 additions & 6 deletions
@@ -7,11 +7,9 @@ params {
     config_profile_description = 'Boehringer Ingelheim internal profile provided by nf-core/configs.'
     config_profile_contact = 'Alexander Peltzer (@apeltzer)'
     config_profile_url = 'https://www.boehringer-ingelheim.com/'
+    bi_globalConfig = System.getenv('NXF_GLOBAL_CONFIG') ?:
+        System.err.println("WARNING: For bi.config requires NXF_GLOBAL_CONFIG env var to be set. Point it to global.config file if you want to use this profile.")
 }
 
-params.bi_globalConfig = System.getenv('NXF_GLOBAL_CONFIG')
-if (params.bi_globalConfig == null) {
-    System.err.println("WARNING: For bi.config requires NXF_GLOBAL_CONFIG env var to be set. Point it to global.config file if you want to use this profile.")
-} else {
-    includeConfig params.bi_globalConfig
-}
+// Include the global config if set
+includeConfig(params.bi_globalConfig ?: '/dev/null')
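For readers unfamiliar with the idiom: `System.err.println(...)` returns `null`, so when `NXF_GLOBAL_CONFIG` is unset the first Elvis operator prints the warning and leaves `bi_globalConfig` as `null`, and the second Elvis then makes `includeConfig` read `/dev/null`, a no-op. A minimal standalone sketch of the same null-safe include (the `MY_SITE_CONFIG` variable is illustrative, not part of bi.config):

```nextflow
// Sketch of the optional-include pattern used above; MY_SITE_CONFIG is hypothetical.
params.site_config = System.getenv('MY_SITE_CONFIG') ?:
    System.err.println('WARNING: MY_SITE_CONFIG not set; continuing without it.')

// println() returned null, so this resolves to /dev/null (a no-op include) when unset.
includeConfig(params.site_config ?: '/dev/null')
```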

conf/purdue_bell.config

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
// nf-core/configs: Purdue RCAC Bell cluster profile
// Bell: AMD EPYC 7662 (Rome), 128 cores / 256 GB per cpu node; 1 TB highmem nodes
// https://www.rcac.purdue.edu/compute/bell

params {
    config_profile_description = 'Purdue RCAC Bell cluster profile (CPU-only nf-core pipelines).'
    config_profile_contact = 'Arun Seetharam (@aseetharam)'
    config_profile_url = 'https://www.rcac.purdue.edu/compute/bell'

    // Shared iGenomes mirror (identical path on Bell, Negishi, Gautschi)
    igenomes_base = '/depot/itap/datasets/igenomes'

    // REQUIRED. Run `slist` on Bell to list your accounts.
    cluster_account = null

    // Opt-in: route jobs that fit within standby limits (<= 4 h, <= 256 GB)
    // through the 4 h standby QoS. Long or high-memory jobs stay on normal QoS.
    use_standby = false
}

// Tell nf-core schema validation to ignore our custom params
validation {
    ignoreParams = ['cluster_account', 'use_standby']
}

process {
    executor = 'slurm'

    // Global ceiling: largest available node (highmem: 1 TB, 128 cores, 14 d).
    // Covers all partitions; per-task routing below picks the right one.
    resourceLimits = [
        cpus  : 128,
        memory: 1000.GB,
        time  : 336.h
    ]

    // Dynamic partition routing:
    // highmem (1 TB, 24 h, >= 65 cores required by Slurm policy) when task.memory > 256 GB
    // cpu (256 GB, 14 d) otherwise
    queue = { task.memory > 256.GB ? 'highmem' : 'cpu' }
    clusterOptions = {
        if (!params.cluster_account) {
            System.err.println("ERROR: purdue_bell profile requires --cluster_account=<slurm_account>.")
            System.err.println("  Run 'slist' on a Bell login node to list your accounts.")
            System.exit(1)
        }
        // standby QoS has a 4 h walltime cap and does not apply to highmem.
        def standby = params.use_standby && task.memory <= 256.GB && task.time <= 4.h
        "--account=${params.cluster_account}" + (standby ? ' --qos=standby' : '')
    }
}

executor {
    queueSize = 50
    pollInterval = '30 sec'
    queueStatInterval = '5 min'
    submitRateLimit = '10 sec'
}

apptainer {
    enabled = true
    autoMounts = true
    cacheDir = "${System.getenv('RCAC_SCRATCH') ?: System.getenv('SCRATCH') ?: System.getProperty('user.home')}/.apptainer/cache"
}

conf/purdue_gautschi.config

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
// nf-core/configs: Purdue RCAC Gautschi cluster profile
// Gautschi: AMD EPYC 9654 (Genoa), 192 cores / 384 GB per cpu node; 1.5 TB highmem nodes;
// NVIDIA L40 (smallgpu) and H100 (ai) GPU partitions
// https://www.rcac.purdue.edu/knowledge/gautschi

params {
    config_profile_description = 'Purdue RCAC Gautschi cluster profile (CPU + NVIDIA GPU nf-core pipelines).'
    config_profile_contact = 'Arun Seetharam (@aseetharam)'
    config_profile_url = 'https://www.rcac.purdue.edu/knowledge/gautschi'

    // Shared iGenomes mirror (identical path on Bell, Negishi, Gautschi)
    igenomes_base = '/depot/itap/datasets/igenomes'

    // REQUIRED. Run `slist` on Gautschi to list your accounts.
    cluster_account = null

    // Opt-in: route CPU jobs that fit within standby limits (<= 4 h, <= 384 GB)
    // through the 4 h standby QoS. Long, high-memory, and GPU jobs stay on normal QoS.
    use_standby = false

    // GPU partition for process_gpu label: 'smallgpu' (2x L40) or 'ai' (8x H100)
    gpu_partition = 'smallgpu'
}

// Tell nf-core schema validation to ignore our custom params
validation {
    ignoreParams = ['cluster_account', 'use_standby', 'gpu_partition']
}

process {
    executor = 'slurm'

    // Global ceiling: largest available node (highmem: 1.5 TB, 192 cores, 14 d).
    // Covers all partitions; per-task routing below picks the right one.
    resourceLimits = [
        cpus  : 192,
        memory: 1500.GB,
        time  : 336.h
    ]

    // Dynamic partition routing:
    // highmem (1.5 TB, 24 h, >= 49 cores required by Slurm policy) when task.memory > 384 GB
    // cpu (384 GB, 14 d) otherwise
    queue = { task.memory > 384.GB ? 'highmem' : 'cpu' }
    clusterOptions = {
        if (!params.cluster_account) {
            System.err.println("ERROR: purdue_gautschi profile requires --cluster_account=<slurm_account>.")
            System.err.println("  Run 'slist' on a Gautschi login node to list your accounts.")
            System.exit(1)
        }
        // standby QoS has a 4 h walltime cap and does not apply to highmem or GPU partitions.
        def standby = params.use_standby && task.memory <= 384.GB && task.time <= 4.h
        "--account=${params.cluster_account}" + (standby ? ' --qos=standby' : '')
    }

    // GPU jobs. Default to smallgpu (L40); override with --gpu_partition=ai for H100.
    // GPU count is derived from the task's `accelerator.request` so multi-GPU
    // workflows (e.g. parabricks) work without profile changes.
    withLabel: process_gpu {
        queue = {
            if (!(params.gpu_partition in ['smallgpu', 'ai'])) {
                System.err.println("ERROR: purdue_gautschi params.gpu_partition must be 'smallgpu' or 'ai' (got '${params.gpu_partition}').")
                System.exit(1)
            }
            params.gpu_partition
        }
        clusterOptions = {
            if (!params.cluster_account) {
                System.err.println("ERROR: purdue_gautschi profile requires --cluster_account=<slurm_account>.")
                System.err.println("  Run 'slist' on a Gautschi login node to list your accounts.")
                System.exit(1)
            }
            def gpus = task.accelerator?.request ?: 1
            "--account=${params.cluster_account} --gres=gpu:${gpus}"
        }
    }
}

executor {
    queueSize = 50
    pollInterval = '30 sec'
    queueStatInterval = '5 min'
    submitRateLimit = '10 sec'
}

apptainer {
    enabled = true
    autoMounts = true
    cacheDir = "${System.getenv('RCAC_SCRATCH') ?: System.getenv('SCRATCH') ?: System.getProperty('user.home')}/.apptainer/cache"
}
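To illustrate how the `--gres` derivation above plays out, here is a hedged sketch of a pipeline-side process this profile would route to the GPU partition; the process name and command are hypothetical, and only the `process_gpu` label and the `accelerator` directive matter:

```nextflow
// Illustrative only (not part of this commit): a process carrying the
// 'process_gpu' label with an explicit accelerator request. The withLabel
// block above would submit it with --gres=gpu:2 on params.gpu_partition.
process DEMO_GPU_STEP {
    label 'process_gpu'
    accelerator 2

    script:
    """
    nvidia-smi
    """
}
```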

conf/purdue_negishi.config

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
// nf-core/configs: Purdue RCAC Negishi cluster profile
// Negishi: AMD EPYC 7763 (Milan), 128 cores / 256 GB per cpu node; 1 TB highmem nodes
// https://www.rcac.purdue.edu/knowledge/negishi/gateway

params {
    config_profile_description = 'Purdue RCAC Negishi cluster profile (CPU-only nf-core pipelines).'
    config_profile_contact = 'Arun Seetharam (@aseetharam)'
    config_profile_url = 'https://www.rcac.purdue.edu/knowledge/negishi/gateway'

    // Shared iGenomes mirror (identical path on Bell, Negishi, Gautschi)
    igenomes_base = '/depot/itap/datasets/igenomes'

    // REQUIRED. Run `slist` on Negishi to list your accounts.
    cluster_account = null

    // Opt-in: route jobs that fit within standby limits (<= 4 h, <= 256 GB)
    // through the 4 h standby QoS. Long or high-memory jobs stay on normal QoS.
    use_standby = false
}

// Tell nf-core schema validation to ignore our custom params
validation {
    ignoreParams = ['cluster_account', 'use_standby']
}

process {
    executor = 'slurm'

    // Global ceiling: largest available node (highmem: 1 TB, 128 cores, 14 d).
    // Covers all partitions; per-task routing below picks the right one.
    resourceLimits = [
        cpus  : 128,
        memory: 1000.GB,
        time  : 336.h
    ]

    // Dynamic partition routing:
    // highmem (1 TB, 24 h, >= 65 cores required by Slurm policy) when task.memory > 256 GB
    // cpu (256 GB, 14 d) otherwise
    queue = { task.memory > 256.GB ? 'highmem' : 'cpu' }
    clusterOptions = {
        if (!params.cluster_account) {
            System.err.println("ERROR: purdue_negishi profile requires --cluster_account=<slurm_account>.")
            System.err.println("  Run 'slist' on a Negishi login node to list your accounts.")
            System.exit(1)
        }
        // standby QoS has a 4 h walltime cap and does not apply to highmem.
        def standby = params.use_standby && task.memory <= 256.GB && task.time <= 4.h
        "--account=${params.cluster_account}" + (standby ? ' --qos=standby' : '')
    }
}

executor {
    queueSize = 50
    pollInterval = '30 sec'
    queueStatInterval = '5 min'
    submitRateLimit = '10 sec'
}

apptainer {
    enabled = true
    autoMounts = true
    cacheDir = "${System.getenv('RCAC_SCRATCH') ?: System.getenv('SCRATCH') ?: System.getProperty('user.home')}/.apptainer/cache"
}

docs/purdue_bell.md

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# nf-core/configs: Purdue RCAC Bell

The `purdue_bell` profile configures nf-core pipelines to run on the Bell cluster operated by the Rosen Center for Advanced Computing (RCAC) at Purdue University.

Bell is an AMD EPYC 7662 (Rome) cluster with 128 cores and 256 GB RAM per standard node, plus 1 TB highmem nodes. See the [RCAC Bell user guide](https://www.rcac.purdue.edu/compute/bell) for hardware and policy details.

## Prerequisites

```bash
module purge
module load nextflow
```

The `nextflow` module pulls in a compatible JDK (`openjdk/17.0.2_8` is available on Bell). Apptainer is system-wide; `/usr/bin/singularity` is a symlink to `apptainer`.

## Required parameter: `--cluster_account`

RCAC Slurm jobs must specify an account. List yours with `slist`, then pass it to Nextflow:

```bash
slist

nextflow run nf-core/<pipeline> \
  -profile purdue_bell \
  --cluster_account <your_account> \
  --input samplesheet.csv \
  --outdir results
```

The profile will refuse to submit jobs if `--cluster_account` is unset.

## Partition routing

The profile routes each task dynamically based on its memory request:

| Memory request | Partition | Walltime cap | Notes                                                   |
| -------------- | --------- | ------------ | ------------------------------------------------------- |
| `<= 256 GB`    | `cpu`     | 14 d         | Default for most pipeline steps                         |
| `> 256 GB`     | `highmem` | 24 h         | Slurm requires `>= 65 cores` per job on this partition  |

If a pipeline step requests more than 256 GB RAM but fewer than 65 cores, Slurm will reject the submission. Raise the step's CPU request in a pipeline-level config, or lower its memory request if the real need is below 256 GB.
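As a sketch, such an override can live in a small custom config passed with `-c custom.config` (the `BIG_MEM_STEP` process name below is a placeholder for the real name reported in the pipeline's log or trace file):

```nextflow
// custom.config — illustrative only; adjust the name and numbers to the actual step.
process {
    withName: 'BIG_MEM_STEP' {
        cpus   = 65       // meets the highmem partition's >= 65-core policy
        memory = 320.GB   // > 256 GB, so the profile routes it to highmem
    }
}
```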
GPU partitions on Bell are AMD MI50 (`gpu`) and MI60 (`multigpu`), both ROCm-based. They are **not exposed** by this profile because nf-core GPU pipelines are CUDA-only.

## Standby queue (optional)

Bell offers a 4 h standby QoS with higher throughput for short jobs:

```bash
nextflow run ... -profile purdue_bell --use_standby true ...
```

Jobs are routed through standby only when they fit within the QoS limits (<= 4 h walltime, <= 256 GB memory). Longer or larger steps automatically fall back to the normal QoS.

## Reference data

A shared iGenomes mirror is mounted at `/depot/itap/datasets/igenomes` and the profile sets `params.igenomes_base` accordingly. Use the standard nf-core `--genome` keys (e.g. `--genome GRCh38`) in supported pipelines.

To use your own reference instead, pass the relevant pipeline parameters explicitly (`--fasta`, `--gtf`, etc.).
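As a sketch, those overrides can also be collected in a small config supplied with `-c` (the paths below are placeholders for your own files):

```nextflow
// refs.config — illustrative only; point these at your own reference files.
params {
    fasta = '/depot/<your_group>/references/genome.fa'
    gtf   = '/depot/<your_group>/references/genome.gtf'
}
```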
## Container cache and work directory

```bash
export NXF_SINGULARITY_CACHEDIR=$RCAC_SCRATCH/.apptainer/cache
nextflow run ... -w $RCAC_SCRATCH/nextflow-work ...
```

## Contact

- Arun Seetharam, [@aseetharam](https://github.com/aseetharam), <aseethar@purdue.edu>
- [RCAC support](https://www.rcac.purdue.edu/about/contact)
