Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 8 additions & 34 deletions conf/sanger.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,40 +17,12 @@ process {
cpus = 1
memory = 6.Gb

// Currently a single set of rules for all clusters, but we could apply
// different rules to different clusters in their respective configs under ./sanger/
queue = {
if (task.time >= 15.day) {
if (task.memory > 680.GB) {
error "There is no queue for jobs that need >680 GB and >15 days"
} else {
return "basement"
}
} else if (task.memory > 720.GB) {
return "teramem"
} else if (task.memory > 350.GB) {
return "hugemem"
} else if (task.time > 7.day) {
return "basement"
} else if (task.time > 2.day) {
return "week"
} else if (task.time > 12.hour) {
return "long"
} else if (task.time > 1.min || !task.time) {
return "normal"
} else {
return "small"
}
}
clusterOptions = { task.accelerator ? "-gpu \"num=${task.accelerator.request}/host:mode=shared:j_exclusive=yes\"" : null }

withLabel: gpu {
clusterOptions = {
"-M "+task.memory.toMega()+" -R 'select[mem>="+task.memory.toMega()+"] rusage[mem="+task.memory.toMega()+"] span[ptile=1]' -gpu 'num=1:j_exclusive=yes'"
}
queue = { task.time > 12.h ? 'gpu-huge' : task.time > 48.h ? 'gpu-basement' : 'gpu-normal' }
containerOptions = {
containerOptions = {
if (task.accelerator) {
workflow.containerEngine == "singularity" ? '--containall --cleanenv --nv':
( workflow.containerEngine == "docker" ? '--gpus all': null )
( workflow.containerEngine == "docker" ? '--gpus all': null )
}
}
}
Expand All @@ -76,8 +48,10 @@ includeConfig ({

if (clustername == "tol22") {
return "sanger/tol22.config"
} else if (clustername == "farm22") {
return "sanger/farm22.config"
} else if (clustername == "farm22" || clustername == "casm22") {
return "sanger/cpu-farms22.config"
} else if (clustername == "tiger22" || clustername == "cub22") {
return "sanger/gpu-farms22.config"
} else {
return "/dev/null"
}
Expand Down
50 changes: 50 additions & 0 deletions conf/sanger/cpu-farms22.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Shared configuration for the farm22 and casm22 CPU clusters at the Wellcome Sanger Institute

params {
    max_memory = 2.9.TB
    max_cpus = 256
    max_time = 43200.min // 30 days
}

process {
    resourceLimits = [
        memory: 2.9.TB,
        cpus: 256,
        time: 43200.min
    ]

    // Route every task to an LSF queue from its requested time/memory.
    // `queue` is a process directive, so it must live inside the `process`
    // scope — a top-level assignment would never be applied to tasks.
    queue = {
        if (task.accelerator) {
            // GPU queues: test the longest walltime first — any job over
            // 48 h is also over 12 h, so checking 12 h first would make
            // the 'gpu-basement' branch unreachable.
            return task.time > 48.h ? 'gpu-basement' : task.time > 12.h ? 'gpu-huge' : 'gpu-normal'
        } else {
            // CPU queues, ordered from the most restrictive request down.
            // NOTE(review): jobs needing >680 GB *and* >=15 days have no
            // queue at all; the 680 vs 720 GB thresholds look inconsistent
            // — confirm against the cluster's actual queue limits.
            if (task.time >= 15.day) {
                if (task.memory > 680.GB) {
                    error "There is no queue for jobs that need >680 GB and >15 days; we suggest you use checkpointing"
                } else {
                    return "basement"
                }
            } else if (task.memory > 720.GB) {
                return "teramem"
            } else if (task.memory > 350.GB) {
                return "hugemem"
            } else if (task.time > 7.day) {
                return "basement"
            } else if (task.time > 2.day) {
                return "week"
            } else if (task.time > 12.hour) {
                return "long"
            } else if (task.time > 1.min || !task.time) {
                // Tasks with no explicit time request also land on 'normal'.
                return "normal"
            } else {
                return "small"
            }
        }
    }
}

singularity {
    // Mount all filesystems by default
    runOptions = '--bind /lustre --bind /nfs --bind /data --bind /software'
}
20 changes: 0 additions & 20 deletions conf/sanger/farm22.config

This file was deleted.

31 changes: 31 additions & 0 deletions conf/sanger/gpu-farms22.config
Comment thread
mp15 marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Shared configuration for the tiger22 and cub22 GPU clusters at the Wellcome Sanger Institute

params {
    max_memory = 2.9.TB
    max_cpus = 256
    max_time = 43200.min // 30 days
}

process {
    resourceLimits = [
        memory: 2.9.TB,
        cpus: 256,
        time: 43200.min
    ]

    // Route every task to an LSF queue. `queue` is a process directive,
    // so it must live inside the `process` scope — a top-level assignment
    // would never be applied to tasks.
    queue = {
        if (task.accelerator) {
            // All GPU work goes to the single 'inference' queue on these clusters.
            return "inference"
        } else {
            // Temporary oversubscribed queue for CPU jobs until we assign more CPU resources to these clusters
            return "oversubscribed"
        }
    }
}

singularity {
    // Mount all filesystems by default
    runOptions = '--bind /lustre --bind /nfs --bind /data --bind /software'
}
30 changes: 30 additions & 0 deletions conf/sanger/tol22.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,33 @@ process {
time: 89280.min
]
}

// Queue selection rules specific to the tol22 cluster: no GPU queues exist
// here, so accelerator requests fail fast with an explicit error.
// NOTE(review): `queue` is a process directive; confirm this assignment sits
// inside the `process` scope in the full file, otherwise it is never applied.
queue = {
    if (task.accelerator) {
        error "There is no queue for GPU jobs on tol22"
    } else {
        // CPU queues, ordered from the most restrictive request down.
        // NOTE(review): the 680 GB and 720 GB thresholds look inconsistent
        // — confirm against the cluster's actual queue limits.
        if (task.time >= 15.day) {
            if (task.memory > 680.GB) {
                error "There is no queue for jobs that need >680 GB and >15 days we suggest you use checkpointing"
            } else {
                return "basement"
            }
        } else if (task.memory > 720.GB) {
            return "teramem"
        } else if (task.memory > 350.GB) {
            return "hugemem"
        } else if (task.time > 7.day) {
            return "basement"
        } else if (task.time > 2.day) {
            return "week"
        } else if (task.time > 12.hour) {
            return "long"
        } else if (task.time > 1.min || !task.time) {
            // Tasks with no explicit time request also land on 'normal'.
            return "normal"
        } else {
            return "small"
        }
    }
}
Loading