Skip to content

Commit c65e0fb

Browse files
pinin4fjordsclaude
andcommitted
Stage module and project bin scripts as task input files [ci fast]
Replace the bind-mount mechanism for module binaries and project bin/ scripts with input file staging. Bin scripts are staged into a hidden .bin/ directory in each task's work directory and made available via PATH. On local/HPC executors, scripts are symlinked (zero-cost). On cloud executors, scripts are uploaded once to {workDir}/.nextflow/bin/ and staged per-task via standard cloud download commands. Project bin scripts are filtered to only those referenced in the task script, avoiding unnecessary staging for large bin/ directories. The nextflow.enable.moduleBinaries feature flag is deprecated with a warning. Setting it to false is honored for one release cycle. Signed-off-by: Jonathan Manning <jonathan.manning@seqera.io> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-off-by: Jonathan Manning <jonathan.manning@seqera.io>
1 parent 3d2e4c4 commit c65e0fb

18 files changed

Lines changed: 350 additions & 74 deletions

File tree

docs/module.md

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,13 @@ baseDir
253253

254254
Modules can define binary scripts that are locally scoped to the processes defined by the tasks.
255255

256-
To enable this feature, set the following flag in your pipeline script or configuration file:
256+
The binary scripts can be placed in any of the following directories within the module:
257257

258-
```nextflow
259-
nextflow.enable.moduleBinaries = true
260-
```
258+
- `<module-dir>/resources/bin`
259+
- `<module-dir>/resources/usr/bin`
260+
- `<module-dir>/resources/usr/local/bin`
261261

262-
The binary scripts must be placed in the module directory named `<module-dir>/resources/usr/bin`:
262+
For example:
263263

264264
```
265265
<module-dir>
@@ -271,11 +271,7 @@ The binary scripts must be placed in the module directory named `<module-dir>/re
271271
└── another-module-script2.py
272272
```
273273

274-
Those scripts will be made accessible like any other command in the task environment, provided they have been granted the Linux execute permissions.
275-
276-
:::{note}
277-
This feature requires the use of a local or shared file system for the pipeline work directory, or {ref}`wave-page` when using cloud-based executors.
278-
:::
274+
Those scripts will be made accessible like any other command in the task environment, provided they have been granted the Linux execute permissions. Module binaries work on all executors, including cloud-based executors.
279275

280276
## Sharing modules
281277

docs/reference/feature-flags.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ Feature flags with the `nextflow.preview` prefix can cause pipelines run with ne
2020
: Defines the DSL version to use (`1` or `2`).
2121

2222
`nextflow.enable.moduleBinaries`
23-
: When `true`, enables the use of modules with binary scripts. See {ref}`module-binaries` for more information.
23+
: :::{deprecated} 25.04.0
24+
:::
25+
: Module binaries are now enabled by default. This flag is no longer required. See {ref}`module-binaries` for more information.
2426

2527
`nextflow.enable.strict`
2628
: :::{deprecated} 26.04.0

modules/nextflow/src/main/groovy/nextflow/NF.groovy

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ class NF {
6262
NextflowMeta.instance.isModuleBinariesEnabled()
6363
}
6464

65+
static boolean isModuleBinariesDisabled() {
66+
NextflowMeta.instance.isModuleBinariesDisabled()
67+
}
68+
6569
static boolean isRecurseEnabled() {
6670
NextflowMeta.instance.preview.recursion
6771
}

modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,12 @@ class NextflowMeta {
124124
return enable.moduleBinaries
125125
}
126126

127+
boolean moduleBinariesDisabled
128+
127129
void moduleBinaries(boolean mode) {
128130
enable.moduleBinaries = mode
131+
if( !mode )
132+
moduleBinariesDisabled = true
129133
}
130134

131135
}

modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -486,11 +486,17 @@ class CmdRun extends CmdBase implements HubOptions {
486486
}
487487

488488
static void detectModuleBinaryFeature(ConfigMap config) {
489-
final moduleBinaries = config.navigate('nextflow.enable.moduleBinaries', false)
489+
final moduleBinaries = config.navigate('nextflow.enable.moduleBinaries')
490+
if( moduleBinaries == null )
491+
return
490492
if( moduleBinaries ) {
491-
log.debug "Enabling module binaries"
493+
log.warn "Configuration `nextflow.enable.moduleBinaries` is no longer needed -- module binaries are now enabled by default"
492494
NextflowMeta.instance.moduleBinaries(true)
493495
}
496+
else {
497+
log.warn "Configuration `nextflow.enable.moduleBinaries = false` is deprecated and will be ignored in a future version -- module binaries are now always enabled"
498+
NextflowMeta.instance.moduleBinaries(false)
499+
}
494500
}
495501

496502
static void detectStrictFeature(ConfigMap config, Map sysEnv) {

modules/nextflow/src/main/groovy/nextflow/executor/BashWrapperBuilder.groovy

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ class BashWrapperBuilder {
391391
binding.trace_cmd = getTraceCommand(interpreter)
392392
binding.launch_cmd = getLaunchCommand(interpreter,env)
393393
binding.stage_cmd = getStageCommand()
394+
binding.module_bin_path = binFilesStaged ? getBinPathScript() : null
394395
binding.unstage_cmd = getUnstageCommand()
395396
binding.unstage_controls = changeDir || shouldUnstageControls() ? getUnstageControls() : null
396397

@@ -700,9 +701,6 @@ class BashWrapperBuilder {
700701
if( stageInMode != 'copy' && allowContainerMounts )
701702
builder.addMountForInputs(inputFiles)
702703

703-
if( allowContainerMounts )
704-
builder.addMounts(binDirs)
705-
706704
if(this.containerMount)
707705
builder.addMount(containerMount)
708706

@@ -775,6 +773,12 @@ class BashWrapperBuilder {
775773
p != -1 ? "nxf_module_load ${name.substring(0,p)} ${name.substring(p+1)}" : "nxf_module_load ${name}"
776774
}
777775

776+
protected String getBinPathScript() {
777+
final binDir = TaskRun.BIN_DIR
778+
"chmod +x ${binDir}/*\n" +
779+
"export PATH=\"\$PWD/${binDir}:\$PATH\""
780+
}
781+
778782
protected String getStageCommand() { 'nxf_stage' }
779783

780784
protected String getUnstageCommand() { 'nxf_unstage' }

modules/nextflow/src/main/groovy/nextflow/processor/TaskBean.groovy

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import java.nio.file.Path
2020

2121
import groovy.transform.CompileStatic
2222
import groovy.transform.PackageScope
23+
import groovy.util.logging.Slf4j
2324
import nextflow.container.ContainerConfig
2425
import nextflow.executor.BashWrapperBuilder
2526
import nextflow.executor.TaskArrayExecutor
@@ -30,6 +31,7 @@ import nextflow.util.MemoryUnit
3031
*
3132
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
3233
*/
34+
@Slf4j
3335
@CompileStatic
3436
class TaskBean implements Serializable, Cloneable {
3537

@@ -117,6 +119,8 @@ class TaskBean implements Serializable, Cloneable {
117119

118120
Boolean stageFileEnabled
119121

122+
boolean binFilesStaged
123+
120124
@PackageScope
121125
TaskBean() {
122126
shell = BashWrapperBuilder.BASH
@@ -166,6 +170,17 @@ class TaskBean implements Serializable, Cloneable {
166170
this.statsEnabled = task.getProcessor().getSession().statsEnabled
167171

168172
this.inputFiles = task.getInputFilesMap()
173+
final moduleBinFiles = task.getProcessor().getModuleBinFiles() ?: Collections.<String,Path>emptyMap()
174+
final projectBinFiles = task.getProcessor().getReferencedProjectBinFiles(task.source) ?: Collections.<String,Path>emptyMap()
175+
this.binFilesStaged = !moduleBinFiles.isEmpty() || !projectBinFiles.isEmpty()
176+
if( binFilesStaged ) {
177+
this.inputFiles.putAll(moduleBinFiles)
178+
for( Map.Entry<String,Path> e : projectBinFiles ) {
179+
if( moduleBinFiles.containsKey(e.key) )
180+
log.warn "Project bin script '${e.key.substring(TaskRun.BIN_DIR.length() + 1)}' overrides module bin script with the same name"
181+
this.inputFiles.put(e.key, e.value)
182+
}
183+
}
169184
this.outputFiles = task.getOutputFilesNames()
170185
this.binDirs = task.getProcessor().getBinDirs()
171186
this.stageInMode = task.config.getStageInMode()

modules/nextflow/src/main/groovy/nextflow/processor/TaskHasher.groovy

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ class TaskHasher {
9696
keys.addAll(binEntries)
9797
}
9898

99+
final moduleBinFiles = processor.getModuleBinFiles()
100+
if( moduleBinFiles ) {
101+
log.trace "Task: ${task.processor.name} > Adding module bin files: ${-> moduleBinFiles.values().join('; ')}"
102+
keys.addAll(moduleBinFiles.values())
103+
}
104+
99105
// add environment modules (`module` directive)
100106
final modules = task.getConfig().getModule()
101107
if( modules ) {
@@ -212,7 +218,7 @@ class TaskHasher {
212218
@Memoized
213219
List<Path> getTaskBinEntries(String script) {
214220
List<Path> result = []
215-
final tokenizer = new StringTokenizer(script, " \t\n\r\f()[]{};&|<>`")
221+
final tokenizer = new StringTokenizer(script, TaskProcessor.SCRIPT_TOKEN_DELIMITERS)
216222
while( tokenizer.hasMoreTokens() ) {
217223
final token = tokenizer.nextToken()
218224
final path = session.binEntries.get(token)

modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy

Lines changed: 109 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import static nextflow.processor.ErrorStrategy.*
1919

2020
import java.nio.file.FileSystems
2121
import java.nio.file.Path
22+
import java.util.concurrent.ConcurrentHashMap
2223
import java.util.concurrent.atomic.AtomicBoolean
2324
import java.util.concurrent.atomic.AtomicInteger
2425
import java.util.concurrent.atomic.AtomicIntegerArray
@@ -64,6 +65,7 @@ import nextflow.executor.Executor
6465
import nextflow.executor.StoredTaskHandler
6566
import nextflow.extension.CH
6667
import nextflow.extension.DataflowHelper
68+
import nextflow.extension.FilesEx
6769
import nextflow.file.FileHelper
6870
import nextflow.file.FileHolder
6971
import nextflow.file.FilePorter
@@ -1568,16 +1570,114 @@ class TaskProcessor {
15681570
return meta?.isModule() ? meta.getModuleBundle() : null
15691571
}
15701572

1571-
@Memoized
1573+
/**
1574+
* @deprecated Bin dirs are no longer bind-mounted. Bin scripts are now staged
1575+
* as input files under {@link TaskRun#BIN_DIR}. Kept for backward compatibility
1576+
* with plugins (e.g. nf-k8s) that reference this method.
1577+
*/
1578+
@Deprecated
15721579
protected List<Path> getBinDirs() {
1573-
final result = new ArrayList(10)
1574-
// module bundle bin dir have priority, add before
1575-
final bundle = session.enableModuleBinaries() ? getModuleBundle() : null
1576-
if( bundle!=null )
1577-
result.addAll(bundle.getBinDirs())
1578-
// then add project bin dir
1579-
if( executor.binDir )
1580-
result.add(executor.binDir)
1580+
return Collections.<Path>emptyList()
1581+
}
1582+
1583+
/**
1584+
* Collect module bin files to be staged into the task work directory.
1585+
* These are scripts from the module's resources/bin/ directory that will
1586+
* be staged as input files under {@link TaskRun#BIN_DIR} and made available via PATH.
1587+
*
1588+
* @return A map of staging name to file path, e.g. {@code '.bin/myscript.sh' -> /path/to/file}
1589+
*/
1590+
@Memoized
1591+
Map<String,Path> getModuleBinFiles() {
1592+
if( NF.isModuleBinariesDisabled() )
1593+
return Collections.<String,Path>emptyMap()
1594+
final bundle = getModuleBundle()
1595+
if( bundle == null )
1596+
return Collections.<String,Path>emptyMap()
1597+
final rawFiles = bundle.getBinFiles()
1598+
if( rawFiles.isEmpty() )
1599+
return Collections.<String,Path>emptyMap()
1600+
final files = isLocalWorkDir() ? rawFiles : uploadBinFiles(rawFiles)
1601+
return prefixBinFiles(files)
1602+
}
1603+
1604+
/**
1605+
* Collect project-level bin files to be staged into the task work directory.
1606+
* These are scripts from the project's bin/ directory that will be staged
1607+
* as input files under {@link TaskRun#BIN_DIR} and made available via PATH.
1608+
*
1609+
* @return A map of staging name to file path, e.g. {@code '.bin/myscript.sh' -> /path/to/file}
1610+
*/
1611+
@Memoized
1612+
Map<String,Path> getProjectBinFiles() {
1613+
final entries = session.binEntries
1614+
if( !entries )
1615+
return Collections.<String,Path>emptyMap()
1616+
final files = isLocalWorkDir() ? entries : uploadBinFiles(entries)
1617+
return prefixBinFiles(files)
1618+
}
1619+
1620+
static final String SCRIPT_TOKEN_DELIMITERS = " \t\n\r\f()[]{};&|<>`"
1621+
1622+
/**
1623+
* Filter project bin files to only those referenced in the given script.
1624+
* Memoized so tokenization runs once per unique script source across all tasks in a process.
1625+
*/
1626+
@Memoized
1627+
Map<String,Path> getReferencedProjectBinFiles(String script) {
1628+
final allBinFiles = getProjectBinFiles()
1629+
if( !allBinFiles || !script )
1630+
return Collections.<String,Path>emptyMap()
1631+
final referenced = new LinkedHashMap<String,Path>(allBinFiles.size())
1632+
final tokenizer = new StringTokenizer(script, SCRIPT_TOKEN_DELIMITERS)
1633+
while( tokenizer.hasMoreTokens() ) {
1634+
final key = TaskRun.BIN_DIR + '/' + tokenizer.nextToken()
1635+
final path = allBinFiles.get(key)
1636+
if( path )
1637+
referenced.put(key, path)
1638+
}
1639+
return referenced
1640+
}
1641+
1642+
/**
1643+
* Prefix all keys in the given map with the bin staging directory name.
1644+
*/
1645+
private static Map<String,Path> prefixBinFiles(Map<String,Path> files) {
1646+
if( files.isEmpty() )
1647+
return Collections.<String,Path>emptyMap()
1648+
final prefix = TaskRun.BIN_DIR + '/'
1649+
final result = new LinkedHashMap<String,Path>(files.size())
1650+
for( Map.Entry<String,Path> e : files ) {
1651+
result.put(prefix + e.key, e.value)
1652+
}
1653+
return result
1654+
}
1655+
1656+
private static final ConcurrentHashMap<Path,Path> uploadedBinFiles = new ConcurrentHashMap<>()
1657+
1658+
@TestOnly
1659+
static void resetBinFileUploadCache() {
1660+
uploadedBinFiles.clear()
1661+
}
1662+
1663+
/**
1664+
* Upload bin files to cloud storage so they can be staged by cloud copy strategies.
1665+
* Files are uploaded to {@code {workDir}/.nextflow/bin/}. Uses a shared cache
1666+
* so that multiple processors sharing the same work directory upload each file only once.
1667+
*/
1668+
@PackageScope
1669+
Map<String,Path> uploadBinFiles(Map<String,Path> files) {
1670+
final stageDir = executor.workDir.resolve('.nextflow/bin')
1671+
FilesEx.mkdirs(stageDir)
1672+
final result = new LinkedHashMap<String,Path>(files.size())
1673+
for( Map.Entry<String,Path> e : files ) {
1674+
final target = stageDir.resolve(e.key)
1675+
final uploaded = uploadedBinFiles.putIfAbsent(e.value, target)
1676+
if( uploaded == null ) {
1677+
FileHelper.copyPath(e.value, target)
1678+
}
1679+
result.put(e.key, target)
1680+
}
15811681
return result
15821682
}
15831683

@@ -1604,24 +1704,6 @@ class TaskProcessor {
16041704
log.debug "Invalid 'session.config.env' object: ${session.config.env?.class?.name}"
16051705
}
16061706

1607-
// append the 'bin' folder to the task environment
1608-
List<Path> paths
1609-
if( isLocalWorkDir() && (paths=getBinDirs()) ) {
1610-
for( Path it : paths ) {
1611-
if( result.containsKey('PATH') ) {
1612-
// note: do not escape potential blanks in the bin path because the PATH
1613-
// variable is enclosed in `"` when in rendered in the launcher script -- see #630
1614-
result['PATH'] = "${result['PATH']}:${it}".toString()
1615-
}
1616-
else {
1617-
// note: append custom bin path *after* the system PATH
1618-
// to prevent unnecessary network round-trip for each command
1619-
// when the added path is a shared file system directory
1620-
result['PATH'] = "\$PATH:${it}".toString()
1621-
}
1622-
}
1623-
}
1624-
16251707
return Collections.unmodifiableMap(result)
16261708
}
16271709

modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,11 @@ class TaskRun implements Cloneable {
562562
static final public String CMD_TRACE = '.command.trace'
563563
static final public String CMD_ENV = '.command.env'
564564

565+
/**
566+
* The directory name used for staging bin scripts in the task work directory
567+
*/
568+
static final public String BIN_DIR = '.bin'
569+
565570

566571
String toString( ) {
567572
"id: $id; name: $name; type: $type; exit: ${exitStatus==Integer.MAX_VALUE ? '-' : exitStatus}; error: $error; workDir: $workDir"

0 commit comments

Comments
 (0)