Skip to content

Commit 5712745

Browse files
committed
Validate module spec against JSON schema
Add formal JSON Schema validation for `nextflow module validate`, backed by the upstream Nextflow module schema. Schema validation runs between structure and Nextflow-specific spec checks, and overlapping hand-coded checks (name/description required, per-param type/description required, TODO type placeholder) are removed in favour of the schema as single source of truth. A `--schema` flag accepts a remote URL, a `file:` URI, or a local path to override the default schema location; load failures abort with a clear error. Signed-off-by: Paolo Di Tommaso <paolo.ditommaso@gmail.com>
1 parent d30e48d commit 5712745

9 files changed

Lines changed: 348 additions & 29 deletions

File tree

modules/nextflow/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ dependencies {
7373
api 'org.apache.commons:commons-compress:1.27.1' // For tar.gz extraction
7474
api 'io.seqera:npr-api:0.22.0'
7575
api 'io.seqera:npr-client:0.22.0'
76+
api 'com.networknt:json-schema-validator:1.5.6'
7677

7778
testImplementation 'org.subethamail:subethasmtp:3.1.7'
7879
testImplementation (project(':nf-lineage'))

modules/nextflow/src/main/groovy/nextflow/cli/module/CmdModuleValidate.groovy

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import groovy.util.logging.Slf4j
2626
import nextflow.cli.CmdBase
2727
import nextflow.exception.AbortOperationException
2828
import nextflow.module.ModuleReference
29+
import nextflow.module.ModuleSchemaValidator
2930
import nextflow.module.ModuleStorage
3031
import nextflow.module.ModuleValidator
3132
import nextflow.util.TestOnly
@@ -43,6 +44,9 @@ class CmdModuleValidate extends CmdBase {
4344
@Parameter(description = "[namespace/name or path]", required = true)
4445
List<String> args
4546

47+
@Parameter(names = '--schema', description = 'URL or local path of the JSON schema used to validate meta.yml')
48+
String schema
49+
4650
@TestOnly
4751
protected Path root
4852

@@ -57,7 +61,8 @@ class CmdModuleValidate extends CmdBase {
5761
throw new AbortOperationException("Incorrect number of arguments -- usage: nextflow module validate <namespace/name>")
5862

5963
final moduleDir = determineModuleDir(args[0])
60-
final errors = ModuleValidator.validate(moduleDir)
64+
final schemaLocation = schema ?: ModuleSchemaValidator.DEFAULT_SCHEMA_URL
65+
final errors = ModuleValidator.validate(moduleDir, schemaLocation)
6166

6267
if( errors ) {
6368
throw new AbortOperationException(
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Copyright 2013-2026, Seqera Labs
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package nextflow.module
18+
19+
import java.nio.file.Files
20+
import java.nio.file.Path
21+
import java.nio.file.Paths
22+
23+
import com.fasterxml.jackson.databind.JsonNode
24+
import com.fasterxml.jackson.databind.ObjectMapper
25+
import com.networknt.schema.JsonSchema
26+
import com.networknt.schema.JsonSchemaFactory
27+
import com.networknt.schema.SpecVersion
28+
import com.networknt.schema.ValidationMessage
29+
import groovy.transform.CompileStatic
30+
import groovy.util.logging.Slf4j
31+
import nextflow.exception.AbortOperationException
32+
import org.yaml.snakeyaml.Yaml
33+
34+
/**
35+
* Validates a module spec (meta.yml) against the Nextflow module JSON schema.
36+
*
37+
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
38+
*/
39+
@Slf4j
40+
@CompileStatic
41+
class ModuleSchemaValidator {
42+
43+
static final String DEFAULT_SCHEMA_URL =
44+
'https://raw.githubusercontent.com/nextflow-io/schemas/refs/heads/main/module/v1/schema.json'
45+
46+
private static final ObjectMapper JSON_MAPPER = new ObjectMapper()
47+
48+
/**
49+
* Validate a meta.yml file against the JSON schema located at the given
50+
* URL or local file path.
51+
*
52+
* @param metaYaml Path to the meta.yml file to validate
53+
* @param schemaLocation URL (http/https), file: URI, or local file path of the schema
54+
* @return List of validation error messages, empty if the spec is valid
55+
*/
56+
static List<String> validate(Path metaYaml, String schemaLocation) {
57+
final schemaText = loadSchema(schemaLocation)
58+
final factory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012)
59+
final JsonSchema schema
60+
try {
61+
schema = factory.getSchema(schemaText)
62+
}
63+
catch( Exception e ) {
64+
throw new AbortOperationException("Invalid module schema at '${schemaLocation}': ${e.message}", e)
65+
}
66+
67+
Object yamlData
68+
try( final stream = Files.newInputStream(metaYaml) ) {
69+
yamlData = new Yaml().load(stream)
70+
}
71+
catch( Exception e ) {
72+
throw new AbortOperationException("Failed to read module spec '${metaYaml}': ${e.message}", e)
73+
}
74+
75+
final JsonNode node = JSON_MAPPER.valueToTree(yamlData)
76+
final Set<ValidationMessage> messages = schema.validate(node)
77+
return messages.collect { it.message }.toList()
78+
}
79+
80+
static List<String> validate(Path metaYaml) {
81+
return validate(metaYaml, DEFAULT_SCHEMA_URL)
82+
}
83+
84+
/**
85+
* Load the JSON schema text from a remote URL, file: URI, or local file path.
86+
* Hard-fails with AbortOperationException on any I/O error.
87+
*/
88+
private static String loadSchema(String location) {
89+
try {
90+
if( location.startsWith('http://') || location.startsWith('https://') ) {
91+
final url = new URL(location)
92+
final conn = url.openConnection()
93+
conn.setConnectTimeout(10_000)
94+
conn.setReadTimeout(20_000)
95+
try( final stream = conn.getInputStream() ) {
96+
return new String(stream.readAllBytes(), 'UTF-8')
97+
}
98+
}
99+
if( location.startsWith('file:') ) {
100+
return Files.readString(Paths.get(URI.create(location)))
101+
}
102+
return Files.readString(Paths.get(location))
103+
}
104+
catch( Exception e ) {
105+
throw new AbortOperationException(
106+
"Failed to load module schema from '${location}': ${e.message}. " +
107+
"Pass --schema <url-or-local-path> to override.", e)
108+
}
109+
}
110+
}

modules/nextflow/src/main/groovy/nextflow/module/ModuleSpec.groovy

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,22 +71,17 @@ class ModuleSpec {
7171
Map<String, Object> _passthrough
7272

7373
/**
74-
* Validate the module spec for required fields
74+
* Validate Nextflow-specific module spec rules that are not expressed by
75+
* the JSON schema (see ModuleSchemaValidator).
7576
*
7677
* @return List of validation errors (empty if valid)
7778
*/
7879
List<String> validate() {
7980
final List<String> errors = []
8081

81-
if( !name )
82-
errors << "Missing required field: name"
83-
8482
if( !version )
8583
errors << "Missing required field: version"
8684

87-
if( !description )
88-
errors << "Missing required field: description"
89-
9085
if( !license )
9186
errors << "Missing required field: license"
9287

@@ -123,11 +118,8 @@ class ModuleSpec {
123118
return
124119
}
125120

126-
if( !param.type || param.type == TODO_TYPE )
127-
errors << "Missing type for ${name}${param.name ? " ($param.name)" : ''}".toString()
128-
129-
if( !param.description || param.description == TODO_DESCRIPTION )
130-
errors << "Missing description for ${name}${param.name ? " ($param.name)" : ''}".toString()
121+
if( param.description == TODO_DESCRIPTION )
122+
errors << "Placeholder description for ${name}${param.name ? " ($param.name)" : ''}".toString()
131123
}
132124

133125
/**

modules/nextflow/src/main/groovy/nextflow/module/ModuleValidator.groovy

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,23 @@ class ModuleValidator {
3737
* An empty list means the module is valid.
3838
*
3939
* @param moduleDir
40+
* @param schemaLocation URL or local path of the JSON schema used to validate meta.yml
4041
*/
41-
static List<String> validate(Path moduleDir) {
42+
static List<String> validate(Path moduleDir, String schemaLocation) {
4243
final errors = new ArrayList<String>()
4344

4445
// Level 1: validate module structure
4546
errors.addAll(validateStructure(moduleDir))
4647
if( errors )
4748
return errors // can't proceed without required files
4849

49-
// Level 2: validate module spec (meta.yml)
50+
// Level 2a: validate module spec (meta.yml) against the JSON schema
5051
final manifestPath = moduleDir.resolve(ModuleStorage.MODULE_MANIFEST_FILE)
52+
errors.addAll(ModuleSchemaValidator.validate(manifestPath, schemaLocation))
53+
if( errors )
54+
return errors
55+
56+
// Level 2b: validate Nextflow-specific rules not expressed by the schema
5157
final spec = ModuleSpecFactory.fromYaml(manifestPath)
5258
errors.addAll(spec.validate())
5359
if( errors )
@@ -61,6 +67,10 @@ class ModuleValidator {
6167
return errors
6268
}
6369

70+
/**
 * Validate a module directory using the default module schema location.
 *
 * @param moduleDir The module directory to validate
 * @return List of validation errors; an empty list means the module is valid
 */
static List<String> validate(Path moduleDir) {
    final String defaultSchema = ModuleSchemaValidator.DEFAULT_SCHEMA_URL
    return validate(moduleDir, defaultSchema)
}
73+
6474
/**
6575
* Check that required files exist.
6676
*

modules/nextflow/src/test/groovy/nextflow/cli/module/CmdModuleValidateTest.groovy

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,42 @@ class CmdModuleValidateTest extends Specification {
3232
@TempDir
3333
Path tempDir
3434

35+
private static final String SCHEMA_JSON = '''\
36+
{
37+
"$schema": "https://json-schema.org/draft/2020-12/schema",
38+
"type": "object",
39+
"properties": {
40+
"name": { "type": "string" },
41+
"version": { "type": "string" },
42+
"description": { "type": "string" },
43+
"license": { "type": "string" },
44+
"input": {
45+
"type": "array",
46+
"items": {
47+
"type": "object",
48+
"properties": {
49+
"name": { "type": "string" },
50+
"type": {
51+
"type": "string",
52+
"enum": ["boolean", "float", "integer", "string", "list", "map", "file", "directory"]
53+
},
54+
"description": { "type": "string" }
55+
},
56+
"required": ["type", "description"]
57+
}
58+
}
59+
},
60+
"required": ["name", "description"]
61+
}
62+
'''.stripIndent()
63+
64+
// Returns the path of the test schema inside the temp directory,
// writing SCHEMA_JSON to it the first time it is requested.
private Path schemaPath() {
    final schemaFile = tempDir.resolve('schema.json')
    if( Files.exists(schemaFile) )
        return schemaFile
    Files.writeString(schemaFile, SCHEMA_JSON)
    return schemaFile
}
70+
3571
private Path createValidModule(String namespace='myorg', String name='hello') {
3672
def moduleDir = tempDir.resolve("modules/$namespace/$name")
3773
Files.createDirectories(moduleDir)
@@ -73,7 +109,7 @@ class CmdModuleValidateTest extends Specification {
73109
def moduleDir = createValidModule()
74110

75111
when:
76-
def errors = ModuleValidator.validate(moduleDir)
112+
def errors = ModuleValidator.validate(moduleDir, schemaPath().toString())
77113

78114
then:
79115
errors.isEmpty()
@@ -85,7 +121,7 @@ class CmdModuleValidateTest extends Specification {
85121
Files.delete(moduleDir.resolve('main.nf'))
86122

87123
when:
88-
def errors = ModuleValidator.validate(moduleDir)
124+
def errors = ModuleValidator.validate(moduleDir, schemaPath().toString())
89125

90126
then:
91127
errors.any { it.contains('main.nf') }
@@ -97,7 +133,7 @@ class CmdModuleValidateTest extends Specification {
97133
Files.delete(moduleDir.resolve('meta.yml'))
98134

99135
when:
100-
def errors = ModuleValidator.validate(moduleDir)
136+
def errors = ModuleValidator.validate(moduleDir, schemaPath().toString())
101137

102138
then:
103139
errors.any { it.contains('meta.yml') }
@@ -109,13 +145,13 @@ class CmdModuleValidateTest extends Specification {
109145
Files.delete(moduleDir.resolve('README.md'))
110146

111147
when:
112-
def errors = ModuleValidator.validate(moduleDir)
148+
def errors = ModuleValidator.validate(moduleDir, schemaPath().toString())
113149

114150
then:
115151
errors.any { it.contains('README.md') }
116152
}
117153

118-
def 'should fail when meta.yml has missing required fields'() {
154+
def 'should fail when meta.yml is missing schema-required fields'() {
119155
given:
120156
def moduleDir = createValidModule()
121157
moduleDir.resolve('meta.yml').text = '''\
@@ -124,10 +160,31 @@ class CmdModuleValidateTest extends Specification {
124160
'''.stripIndent()
125161

126162
when:
127-
def errors = ModuleValidator.validate(moduleDir)
163+
def errors = ModuleValidator.validate(moduleDir, schemaPath().toString())
128164

129165
then:
166+
// schema-level validation runs first; reports missing required `description`
130167
errors.any { it.contains('description') }
168+
}
169+
170+
def 'should fail when meta.yml is missing nextflow-only fields'() {
171+
given:
172+
def moduleDir = createValidModule()
173+
moduleDir.resolve('meta.yml').text = '''\
174+
name: myorg/hello
175+
description: A test module
176+
input:
177+
- name: greeting
178+
type: string
179+
description: A greeting string
180+
'''.stripIndent()
181+
182+
when:
183+
def errors = ModuleValidator.validate(moduleDir, schemaPath().toString())
184+
185+
then:
186+
// schema passes, then ModuleSpec.validate() reports missing version + license
187+
errors.any { it.contains('version') }
131188
errors.any { it.contains('license') }
132189
}
133190

@@ -142,7 +199,7 @@ class CmdModuleValidateTest extends Specification {
142199
'''.stripIndent()
143200

144201
when:
145-
def errors = ModuleValidator.validate(moduleDir)
202+
def errors = ModuleValidator.validate(moduleDir, schemaPath().toString())
146203

147204
then:
148205
errors.any { it.contains('version') }

0 commit comments

Comments
 (0)