Skip to content

Commit edd2c8f

Browse files
jmchilton and claude committed
Add recursive subworkflow linting and output source validation
Recurse into inline subworkflows (run: {class: GalaxyWorkflow}) in lint_format2, matching lint_ga's existing recursion pattern. Add _validate_output_sources to catch dangling outputSource references that point to nonexistent steps — this fixes the nested_no_steps bug. Add LintContext.child(prefix) for nested error context paths, e.g. "[step nested_workflow] Output 'x' references step 'y'...". This resolves the former xfail test — the linter now reports the error and exits with code 2. See #162 for an expanded lint mode (URL/@import subworkflows). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 78b901f commit edd2c8f

3 files changed

Lines changed: 56 additions & 27 deletions

File tree

gxformat2/lint.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,16 +118,58 @@ def lint_format2(lint_context, workflow_dict, path=None):
118118
lint_context.error("Validation failed " + str(e))
119119

120120
steps = ensure_key_if_present(lint_context, workflow_dict, "steps", default={}, has_class=(dict, list))
121-
steps = steps.values() if isinstance(steps, dict) else steps
122-
for step in steps:
121+
steps_items = list(walk_id_list_or_dict(steps)) if steps else []
122+
for step_key, step in steps_items:
123123
_lint_step_errors(lint_context, step)
124124
_lint_tool_if_present(lint_context, step)
125125

126+
# Recurse into inline subworkflows
127+
run = step.get("run")
128+
if isinstance(run, dict) and run.get("class") == "GalaxyWorkflow":
129+
step_label = step.get("label") or step_key or "?"
130+
child_ctx = lint_context.child(f"step {step_label}")
131+
lint_format2(child_ctx, run, path=path)
132+
133+
_validate_output_sources(lint_context, workflow_dict, steps_items)
126134
_validate_input_types(lint_context, workflow_dict)
127135
_validate_report(lint_context, workflow_dict)
128136
_lint_training(lint_context, workflow_dict)
129137

130138

139+
def _validate_output_sources(lint_context, workflow_dict, steps_items):
140+
"""Check that outputSource references point to existing step/input labels."""
141+
step_labels = set()
142+
for step_key, step in steps_items:
143+
label = step.get("label") or step_key
144+
if label is not None:
145+
step_labels.add(str(label))
146+
147+
# Also include input labels
148+
inputs = workflow_dict.get("inputs")
149+
if inputs:
150+
for input_key, inp in walk_id_list_or_dict(inputs):
151+
label = inp.get("label") or input_key if isinstance(inp, dict) else input_key
152+
if label is not None:
153+
step_labels.add(str(label))
154+
155+
outputs = workflow_dict.get("outputs")
156+
if not outputs:
157+
return
158+
for output_key, output in walk_id_list_or_dict(outputs):
159+
if not isinstance(output, dict):
160+
continue
161+
output_source = output.get("outputSource")
162+
if not output_source or not isinstance(output_source, str):
163+
continue
164+
# Extract step reference — format is "step_label/output_name" or just "step_label"
165+
step_ref = output_source.split("/")[0]
166+
if step_ref not in step_labels:
167+
lint_context.error(
168+
f"Output '{output_key}' references step '{step_ref}' via outputSource "
169+
f"'{output_source}', but no step or input with that label exists"
170+
)
171+
172+
131173
def _validate_input_types(lint_context: LintContext, workflow_dict: dict):
132174
try:
133175
inputs = Inputs(workflow_dict)

gxformat2/linting.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,31 @@
1212
class LintContext:
1313
"""Track running status (state) of linting."""
1414

15-
def __init__(self, level=LEVEL_WARN, training_topic=DEFAULT_TRAINING_LINT):
15+
def __init__(self, level=LEVEL_WARN, training_topic=DEFAULT_TRAINING_LINT, _prefix=""):
1616
"""Create LintContext with specified 'level' (currently unused)."""
1717
self.level = level
1818
self.training_topic = training_topic
1919
self.found_errors = False
2020
self.found_warns = False
21+
self._prefix = _prefix
2122

2223
# self.valid_messages = []
2324
# self.info_messages = []
2425
self.warn_messages = []
2526
self.error_messages = []
2627

28+
def child(self, prefix):
    """Create child context that prefixes messages and shares parent message lists.

    The child is built with this context's ``level`` and ``training_topic``
    and is bound to the very same ``warn_messages`` / ``error_messages``
    list objects, so anything the child records is visible on the parent.

    Prefixes nest inside a single bracket pair: a top-level context yields
    ``"[prefix] "``, and a child of a context whose prefix is ``"[outer] "``
    yields ``"[outer > prefix] "``.

    :param prefix: short description of the nested scope, e.g. ``"step foo"``.
    :returns: a new ``LintContext`` carrying the combined prefix.
    """
    if self._prefix:
        parent = self._prefix
        # The parent prefix ends with "] " (closing bracket + space). Both
        # characters must be dropped before appending, otherwise the result
        # is "[outer] > inner] " with unbalanced brackets.
        if parent.endswith("] "):
            parent = parent[:-2]
        full_prefix = f"{parent} > {prefix}] "
    else:
        full_prefix = f"[{prefix}] "

    child_ctx = LintContext(level=self.level, training_topic=self.training_topic, _prefix=full_prefix)
    # Share (not copy) the lists so child messages accumulate on the parent.
    child_ctx.warn_messages = self.warn_messages
    child_ctx.error_messages = self.error_messages
    return child_ctx
35+
2736
def __handle_message(self, message_list, message, *args, **kwds):
2837
if kwds or args:
2938
message = message.format(*args, **kwds)
30-
message_list.append(message)
39+
message_list.append(f"{self._prefix}{message}")
3140

3241
# def valid(self, message, *args, **kwds):
3342
# self.__handle_message(self.valid_messages, message, *args, **kwds)

tests/test_lint.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -172,14 +172,7 @@ def setup_module(module):
172172

173173
invalid_format2_nested = _deep_copy(green_format2_nested)
174174
del invalid_format2_nested["steps"]["nested_workflow"]["run"]["steps"]
175-
# BUG: should be exit code 2 but widening WorkflowStep.run to Any?
176-
# (for URL/import support in d7e5dae) means neither schema-salad nor
177-
# pydantic validates nested subworkflow structure. The subworkflow dict
178-
# passes as an opaque dict instead of being validated as GalaxyWorkflow.
179-
# Normalization also doesn't catch it — the dict stays raw and to_native
180-
# crashes with a label lookup failure. Needs a lint rule that validates
181-
# inline subworkflow dicts independently.
182-
_dump_with_exit_code(invalid_format2_nested, 0, "format2_nested_no_steps")
175+
_dump_with_exit_code(invalid_format2_nested, 2, "format2_nested_no_steps")
183176

184177
invalid_native_nested = _deep_copy(green_native_nested)
185178
del invalid_native_nested["steps"]["2"]["subworkflow"]["steps"]
@@ -265,8 +258,6 @@ def setup_module(module):
265258
# ensure that round tripping all green format2 workflows still lint green.
266259
for file_name in os.listdir(TEST_LINT_EXAMPLES):
267260
if file_name.startswith("0_format2") and "roundtrip" not in file_name:
268-
if "no_steps" in file_name:
269-
continue # tested separately via test_roundtrip_nested_no_steps
270261
roundtrip_contents = round_trip(open(os.path.join(TEST_LINT_EXAMPLES, file_name)).read())
271262
base = os.path.splitext(file_name)[0][len("0_") :]
272263
_dump_with_exit_code(roundtrip_contents, 0, base + "_roundtrip")
@@ -275,19 +266,6 @@ def setup_module(module):
275266
SKIP_BP = "--skip-best-practices"
276267

277268

278-
@pytest.mark.xfail(
279-
reason="WorkflowStep.run widened to Any? for URL/import support — "
280-
"inline subworkflow dicts are not validated as GalaxyWorkflow, "
281-
"so missing steps is undetected. Needs a dedicated lint rule.",
282-
raises=ValueError,
283-
)
284-
def test_roundtrip_nested_no_steps():
285-
"""Round-tripping a nested workflow with deleted steps should fail lint."""
286-
path = os.path.join(TEST_LINT_EXAMPLES, "0_format2_nested_no_steps.yml")
287-
if os.path.exists(path):
288-
round_trip(open(path).read())
289-
290-
291269
def test_lint_ga_basic():
292270
assert main(["lint", SKIP_BP, example_path("real-shed-tools-raw.ga")]) == 1 # no outputs
293271

0 commit comments

Comments
 (0)