Skip to content

Commit d67adb6

Browse files
authored
Merge pull request #22507 from jmchilton/yaml_schema_harden
Narrow YAML schema
2 parents 46ec5e0 + bc7080c commit d67adb6

22 files changed

Lines changed: 5319 additions & 635 deletions

File tree

client/src/api/schema/schema.ts

Lines changed: 4183 additions & 531 deletions
Large diffs are not rendered by default.

client/src/components/Panels/UserToolPanel.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ function getToolBadges(tool: UnprivilegedToolResponse) {
8181
return [
8282
{
8383
id: "version",
84-
label: tool.representation.version,
84+
label: tool.representation.version ?? "",
8585
title: "Version of this custom tool",
8686
},
8787
];

client/src/components/Tool/ToolSourceSchema.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

doc/source/admin/user_defined_tools.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,66 @@ See https://training.galaxyproject.org/training-material/topics/admin/tutorials/
9393
and see https://github.com/galaxyproject/galaxy/blob/dev/test/integration/embedded_pulsar_job_conf.yml#L29 for a simple example that uses embedded pulsar to isolate mounts and disables network access.
9494
While the feature is in beta, we recommend that only trusted users be allowed to use it.
9595

96+
## Supported input parameter types
97+
98+
The YAML authoring layer exposes a deliberately narrow subset of Galaxy's
99+
parameter model. The JSON schema published to the editor (`ToolSourceSchema.json`)
100+
is generated from this subset and rejects any unknown fields via
101+
`extra="forbid"`, so unsupported attributes fail fast at parse time.
102+
103+
Supported leaf parameter types:
104+
105+
| Type | Supported fields (beyond `name`, `label`, `help`, `optional`) |
106+
| ----------------- | ---------------------------------------------------------------------- |
107+
| `boolean` | `value` |
108+
| `integer` | `value`, `min`, `max`, `validators` (`in_range` only) |
109+
| `float` | `value`, `min`, `max`, `validators` (`in_range` only) |
110+
| `text` | `value`, `area`, `validators` (`length`, `regex`, `empty_field`) |
111+
| `select` | `options` (static, non-empty), `multiple`, `validators` (`no_options`) |
112+
| `color` | `value` |
113+
| `data` | `format`, `multiple`, `min`, `max` |
114+
| `data_collection` | `collection_type`, `format` |
115+
116+
Supported structural groups: `conditional`, `repeat`, `section`. These recurse
117+
into the supported leaf set.
118+
119+
Example using several supported types:
120+
121+
```yaml
122+
class: GalaxyUserTool
123+
id: example_tool
124+
version: "0.1"
125+
name: Example
126+
container: busybox
127+
shell_command: echo "$(inputs.greeting) $(inputs.count)"
128+
inputs:
129+
- name: greeting
130+
type: select
131+
options:
132+
- { label: Hi, value: hi, selected: true }
133+
- { label: Hello, value: hello, selected: false }
134+
- name: count
135+
type: integer
136+
value: 1
137+
min: 1
138+
max: 10
139+
- name: extras
140+
type: repeat
141+
min: 0
142+
parameters:
143+
- name: input_file
144+
type: data
145+
format: txt
146+
outputs: []
147+
```
148+
149+
Types that exist in the XML tool vocabulary but are **not** supported in YAML
150+
user-defined tools and will be rejected at parse time: `hidden`, `drill_down`,
151+
`data_column`, `genomebuild`, `group_tag`, `baseurl`, `rules`, `directory`.
152+
XML-only fields such as `truevalue`, `falsevalue`, `argument`, `is_dynamic`,
153+
`hidden` (as a field), and `parameter_type` are likewise rejected on any
154+
parameter.
155+
96156
## Limitations
97157

98158
The user-defined tool language is still evolving, and additional safety audits are ongoing.

lib/galaxy/tool_util/parameters/convert.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Dict,
1010
List,
1111
Optional,
12+
Sequence,
1213
)
1314

1415
from galaxy.tool_util_models.parameters import (
@@ -537,12 +538,58 @@ def decode_callback(parameter: ToolParameterT, value: Any):
537538
CollectionToRuntimeJson = Callable[[DataCollectionRequestInternal, Optional[str]], Any]
538539

539540

541+
# Parameter models the narrow YAML authoring layer is allowed to produce.
542+
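# Mirrors the v1 supported set in `lib/galaxy/tool_util_models/yaml_parameters.py`.
# NOTE(review): the admin docs (user_defined_tools.md) list `color` as a supported YAML type, but no color parameter model appears in this set — confirm whether one should be added.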
543+
# Anything outside this set in a YAML-origin tool indicates a bug in the
544+
# authoring → internal mapping and should hard-fail before we try to build
545+
# runtime state.
546+
YAML_V1_SUPPORTED_PARAMETER_MODELS: frozenset = frozenset(
547+
{
548+
BooleanParameterModel,
549+
IntegerParameterModel,
550+
FloatParameterModel,
551+
TextParameterModel,
552+
SelectParameterModel,
553+
DataParameterModel,
554+
DataCollectionParameterModel,
555+
ConditionalParameterModel,
556+
RepeatParameterModel,
557+
SectionParameterModel,
558+
}
559+
)
560+
561+
562+
def assert_yaml_v1_parameters(parameters: Sequence[ToolParameterT]) -> None:
563+
"""Raise if any parameter (including nested ones) is outside the YAML v1 set.
564+
565+
Defense-in-depth: the narrow ``YamlGalaxyToolParameter`` cannot produce a
566+
disallowed type, so this guard should never fire for a legitimately
567+
constructed YAML tool. It exists to surface mapping bugs instead of letting
568+
an unsupported type silently pass through ``runtimeify``'s default-case
569+
``VISITOR_NO_REPLACEMENT``.
570+
"""
571+
for parameter in parameters:
572+
if type(parameter) not in YAML_V1_SUPPORTED_PARAMETER_MODELS:
573+
raise AssertionError(
574+
f"YAML-origin tool produced unsupported parameter type "
575+
f"{type(parameter).__name__} for parameter {getattr(parameter, 'name', '?')!r}"
576+
)
577+
if isinstance(parameter, ConditionalParameterModel):
578+
for when in parameter.whens:
579+
assert_yaml_v1_parameters(when.parameters)
580+
elif isinstance(parameter, (RepeatParameterModel, SectionParameterModel)):
581+
assert_yaml_v1_parameters(parameter.parameters)
582+
583+
540584
def runtimeify(
541585
internal_state: JobInternalToolState,
542586
input_models: ToolParameterBundle,
543587
adapt_dataset: DatasetToRuntimeJson,
544588
adapt_collection: CollectionToRuntimeJson,
589+
yaml_origin: bool = False,
545590
) -> JobRuntimeToolState:
591+
if yaml_origin:
592+
assert_yaml_v1_parameters(list(input_models.parameters))
546593

547594
def adapt_dict(value: dict):
548595
assert isinstance(value, dict), str(value)

lib/galaxy/tool_util/parser/interface.py

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
)
2727

2828
from galaxy.tool_util_models.parameter_validators import AnyValidatorModel
29+
from galaxy.tool_util_models.testing_types import (
30+
AssertionList,
31+
DirectCredential,
32+
)
2933
from galaxy.tool_util_models.tool_source import (
3034
BaseJsonTestCollectionDefCollectionElementDict,
3135
Citation,
@@ -66,13 +70,6 @@
6670
INPUT_CLASS_T = Literal["galaxy", "cwl"]
6771

6872

69-
class AssertionDict(TypedDict):
70-
tag: str
71-
attributes: Dict[str, Any]
72-
children: "AssertionList"
73-
74-
75-
AssertionList = Optional[List[AssertionDict]]
7673
XmlInt = Union[str, int]
7774

7875

@@ -117,25 +114,6 @@ class ToolSourceTestInput(TypedDict):
117114
attributes: ToolSourceTestInputAttributes
118115

119116

120-
class DirectCredentialValue(TypedDict):
121-
"""Represents a credential value (variable or secret) provided directly."""
122-
123-
name: str
124-
value: str
125-
126-
127-
class _DirectCredentialRequired(TypedDict):
128-
name: str # Name of the credentials group
129-
variables: List[DirectCredentialValue]
130-
secrets: List[DirectCredentialValue]
131-
132-
133-
class DirectCredential(_DirectCredentialRequired, total=False):
134-
"""Represents a credential group with variables and secrets provided directly."""
135-
136-
version: str # Version of the credential definition (defaults to "1.0")
137-
138-
139117
ToolSourceTestInputs = List[ToolSourceTestInput]
140118
ToolSourceTestOutputs = List[ToolSourceTestOutput]
141119
TestSourceTestOutputColllection = Any

lib/galaxy/tool_util/parser/xml.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
DEFAULT_SORT,
3232
)
3333
from galaxy.tool_util_models.parameter_validators import AnyValidatorModel
34+
from galaxy.tool_util_models.testing_types import (
35+
AssertionList,
36+
DirectCredential,
37+
)
3438
from galaxy.tool_util_models.tool_source import (
3539
Citation,
3640
DrillDownOptionsDict,
@@ -53,8 +57,6 @@
5357
xml_to_string,
5458
)
5559
from .interface import (
56-
AssertionList,
57-
DirectCredential,
5860
DrillDownDynamicOptions,
5961
DynamicOptions,
6062
InputSource,

lib/galaxy/tool_util/parser/yaml.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
ToolParameterBundleModel,
3333
ToolParameterT,
3434
)
35+
from galaxy.tool_util_models.testing_types import (
36+
AssertionDict,
37+
AssertionList,
38+
DirectCredential,
39+
)
3540
from galaxy.tool_util_models.tool_source import (
3641
HelpContent,
3742
JsonTestCollectionDefDict,
@@ -40,9 +45,6 @@
4045
)
4146
from galaxy.util import listify
4247
from .interface import (
43-
AssertionDict,
44-
AssertionList,
45-
DirectCredential,
4648
InputSource,
4749
PageSource,
4850
PagesSource,
@@ -273,7 +275,7 @@ def parse_tests_to_dict(self) -> ToolSourceTests:
273275
tests: List[ToolSourceTest] = []
274276
rval: ToolSourceTests = dict(tests=tests)
275277

276-
raw_tests = deepcopy(self.root_dict.get("tests", []))
278+
raw_tests = deepcopy(self.root_dict.get("tests") or [])
277279
for i, test_dict in enumerate(raw_tests):
278280
inputs = test_dict.get("inputs", {})
279281
state = TestCaseJsonToolState(inputs)
@@ -530,7 +532,11 @@ def parse_input_type(self):
530532
def parse_extensions(self):
531533
extensions = self.input_dict.get("extensions")
532534
if not extensions:
533-
extensions = self.get("format", "data").split(",")
535+
format_raw = self.get("format", "data")
536+
if isinstance(format_raw, str):
537+
extensions = format_raw.split(",")
538+
else:
539+
extensions = format_raw
534540
return [ext.strip().lower() for ext in extensions]
535541

536542
def parse_nested_inputs_source(self):

lib/galaxy/tool_util/verify/_types.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
)
1616

1717
from galaxy.tool_util.parser.interface import (
18-
AssertionList,
19-
DirectCredential,
2018
TestSourceTestOutputColllection,
2119
ToolSourceTestOutputs,
2220
)
21+
from galaxy.tool_util_models.testing_types import (
22+
AssertionList,
23+
DirectCredential,
24+
)
2325

2426
# legacy inputs for working with POST /api/tools
2527
# + inputs that have been processed with parse.py and expanded out

lib/galaxy/tool_util/verify/interactor.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,17 @@
4646
ToolParameterBundle,
4747
)
4848
from galaxy.tool_util.parser.interface import (
49-
AssertionList,
50-
DirectCredential,
5149
TestCollectionDef,
5250
TestCollectionOutputDef,
5351
TestSourceTestOutputColllection,
5452
ToolSourceTestOutputs,
5553
XmlTestCollectionDefDict,
5654
)
5755
from galaxy.tool_util.verify.test_data import TestDataResolver
56+
from galaxy.tool_util_models.testing_types import (
57+
AssertionList,
58+
DirectCredential,
59+
)
5860
from galaxy.tool_util_models.tool_source import (
5961
JsonTestCollectionDefDict,
6062
JsonTestDatasetDefDict,

0 commit comments

Comments
 (0)