Push UserToolSource semantic validation onto the pydantic model

mvdbeek · mvdbeek · commit 713d34b4d507 · 2026-05-03T07:09:54.000+02:00
Move the id pattern, container shape, citation DOI/BibTeX checks, blank
required-field checks, undeclared `inputs.<name>` references in
`shell_command` / `configfiles[*].content`, and dataset/collection output
discovery requirements onto field/model validators on
`_DynamicToolSourceBase`, `UserToolSource`, and `Citation`. Doing so
benefits API callers of `DynamicUnprivilegedToolCreatePayload`, not just
`CustomToolAgent`.

Tool ids accept `[a-z][a-z0-9_-]*` (hyphens allowed). Outputs of a tool
with `shell_command` must claim their bytes via `from_work_dir` or
`discover_datasets`; the prior "name appears in command" heuristic
produced both false positives and false negatives.

`format_validation_errors` distills `ValidationError` to friendly bullet
strings for reuse by the agent and API layers. `CustomToolAgent` walks
the cause chain on `UnexpectedModelBehavior` to surface the underlying
`ValidationError` as a low-confidence response with a bullet list, and
logs at debug since validation failure is the expected path while a
user is editing.
diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts
@@ -24919,7 +24919,7 @@ export interface components {
             help?: components["schemas"]["HelpContent"] | null;
             /**
              * id
-             * @description Unique identifier for the tool. Should be all lower-case and should not include whitespace.
+             * @description Unique identifier for the tool. Lowercase, must start with a letter, may contain letters, digits, '_' and '-'.
              * @example my-cool-tool
              */
             id?: string | null;
@@ -25018,7 +25018,7 @@ export interface components {
             help?: components["schemas"]["HelpContent"] | null;
             /**
              * id
-             * @description Unique identifier for the tool. Should be all lower-case and should not include whitespace.
+             * @description Unique identifier for the tool. Lowercase, must start with a letter, may contain letters, digits, '_' and '-'.
              * @example my-cool-tool
              */
             id?: string | null;
@@ -26725,7 +26725,7 @@ export interface components {
             help?: components["schemas"]["HelpContent"] | null;
             /**
              * id
-             * @description Unique identifier for the tool. Should be all lower-case and should not include whitespace.
+             * @description Unique identifier for the tool. Lowercase, must start with a letter, may contain letters, digits, '_' and '-'.
              * @example my-cool-tool
              */
             id?: string | null;
diff --git a/client/src/components/Tool/ToolSourceSchema.json b/client/src/components/Tool/ToolSourceSchema.json
diff --git a/lib/galaxy/agents/custom_tool.py b/lib/galaxy/agents/custom_tool.py
@@ -10,14 +10,18 @@
 )
 
 import yaml
+from pydantic import ValidationError
 from pydantic_ai import Agent
 from pydantic_ai.exceptions import (
     ModelHTTPError,
     UnexpectedModelBehavior,
 )
 
 from galaxy.schema.agents import ConfidenceLevel
-from galaxy.tool_util_models import UserToolSource
+from galaxy.tool_util_models import (
+    format_validation_errors,
+    UserToolSource,
+)
 from .base import (
     ActionSuggestion,
     ActionType,
@@ -32,6 +36,22 @@
 log = logging.getLogger(__name__)
 
 
+def _find_validation_error(exc: BaseException) -> Optional[ValidationError]:
+    """Walk the exception cause chain looking for a pydantic ValidationError.
+
+    pydantic-ai wraps validation failures inside UnexpectedModelBehavior after
+    exhausting retries; the original ValidationError surfaces via __cause__.
+    """
+    seen: set[int] = set()
+    current: Optional[BaseException] = exc
+    while current is not None and id(current) not in seen:
+        seen.add(id(current))
+        if isinstance(current, ValidationError):
+            return current
+        current = current.__cause__ or current.__context__
+    return None
+
+
 class CustomToolAgent(BaseGalaxyAgent):
     """Agent that creates custom Galaxy tools using UserToolSource schema.
 
@@ -178,6 +198,27 @@ async def process(self, query: str, context: Optional[dict[str, Any]] = None) ->
                 )
             raise
         except UnexpectedModelBehavior as e:
+            pydantic_error = _find_validation_error(e)
+            if pydantic_error is not None:
+                # Expected path: the user is editing the prompt iteratively and
+                # the LLM produced a tool definition that fails one of the
+                # UserToolSource validators. Surface the friendly bullet list
+                # rather than a generic "model misbehaved" message.
+                bullets = format_validation_errors(pydantic_error)
+                log.debug("CustomToolAgent validation failure: %s", bullets)
+                bullet_text = "\n".join(f"- {issue}" for issue in bullets)
+                return self._build_response(
+                    content=(
+                        "The model produced a tool definition, but it has problems "
+                        "that need to be fixed before it can be saved:\n\n"
+                        f"{bullet_text}"
+                    ),
+                    confidence=ConfidenceLevel.LOW,
+                    method="validation_error",
+                    query=query,
+                    error="validation_failed",
+                    agent_data={"validation_errors": bullets},
+                )
             log.warning(f"Model failed to produce valid tool definition: {e}")
             model = self._get_agent_config("model", "unknown")
             return self._build_response(
diff --git a/lib/galaxy/tool_util_models/__init__.py b/lib/galaxy/tool_util_models/__init__.py
@@ -4,11 +4,13 @@
 for reasoning about tool state externally from Galaxy.
 """
 
+import re
 from typing import (
     Any,
     Dict,
     List,
     Optional,
+    Set,
     Tuple,
     Union,
 )
@@ -19,6 +21,7 @@
     ConfigDict,
     Discriminator,
     Field,
+    field_validator,
     model_validator,
     RootModel,
     Tag,
@@ -41,6 +44,8 @@
 from .test_job import Job
 from .tool_outputs import (
     IncomingToolOutput,
+    IncomingToolOutputCollection,
+    IncomingToolOutputDataset,
     ToolOutput,
 )
 from .tool_source import (
@@ -64,6 +69,54 @@ def normalize_dict(values, keys: List[str]):
             values[key] = [{"name": k, **v} for k, v in items.items()]
 
 
+# Tool ID: lowercase, leading letter, letters/digits/'_'/'-'.
+_TOOL_ID_RE = re.compile(r"^[a-z][a-z0-9_-]*$")
+TOOL_ID_PATTERN = r"^[a-z][a-z0-9_-]*$"
+
+# Recognized container reference shapes. Kept module-level so future
+# admin-configurable code can extend them in place. Docker-Hub-style image
+# references allow optional registry path segments and an optional tag.
+CONTAINER_PREFIXES: Tuple[str, ...] = ("quay.io/biocontainers/", "docker://", "oras://")
+DOCKER_IMAGE_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*(/[a-zA-Z0-9._-]+)*(:[\w][\w.-]*)?$")
+
+# Templated ecmascript inside `shell_command` / `configfiles[*].content`.
+# Pull every $(<expr>) and extract the *leading* 'inputs.<name>' identifier.
+# Only the top-level name is checked, so nested references like
+# 'inputs.cond.test_parameter' or 'inputs.repeat[0].x' resolve against
+# the conditional / repeat / section's own top-level name -- no false
+# positives for nested structures. Computed/aliased references (e.g.
+# 'var x = inputs; x.foo') are intentionally not parsed; the goal is
+# catching obvious typos cheaply, not modelling ecmascript scope.
+_TEMPLATE_BLOCK_RE = re.compile(r"\$\((.*?)\)", re.DOTALL)
+_INPUTS_REF_RE = re.compile(r"\binputs\.([A-Za-z_][A-Za-z0-9_]*)")
+
+
+def _command_input_refs(text: Optional[str]) -> Set[str]:
+    refs: Set[str] = set()
+    if not text:
+        return refs
+    for block in _TEMPLATE_BLOCK_RE.findall(text):
+        for match in _INPUTS_REF_RE.findall(block):
+            refs.add(match)
+    return refs
+
+
+def format_validation_errors(exc: ValidationError) -> List[str]:
+    """Distill a pydantic ValidationError into a human-readable list.
+
+    Each entry is `<dotted.location>: <message>`, or just `<message>` for
+    model-level errors with no location. Suitable for surfacing directly to
+    a user (in the agent's bullet list, or as an API 4xx body).
+    """
+    lines: List[str] = []
+    for err in exc.errors():
+        loc_parts = [str(p) for p in err.get("loc", ()) if p not in ("__root__",)]
+        loc = ".".join(loc_parts)
+        msg = err.get("msg", "validation error")
+        lines.append(f"{loc}: {msg}" if loc else msg)
+    return lines
+
+
 class _DynamicToolSourceBase(ToolSourceBaseModel):
     # extra="forbid" rejects unknown top-level keys (e.g. a stray `argument:` at
     # the tool level), matching the strict-narrow stance on `inputs`.
@@ -75,17 +128,22 @@ class _DynamicToolSourceBase(ToolSourceBaseModel):
     id: Annotated[
         Optional[str],
         Field(
-            description="Unique identifier for the tool. Should be all lower-case and should not include whitespace.",
+            description=(
+                "Unique identifier for the tool. Lowercase, must start with a letter, "
+                "may contain letters, digits, '_' and '-'."
+            ),
             examples=["my-cool-tool"],
             min_length=3,
             max_length=255,
+            pattern=TOOL_ID_PATTERN,
         ),
     ] = None
     version: Annotated[Optional[str], Field(description="Version for the tool.", examples=["0.1.0"])] = None
     name: Annotated[
         str,
         Field(
-            description="The name of the tool, displayed in the tool menu. This is not the same as the tool id, which is a unique identifier for the tool."
+            description="The name of the tool, displayed in the tool menu. This is not the same as the tool id, which is a unique identifier for the tool.",
+            min_length=5,
         ),
     ]
     description: Annotated[
@@ -135,13 +193,65 @@ def normalize_items(cls, values):
             normalize_dict(values, ["inputs", "outputs"])
         return values
 
+    @field_validator("name", "version", mode="after")
+    @classmethod
+    def _reject_blank_strings(cls, v: Optional[str]) -> Optional[str]:
+        if v is not None and not v.strip():
+            raise ValueError("must not be empty or whitespace")
+        return v
+
+    @model_validator(mode="after")
+    def _check_input_refs_and_outputs(self) -> "_DynamicToolSourceBase":
+        declared_inputs: Set[str] = {param.root.name for param in self.inputs}
+
+        referenced: Set[str] = _command_input_refs(self.shell_command)
+        for configfile in self.configfiles or []:
+            referenced |= _command_input_refs(configfile.content)
+
+        errors: List[str] = []
+        for name in sorted(referenced - declared_inputs):
+            errors.append(f"references inputs.{name} but no input named '{name}' is declared")
+
+        for output in self.outputs:
+            if isinstance(output, IncomingToolOutputDataset):
+                if not output.from_work_dir and not output.discover_datasets:
+                    errors.append(
+                        f"output '{output.name}' must set 'from_work_dir' or 'discover_datasets' "
+                        "(otherwise its bytes will never be claimed from the working directory)"
+                    )
+            elif isinstance(output, IncomingToolOutputCollection):
+                if not output.structure.discover_datasets:
+                    errors.append(
+                        f"output collection '{output.name}' must set 'structure.discover_datasets' "
+                        "(otherwise no elements will be claimed from the working directory)"
+                    )
+
+        if errors:
+            raise ValueError("; ".join(errors))
+        return self
+
 
 class UserToolSource(_DynamicToolSourceBase):
     class_: Annotated[Literal["GalaxyUserTool"], Field(alias="class")]
     container: Annotated[
         str, Field(description="Container image to use for this tool.", examples=["quay.io/biocontainers/python:3.13"])
     ]
 
+    @field_validator("container", mode="after")
+    @classmethod
+    def _check_container_shape(cls, value: str) -> str:
+        if not value or not value.strip():
+            raise ValueError("container must not be empty")
+        stripped = value.strip()
+        if stripped.startswith(CONTAINER_PREFIXES):
+            return value
+        if DOCKER_IMAGE_RE.match(stripped):
+            return value
+        raise ValueError(
+            f"container '{value}' does not match a recognized shape "
+            "(quay.io/biocontainers/..., docker://..., oras://..., or <image>[:<tag>])"
+        )
+
 
 class YamlToolSource(_DynamicToolSourceBase):
     class_: Annotated[Literal["GalaxyTool"], Field(alias="class")]
diff --git a/lib/galaxy/tool_util_models/tool_source.py b/lib/galaxy/tool_util_models/tool_source.py
@@ -1,3 +1,4 @@
+import re
 from enum import Enum
 from typing import (
     List,
@@ -8,6 +9,7 @@
 from pydantic import (
     ConfigDict,
     Field,
+    model_validator,
     with_config,
 )
 from typing_extensions import (
@@ -142,10 +144,37 @@ class YamlTemplateConfigFile(TemplateConfigFile):
     eval_engine: Literal["ecmascript"] = "ecmascript"
 
 
+# DOI: '10.<registrant>/<suffix>' per Crossref's published shape.
+DOI_RE = re.compile(r"^10\.\d{4,9}/.+$")
+# BibTeX entries open with '@<type>{' -- e.g. '@article{', '@inproceedings{'.
+BIBTEX_RE = re.compile(r"^@[a-zA-Z]+\s*\{", re.MULTILINE)
+
+
 class Citation(ToolSourceBaseModel):
     type: str
     content: str
 
+    @model_validator(mode="after")
+    def _check_citation_shape(self) -> "Citation":
+        content = (self.content or "").strip()
+        if not content:
+            raise ValueError("citation content must not be empty")
+        citation_type = (self.type or "").strip().lower()
+        if citation_type == "doi":
+            if not DOI_RE.match(content):
+                raise ValueError(f"declared as DOI but '{content}' does not match DOI shape (^10\\.\\d{{4,9}}/.+$)")
+            return self
+        if citation_type == "bibtex":
+            if not BIBTEX_RE.search(content):
+                raise ValueError("declared as bibtex but content does not start with '@<type>{'")
+            return self
+        # Type wasn't explicitly doi/bibtex -- accept if the content shape is
+        # one of the two known forms. Lets a slightly mis-typed entry through
+        # instead of fighting models that emit type='reference' or similar.
+        if DOI_RE.match(content) or BIBTEX_RE.search(content):
+            return self
+        raise ValueError(f"citation (type={self.type!r}) is neither a recognizable DOI nor a BibTeX entry")
+
 
 class HelpContent(ToolSourceBaseModel):
     format: Literal["restructuredtext", "plain_text", "markdown"]
diff --git a/test/unit/tool_util/test_user_tool_source_validation.py b/test/unit/tool_util/test_user_tool_source_validation.py