Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions client/src/api/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24816,7 +24816,7 @@ export interface components {
help?: components["schemas"]["HelpContent"] | null;
/**
* id
* @description Unique identifier for the tool. Should be all lower-case and should not include whitespace.
* @description Unique identifier for the tool. Lowercase, must start with a letter, may contain letters, digits, '_' and '-'.
* @example my-cool-tool
*/
id?: string | null;
Expand Down Expand Up @@ -24915,7 +24915,7 @@ export interface components {
help?: components["schemas"]["HelpContent"] | null;
/**
* id
* @description Unique identifier for the tool. Should be all lower-case and should not include whitespace.
* @description Unique identifier for the tool. Lowercase, must start with a letter, may contain letters, digits, '_' and '-'.
* @example my-cool-tool
*/
id?: string | null;
Expand Down Expand Up @@ -26622,7 +26622,7 @@ export interface components {
help?: components["schemas"]["HelpContent"] | null;
/**
* id
* @description Unique identifier for the tool. Should be all lower-case and should not include whitespace.
* @description Unique identifier for the tool. Lowercase, must start with a letter, may contain letters, digits, '_' and '-'.
* @example my-cool-tool
*/
id?: string | null;
Expand Down
2 changes: 1 addition & 1 deletion client/src/components/Tool/ToolSourceSchema.json

Large diffs are not rendered by default.

43 changes: 42 additions & 1 deletion lib/galaxy/agents/custom_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,18 @@
)

import yaml
from pydantic import ValidationError
from pydantic_ai import Agent
from pydantic_ai.exceptions import (
ModelHTTPError,
UnexpectedModelBehavior,
)

from galaxy.schema.agents import ConfidenceLevel
from galaxy.tool_util_models import UserToolSource
from galaxy.tool_util_models import (
format_validation_errors,
UserToolSource,
)
from .base import (
ActionSuggestion,
ActionType,
Expand All @@ -32,6 +36,22 @@
log = logging.getLogger(__name__)


def _find_validation_error(exc: BaseException) -> Optional[ValidationError]:
    """Return the first pydantic ValidationError reachable from ``exc``, if any.

    The search starts at ``exc`` itself and then moves link by link, taking
    ``__cause__`` when it is set and ``__context__`` otherwise. pydantic-ai
    wraps validation failures inside UnexpectedModelBehavior after exhausting
    retries, so the original ValidationError is expected to sit further down
    that chain.

    Returns ``None`` when the chain ends, or loops back on an exception that
    was already visited, without meeting a ValidationError.
    """
    visited: set[int] = set()
    link: Optional[BaseException] = exc
    while True:
        # Stop on the end of the chain or on a cycle (tracked by object id).
        if link is None or id(link) in visited:
            return None
        if isinstance(link, ValidationError):
            return link
        visited.add(id(link))
        link = link.__cause__ or link.__context__


class CustomToolAgent(BaseGalaxyAgent):
"""Agent that creates custom Galaxy tools using UserToolSource schema.

Expand Down Expand Up @@ -178,6 +198,27 @@ async def process(self, query: str, context: Optional[dict[str, Any]] = None) ->
)
raise
except UnexpectedModelBehavior as e:
pydantic_error = _find_validation_error(e)
if pydantic_error is not None:
# Expected path: the user is editing the prompt iteratively and
# the LLM produced a tool definition that fails one of the
# UserToolSource validators. Surface the friendly bullet list
# rather than a generic "model misbehaved" message.
bullets = format_validation_errors(pydantic_error)
log.debug("CustomToolAgent validation failure: %s", bullets)
bullet_text = "\n".join(f"- {issue}" for issue in bullets)
return self._build_response(
content=(
"The model produced a tool definition, but it has problems "
"that need to be fixed before it can be saved:\n\n"
f"{bullet_text}"
),
confidence=ConfidenceLevel.LOW,
method="validation_error",
query=query,
error="validation_failed",
agent_data={"validation_errors": bullets},
)
log.warning(f"Model failed to produce valid tool definition: {e}")
model = self._get_agent_config("model", "unknown")
return self._build_response(
Expand Down
116 changes: 114 additions & 2 deletions lib/galaxy/tool_util_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
for reasoning about tool state externally from Galaxy.
"""

import re
from typing import (
Any,
Dict,
List,
Optional,
Set,
Tuple,
Union,
)

Expand All @@ -18,9 +21,11 @@
ConfigDict,
Discriminator,
Field,
field_validator,
model_validator,
RootModel,
Tag,
ValidationError,
)
from typing_extensions import (
Annotated,
Expand All @@ -39,6 +44,8 @@
from .test_job import Job
from .tool_outputs import (
IncomingToolOutput,
IncomingToolOutputCollection,
IncomingToolOutputDataset,
ToolOutput,
)
from .tool_source import (
Expand All @@ -62,6 +69,54 @@ def normalize_dict(values, keys: List[str]):
values[key] = [{"name": k, **v} for k, v in items.items()]


# Tool ID: lowercase, leading letter, letters/digits/'_'/'-'.
_TOOL_ID_RE = re.compile(r"^[a-z][a-z0-9_-]*$")
TOOL_ID_PATTERN = r"^[a-z][a-z0-9_-]*$"

# Recognized container reference shapes. Kept module-level so future
# admin-configurable code can extend them in place. Docker-Hub-style image
# references allow optional registry path segments and an optional tag.
CONTAINER_PREFIXES: Tuple[str, ...] = ("quay.io/biocontainers/", "docker://", "oras://")
DOCKER_IMAGE_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*(/[a-zA-Z0-9._-]+)*(:[\w][\w.-]*)?$")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a linter concern, not a tool source validation concern. Really -0.9 on this being here. This isn't a YAML tool concern; it is an abstract ToolSource concern. Create a lint rule and require it to be met for Galaxy User Defined tools behind some flag that defaults to that behavior.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is 3c97502 any better?

# Templated ecmascript inside `shell_command` / `configfiles[*].content`.
# Pull every $(<expr>) and extract the *leading* 'inputs.<name>' identifier.
# Only the top-level name is checked, so nested references like
# 'inputs.cond.test_parameter' or 'inputs.repeat[0].x' resolve against
# the conditional / repeat / section's own top-level name -- no false
# positives for nested structures. Computed/aliased references (e.g.
# 'var x = inputs; x.foo') are intentionally not parsed; the goal is
# catching obvious typos cheaply, not modelling ecmascript scope.
_TEMPLATE_BLOCK_RE = re.compile(r"\$\((.*?)\)", re.DOTALL)
_INPUTS_REF_RE = re.compile(r"\binputs\.([A-Za-z_][A-Za-z0-9_]*)")


def _command_input_refs(text: Optional[str]) -> Set[str]:
refs: Set[str] = set()
if not text:
return refs
for block in _TEMPLATE_BLOCK_RE.findall(text):
for match in _INPUTS_REF_RE.findall(block):
refs.add(match)
return refs


def format_validation_errors(exc: ValidationError) -> List[str]:
    """Turn a pydantic ValidationError into one readable line per error.

    A line reads `<dotted.location>: <message>`; errors whose location is
    empty (model-level errors) are rendered as just `<message>`. The lines
    are meant to be shown to a user as-is, e.g. in the agent's bullet list
    or as an API 4xx body.
    """

    def _render(detail) -> str:
        # Location parts may be ints (list indexes); "__root__" is dropped.
        location = ".".join(str(part) for part in detail.get("loc", ()) if part != "__root__")
        message = detail.get("msg", "validation error")
        return f"{location}: {message}" if location else message

    return [_render(detail) for detail in exc.errors()]


class _DynamicToolSourceBase(ToolSourceBaseModel):
# extra="forbid" rejects unknown top-level keys (e.g. a stray `argument:` at
# the tool level), matching the strict-narrow stance on `inputs`.
Expand All @@ -73,17 +128,22 @@ class _DynamicToolSourceBase(ToolSourceBaseModel):
id: Annotated[
Optional[str],
Field(
description="Unique identifier for the tool. Should be all lower-case and should not include whitespace.",
description=(
"Unique identifier for the tool. Lowercase, must start with a letter, "
"may contain letters, digits, '_' and '-'."
),
examples=["my-cool-tool"],
min_length=3,
max_length=255,
pattern=TOOL_ID_PATTERN,
),
] = None
version: Annotated[Optional[str], Field(description="Version for the tool.", examples=["0.1.0"])] = None
name: Annotated[
str,
Field(
description="The name of the tool, displayed in the tool menu. This is not the same as the tool id, which is a unique identifier for the tool."
description="The name of the tool, displayed in the tool menu. This is not the same as the tool id, which is a unique identifier for the tool.",
min_length=5,
),
]
description: Annotated[
Expand Down Expand Up @@ -133,13 +193,65 @@ def normalize_items(cls, values):
normalize_dict(values, ["inputs", "outputs"])
return values

@field_validator("name", "version", mode="after")
@classmethod
def _reject_blank_strings(cls, v: Optional[str]) -> Optional[str]:
    """Refuse ``name`` / ``version`` values that are empty or only whitespace.

    ``None`` and any string with at least one non-whitespace character are
    returned unchanged; anything else raises ``ValueError``.
    """
    if v is None or v.strip():
        return v
    raise ValueError("must not be empty or whitespace")

@model_validator(mode="after")
def _check_input_refs_and_outputs(self) -> "_DynamicToolSourceBase":
    """Cross-check template references and output declarations.

    Two kinds of problem are gathered:

    * an ``inputs.<name>`` reference in ``shell_command`` or a configfile's
      ``content`` whose ``<name>`` is not the name of a declared input;
    * a dataset output with neither ``from_work_dir`` nor
      ``discover_datasets``, or a collection output without
      ``structure.discover_datasets``.

    All problems are reported together in a single ``ValueError`` (joined
    with "; "); when there are none the model is returned unchanged.
    """
    known_inputs: Set[str] = {param.root.name for param in self.inputs}

    used_inputs: Set[str] = set(_command_input_refs(self.shell_command))
    for configfile in self.configfiles or []:
        used_inputs.update(_command_input_refs(configfile.content))

    # Sorted so the message is stable regardless of set ordering.
    problems: List[str] = [
        f"references inputs.{name} but no input named '{name}' is declared"
        for name in sorted(used_inputs - known_inputs)
    ]

    for output in self.outputs:
        if isinstance(output, IncomingToolOutputDataset):
            if not (output.from_work_dir or output.discover_datasets):
                problems.append(
                    f"output '{output.name}' must set 'from_work_dir' or 'discover_datasets' "
                    "(otherwise its bytes will never be claimed from the working directory)"
                )
        elif isinstance(output, IncomingToolOutputCollection):
            if not output.structure.discover_datasets:
                problems.append(
                    f"output collection '{output.name}' must set 'structure.discover_datasets' "
                    "(otherwise no elements will be claimed from the working directory)"
                )

    if not problems:
        return self
    raise ValueError("; ".join(problems))


class UserToolSource(_DynamicToolSourceBase):
    # Tool source whose `class` is "GalaxyUserTool"; a container reference is
    # mandatory. (Documented with comments on purpose: a class docstring would
    # be emitted as the model description in the generated JSON schema.)
    class_: Annotated[Literal["GalaxyUserTool"], Field(alias="class")]
    container: Annotated[
        str, Field(description="Container image to use for this tool.", examples=["quay.io/biocontainers/python:3.13"])
    ]

    @field_validator("container", mode="after")
    @classmethod
    def _check_container_shape(cls, value: str) -> str:
        """Validate the container reference and return it without padding.

        The reference is accepted when, after stripping surrounding
        whitespace, it starts with one of ``CONTAINER_PREFIXES`` or matches
        ``DOCKER_IMAGE_RE``. The stripped form is what gets stored, so the
        value kept on the model is exactly the string that was validated
        (previously the padded original was returned).

        Raises:
            ValueError: if the reference is empty/whitespace-only or matches
                none of the recognized shapes.
        """
        stripped = value.strip()
        if not stripped:
            raise ValueError("container must not be empty")
        if stripped.startswith(CONTAINER_PREFIXES) or DOCKER_IMAGE_RE.match(stripped):
            return stripped
        raise ValueError(
            f"container '{value}' does not match a recognized shape "
            "(quay.io/biocontainers/..., docker://..., oras://..., or <image>[:<tag>])"
        )


class YamlToolSource(_DynamicToolSourceBase):
class_: Annotated[Literal["GalaxyTool"], Field(alias="class")]
Expand Down
29 changes: 29 additions & 0 deletions lib/galaxy/tool_util_models/tool_source.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from enum import Enum
from typing import (
List,
Expand All @@ -8,6 +9,7 @@
from pydantic import (
ConfigDict,
Field,
model_validator,
with_config,
)
from typing_extensions import (
Expand Down Expand Up @@ -142,10 +144,37 @@ class YamlTemplateConfigFile(TemplateConfigFile):
eval_engine: Literal["ecmascript"] = "ecmascript"


# A DOI has the form '10.<registrant>/<suffix>' (the shape Crossref
# publishes): the registrant is 4-9 digits and the suffix is non-empty.
DOI_RE = re.compile(r"^10\.\d{4,9}/.+$")
# A BibTeX entry opens with '@<type>{', e.g. '@article{' or
# '@inproceedings{'. MULTILINE lets the opener sit at the start of any line.
BIBTEX_RE = re.compile(r"^@[a-zA-Z]+\s*\{", flags=re.M)


class Citation(ToolSourceBaseModel):
    # `type` is the declared citation kind, `content` the citation text.
    type: str
    content: str

    @model_validator(mode="after")
    def _check_citation_shape(self) -> "Citation":
        """Check that ``content`` looks like what ``type`` says it is.

        * Empty (or whitespace-only) content is always rejected.
        * ``type`` "doi" (case-insensitive, padding ignored) requires the
          stripped content to match ``DOI_RE``.
        * ``type`` "bibtex" requires ``BIBTEX_RE`` to be found in the content.
        * Any other ``type`` is tolerated as long as the content matches one
          of the two shapes, so a slightly mis-typed entry (e.g.
          type='reference') still gets through.

        Raises ``ValueError`` when the applicable check fails.
        """
        body = (self.content or "").strip()
        if not body:
            raise ValueError("citation content must not be empty")

        looks_like_doi = DOI_RE.match(body) is not None
        looks_like_bibtex = BIBTEX_RE.search(body) is not None
        declared = (self.type or "").strip().lower()

        if declared == "doi":
            if looks_like_doi:
                return self
            raise ValueError(f"declared as DOI but '{body}' does not match DOI shape (^10\\.\\d{{4,9}}/.+$)")

        if declared == "bibtex":
            if looks_like_bibtex:
                return self
            raise ValueError("declared as bibtex but content does not start with '@<type>{'")

        # Neither doi nor bibtex was declared: accept on content shape alone.
        if looks_like_doi or looks_like_bibtex:
            return self
        raise ValueError(f"citation (type={self.type!r}) is neither a recognizable DOI nor a BibTeX entry")


class HelpContent(ToolSourceBaseModel):
format: Literal["restructuredtext", "plain_text", "markdown"]
Expand Down
Loading
Loading