Skip to content

Commit 1394d39

Browse files
committed
Raise ToolInputsNotReady for unpopulated structured_like target
When a tool output is declared ``structured_like=<non-mapped collection input>`` and the user maps the tool over an empty collection, param_combinations is empty and ExecutionTracker.example_params falls back to the raw param_template. The non-mapped collection's batch wrapper there is never substituted with an HDCA, so sliced_input_collection_structure crashes with "Referenced input parameter is not a collection." The real trigger observed in production was an upstream collection that had not finished populating yet. Move the representative-params logic onto MappingParameters so the fallback branch resolves batch wrappers (and bare {src,id} refs) to HDCA/DCE ORM objects, and raise ToolInputsNotReadyException when the referenced collection's populated_optimized is False. The scheduler already handles this exception by retrying, matching the behavior __expand_collection_parameter has for mapped-over HDCAs. Also switch sliced_input_collection_structure to derive the collection_type via get_collection(input_collection) so the DCE path stays consistent with the existing get_collection call that follows. Fixes GALAXY-MAIN-4KSCZZZ0015NC.
1 parent eda511a commit 1394d39

5 files changed

Lines changed: 200 additions & 9 deletions

File tree

lib/galaxy/tools/execute.py

Lines changed: 72 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
from packaging.version import Version
2222

2323
from galaxy import model
24-
from galaxy.exceptions import ToolInputsNotOKException
24+
from galaxy.exceptions import (
25+
ToolInputsNotOKException,
26+
ToolInputsNotReadyException,
27+
)
2528
from galaxy.model import ToolRequest
2629
from galaxy.model.dataset_collections.matching import MatchingCollections
2730
from galaxy.model.dataset_collections.structure import (
@@ -41,6 +44,7 @@
4144
ToolExecutionCache,
4245
)
4346
from galaxy.tools.parameters.workflow_utils import is_runtime_value
47+
from galaxy.work.context import WorkRequestContext
4448
from ._types import (
4549
ToolRequestT,
4650
ToolStateJobInstancePopulatedT,
@@ -87,6 +91,71 @@ def ensure_validated(self):
8791
assert self.validated_param_template is not None
8892
assert self.validated_param_combinations is not None
8993

94+
def example_params(self, trans: WorkRequestContext) -> ToolStateJobInstancePopulatedT:
    """Pick one parameter set to stand in for the whole request.

    Output-structure determination only needs a single representative
    set of per-job parameters. The first entry of
    ``param_combinations`` is used whenever at least one exists.

    When the request expands to zero jobs (e.g. mapping over an empty
    collection) there is nothing to pick from, so ``param_template`` is
    resolved instead: batch wrappers and raw ``{"src", "id"}`` refs are
    swapped for HDCA/DCE ORM objects.

    :class:`ToolInputsNotReadyException` propagates out of that
    resolution when a referenced collection exists but is not populated
    yet, so the scheduler retries instead of surfacing the cryptic
    "Referenced input parameter is not a collection." error.
    """
    combinations = self.param_combinations
    if not combinations:
        # Zero-job request: fall back to a resolved copy of the template.
        return _resolve_template(self.param_template, trans)
    return combinations[0]
109+
110+
111+
def _resolve_template(template: ToolRequestT, trans: WorkRequestContext) -> ToolStateJobInstancePopulatedT:
112+
return {key: _resolve_template_value(value, trans) for key, value in template.items()}
113+
114+
115+
def _resolve_template_value(value: Any, trans: WorkRequestContext) -> Any:
116+
if isinstance(value, dict):
117+
values = value.get("values")
118+
if (
119+
isinstance(values, list)
120+
and values
121+
and isinstance(values[0], dict)
122+
and "src" in values[0]
123+
and "id" in values[0]
124+
):
125+
return _resolve_collection_ref(values[0], trans, raw_fallback=value)
126+
if "src" in value and "id" in value:
127+
return _resolve_collection_ref(value, trans, raw_fallback=value)
128+
return {k: _resolve_template_value(v, trans) for k, v in value.items()}
129+
if isinstance(value, list):
130+
return [_resolve_template_value(v, trans) for v in value]
131+
return value
132+
133+
134+
def _resolve_collection_ref(
135+
ref: dict[str, Any],
136+
trans: WorkRequestContext,
137+
raw_fallback: Any,
138+
) -> Union[model.HistoryDatasetCollectionAssociation, model.DatasetCollectionElement, Any]:
139+
src = ref.get("src")
140+
rid = ref.get("id")
141+
if rid is None or src not in ("hdca", "dce"):
142+
return raw_fallback
143+
decoded = rid if isinstance(rid, int) else trans.security.decode_id(rid)
144+
sa_session = trans.sa_session
145+
if src == "hdca":
146+
hdca = sa_session.get(model.HistoryDatasetCollectionAssociation, decoded)
147+
if hdca is None:
148+
return raw_fallback
149+
if not hdca.collection.populated_optimized:
150+
raise ToolInputsNotReadyException("An input collection is not populated.")
151+
return hdca
152+
dce = sa_session.get(model.DatasetCollectionElement, decoded)
153+
if dce is None or dce.child_collection is None:
154+
return raw_fallback
155+
if not dce.child_collection.populated_optimized:
156+
raise ToolInputsNotReadyException("An input collection is not populated.")
157+
return dce
158+
90159

91160
def execute_async(
92161
trans,
@@ -416,13 +485,7 @@ def param_combinations(self) -> list[ToolStateJobInstancePopulatedT]:
416485

417486
@property
418487
def example_params(self):
419-
if self.mapping_params.param_combinations:
420-
return self.mapping_params.param_combinations[0]
421-
else:
422-
# TODO: This isn't quite right - what we want is something like param_template wrapped,
423-
# need a test case with an output filter applied to an empty list, still this is
424-
# an improvement over not allowing mapping of empty lists.
425-
return self.mapping_params.param_template
488+
return self.mapping_params.example_params(self.trans)
426489

427490
@property
428491
def job_count(self):
@@ -510,7 +573,7 @@ def find_collection(input_dict, input_name, path_prefix=""):
510573

511574
collection_type_description = (
512575
self.trans.app.dataset_collection_manager.collection_type_descriptions.for_collection_type(
513-
input_collection.collection.collection_type
576+
get_collection(input_collection).collection_type
514577
)
515578
)
516579
subcollection_mapping_type = None

lib/galaxy_test/api/test_tool_execute.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,27 @@ def test_map_over_empty_collection(target_history: TargetHistory, required_tool:
256256
assert "on collection 1" in name
257257

258258

259+
@requires_tool_id("collection_mapped_over_empty_structured_like")
def test_map_over_empty_with_structured_like_non_mapped_collection_input(
    target_history: TargetHistory, required_tool: RequiredTool
):
    """Regression guard for zero-job mapping with a ``structured_like`` output.

    An output declared ``structured_like=<non-mapped collection input>``
    must precreate an implicit output even when the mapped-over input is
    empty (zero jobs). Before the fix, example_params fell back to
    param_template where the non-mapped collection's batch wrapper was
    never substituted, and precreate crashed with
    "Referenced input parameter is not a collection."
    """
    mapped_over = target_history.with_list([])
    structure_source = target_history.with_pair(["a", "b"])
    batched_input = {"batch": True, "values": [mapped_over.src_dict]}
    execution = required_tool.execute().with_inputs(
        {
            "input1": batched_input,
            "shape": structure_source.src_dict,
        }
    )
    execution.assert_has_n_jobs(0)
    execution.assert_creates_implicit_collection(0)
278+
279+
259280
@dataclass
260281
class MultiRunInRepeatFixtures:
261282
repeat_datasets: list[SrcDict]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<tool id="collection_mapped_over_empty_structured_like" name="collection_mapped_over_empty_structured_like" version="0.1.0">
2+
<command><![CDATA[
3+
cat '$input1' '${shape.forward}' '${shape.reverse}' > '${list_output.forward}';
4+
cat '$input1' '${shape.reverse}' '${shape.forward}' > '${list_output.reverse}'
5+
]]></command>
6+
<inputs>
7+
<param name="input1" type="data" format="txt" label="Mapped-over data input" />
8+
<param name="shape" type="data_collection" collection_type="paired" format="txt" label="Structure source" />
9+
</inputs>
10+
<outputs>
11+
<collection name="list_output" structured_like="shape" type="paired" inherit_format="true" />
12+
</outputs>
13+
</tool>

test/functional/tools/sample_tool_conf.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@
189189
<tool file="collection_paired_default.xml" />
190190
<tool file="collection_paired_structured_like.xml" />
191191
<tool file="collection_paired_conditional_structured_like.xml" />
192+
<tool file="collection_mapped_over_empty_structured_like.xml" />
192193
<tool file="collection_nested_test.xml" />
193194
<tool file="collection_nested_default.xml" />
194195
<tool file="collection_mixed_param.xml" />
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Integration test for ToolInputsNotReady on structured_like/unpopulated input.
2+
3+
When a tool output is ``structured_like="<non-mapped collection input>"`` and
4+
the user maps the tool over an empty collection, implicit output collection
5+
precreation consults that input to determine output shape. If the referenced
6+
collection is still populating, we should raise ``ToolInputsNotReadyException``
7+
(HTTP 400, ``TOOL_INPUTS_NOT_READY``) rather than surfacing the cryptic
8+
"Referenced input parameter is not a collection." (Sentry issue GALAXY-MAIN-4KSCZZZ0015NC).
9+
10+
This test deterministically produces an unpopulated DatasetCollection by
11+
downgrading ``populated_state`` directly in the DB after the collection has
12+
been created via the standard fetch path — the only reliable way to simulate
13+
the race-window state from pure API tests.
14+
"""
15+
16+
from sqlalchemy import select
17+
18+
from galaxy.model import (
19+
DatasetCollection,
20+
HistoryDatasetCollectionAssociation,
21+
)
22+
from galaxy_test.base.populators import (
23+
DatasetCollectionPopulator,
24+
DatasetPopulator,
25+
)
26+
from galaxy_test.driver import integration_util
27+
28+
29+
class TestStructuredLikeUnpopulatedRaisesNotReady(integration_util.IntegrationTestCase):
    """Check the error returned when a ``structured_like`` target is unpopulated."""

    dataset_populator: DatasetPopulator
    dataset_collection_populator: DatasetCollectionPopulator
    framework_tool_and_types = True
    require_admin_user = True

    def setUp(self):
        super().setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)

    @property
    def sa_session(self):
        app_model = self._app.model
        return app_model.session

    def _mark_collection_unpopulated(self, hdca_id: str) -> None:
        # Translate the encoded API id into the database primary key.
        decode_response = self._get(f"configuration/decode/{hdca_id}")
        hdca_db_id = decode_response.json()["decoded_id"]

        session = self.sa_session
        lookup = select(HistoryDatasetCollectionAssociation).where(
            HistoryDatasetCollectionAssociation.id == hdca_db_id
        )
        association = session.scalar(lookup)
        assert association is not None

        # The DatasetCollection row behind the HDCA carries the populated state.
        collection = association.collection
        collection.populated_state = DatasetCollection.populated_states.NEW
        session.add(collection)
        session.commit()

    def test_unpopulated_structured_like_target_raises_not_ready(self):
        collections = self.dataset_collection_populator
        with self.dataset_populator.test_history() as history_id:
            empty_creation = collections.create_list_in_history(
                history_id, contents=[], direct_upload=True, wait=True
            )
            empty_hdca = empty_creation.json()["output_collections"][0]

            shape_creation = collections.create_pair_in_history(
                history_id, contents=["a", "b"], direct_upload=True, wait=True
            )
            shape_hdca = shape_creation.json()["output_collections"][0]

            # Simulate upstream "still populating": mirrors what happens
            # when the referenced collection is an implicit collection
            # whose producing jobs haven't finished yet.
            self._mark_collection_unpopulated(shape_hdca["id"])

            mapped_input = {"batch": True, "values": [{"src": "hdca", "id": empty_hdca["id"]}]}
            shape_input = {"src": "hdca", "id": shape_hdca["id"]}
            response = self.dataset_populator.run_tool_raw(
                tool_id="collection_mapped_over_empty_structured_like",
                inputs={"input1": mapped_input, "shape": shape_input},
                history_id=history_id,
            )

            # Expect HTTP 400 (ToolInputsNotReadyException) with the same
            # message meta.py raises for mapped-over unpopulated HDCAs,
            # not the cryptic "Referenced input parameter..." that used
            # to reach Sentry.
            assert response.status_code == 400, (
                f"Expected 400 for unpopulated input collection, got {response.status_code}: {response.text}"
            )
            assert "not populated" in response.text, f"Expected 'not populated' in error body, got: {response.text}"
            assert (
                "Referenced input parameter is not a collection" not in response.text
            ), "Regression: old cryptic error surfaced instead of ToolInputsNotReady"

0 commit comments

Comments
 (0)