Skip to content

Commit 1ca7ad8

Browse files
authored
Merge pull request #21489 from mvdbeek/add-sample-sheet-to-landing-requests
Enable attaching sample sheet to landing requests
2 parents 6f6aac1 + 55b76db commit 1ca7ad8

21 files changed

Lines changed: 757 additions & 83 deletions

File tree

client/src/api/schema/schema.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10461,6 +10461,8 @@ export interface components {
1046110461
class: "Collection";
1046210462
/** Collection Type */
1046310463
collection_type: string;
10464+
/** Column Definitions */
10465+
column_definitions?: components["schemas"]["SampleSheetColumnDefinition"][] | null;
1046410466
/**
1046510467
* Deferred
1046610468
* @default false
@@ -10473,6 +10475,10 @@ export interface components {
1047310475
)[];
1047410476
/** Name */
1047510477
name?: string | null;
10478+
/** Rows */
10479+
rows?: {
10480+
[key: string]: (number | boolean | string | null)[];
10481+
} | null;
1047610482
/** Src */
1047710483
src?: null;
1047810484
};
@@ -12742,6 +12748,10 @@ export interface components {
1274212748
items_from?: components["schemas"]["ElementsFromType"] | null;
1274312749
/** Name */
1274412750
name?: string | null;
12751+
/** Rows */
12752+
rows?: {
12753+
[key: string]: (number | boolean | string | null)[];
12754+
} | null;
1274512755
/**
1274612756
* Src
1274712757
* @constant
@@ -14471,6 +14481,10 @@ export interface components {
1447114481
name?: string | null;
1447214482
/** Path */
1447314483
path?: string | null;
14484+
/** Rows */
14485+
rows?: {
14486+
[key: string]: (number | boolean | string | null)[];
14487+
} | null;
1447414488
/** Server Dir */
1447514489
server_dir?: string | null;
1447614490
src: components["schemas"]["ItemsFromSrc"];
@@ -14509,6 +14523,10 @@ export interface components {
1450914523
)[];
1451014524
/** Name */
1451114525
name?: string | null;
14526+
/** Rows */
14527+
rows?: {
14528+
[key: string]: (number | boolean | string | null)[];
14529+
} | null;
1451214530
/** Tags */
1451314531
tags?: string[] | null;
1451414532
};

lib/galaxy/job_execution/output_collect.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,15 @@ def collect_dynamic_outputs(
125125
destination = unnamed_output_dict["destination"]
126126
elements = unnamed_output_dict["elements"]
127127

128+
# If rows are specified at the collection level, add them to individual elements
129+
# This is a defensive check in case rows weren't already distributed in data_fetch.py
130+
if "rows" in unnamed_output_dict:
131+
rows_dict = unnamed_output_dict["rows"]
132+
for element in elements:
133+
element_name = element.get("name")
134+
if element_name and element_name in rows_dict and "row" not in element:
135+
element["row"] = rows_dict[element_name]
136+
128137
assert "type" in destination
129138
destination_type = destination["type"]
130139
assert destination_type in ["library_folder", "hdca", "hdas"]

lib/galaxy/managers/landing.py

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,18 @@
1515
ItemAlreadyClaimedException,
1616
ItemMustBeClaimed,
1717
ObjectNotFound,
18+
RequestParameterInvalidException,
1819
RequestParameterMissingException,
1920
)
2021
from galaxy.managers.workflows import WorkflowContentsManager
2122
from galaxy.model import (
2223
ToolLandingRequest as ToolLandingRequestModel,
2324
WorkflowLandingRequest as WorkflowLandingRequestModel,
2425
)
26+
from galaxy.model.dataset_collections.types.sample_sheet_util import (
27+
validate_column_definitions,
28+
validate_row,
29+
)
2530
from galaxy.model.scoped_session import galaxy_scoped_session
2631
from galaxy.schema.schema import (
2732
ClaimLandingPayload,
@@ -41,7 +46,10 @@
4146
LandingRequestInternalToolState,
4247
LandingRequestToolState,
4348
)
44-
from galaxy.tool_util_models.parameters import DataOrCollectionRequestAdapter
49+
from galaxy.tool_util_models.parameters import (
50+
DataOrCollectionRequestAdapter,
51+
DataRequestCollectionUri,
52+
)
4553
from galaxy.util import safe_str_cmp
4654
from .context import ProvidesUserContext
4755
from .tools import (
@@ -86,6 +94,38 @@ def create_tool_landing_request(self, payload: CreateToolLandingRequestPayload,
8694
input_state=landing_request_state.input_state
8795
)
8896

97+
# Validate sample sheet metadata in request_state for __DATA_FETCH__ tool
98+
if tool.id == "__DATA_FETCH__" and request_state:
99+
100+
# Check each item in request_state for sample sheet metadata
101+
for item in landing_request_state.input_state.get("request_state", []):
102+
# Inspect raw "Collection" dict entries directly (no model parsing happens here) to access sample sheet fields
103+
if isinstance(item, dict) and item.get("class") == "Collection":
104+
column_definitions = item.get("column_definitions")
105+
rows = item.get("rows")
106+
collection_type = item.get("collection_type", "")
107+
108+
if column_definitions is not None or rows is not None:
109+
# Validate that sample sheet metadata is only used with sample_sheet collection types
110+
if not collection_type.startswith("sample_sheet"):
111+
raise RequestParameterInvalidException(
112+
f"Sample sheet metadata (column_definitions, rows) can only be used with collection_type 'sample_sheet' or 'sample_sheet:<type>', not '{collection_type}'"
113+
)
114+
115+
# Validate column definitions structure
116+
if column_definitions is not None:
117+
validate_column_definitions(column_definitions)
118+
119+
# Validate rows against column definitions and element identifiers
120+
if rows:
121+
element_identifiers = [elem.get("identifier") for elem in item.get("elements", [])]
122+
for identifier, row in rows.items():
123+
if identifier not in element_identifiers:
124+
raise RequestParameterInvalidException(
125+
f"Row identifier '{identifier}' not found in collection elements"
126+
)
127+
validate_row(row, column_definitions, element_identifiers)
128+
89129
model = ToolLandingRequestModel()
90130
model.tool_id = tool_id
91131
model.tool_version = tool_version
@@ -126,9 +166,35 @@ def validate_workflow_request_state(self, request_state: Optional[dict]) -> Opti
126166
if isinstance(value, dict):
127167
try:
128168
# persist values after model validators and aliases have been applied
129-
request_state[key] = DataOrCollectionRequestAdapter.validate_python(value).model_dump(
130-
by_alias=True, exclude_unset=True, mode="json"
131-
)
169+
validated_value = DataOrCollectionRequestAdapter.validate_python(value)
170+
171+
# Validate sample sheet metadata for collections
172+
if isinstance(validated_value, DataRequestCollectionUri):
173+
has_sample_sheet_metadata = (
174+
validated_value.column_definitions is not None or validated_value.rows is not None
175+
)
176+
if has_sample_sheet_metadata:
177+
collection_type = validated_value.collection_type
178+
if not collection_type.startswith("sample_sheet"):
179+
raise RequestParameterInvalidException(
180+
f"Sample sheet metadata (column_definitions, rows) can only be used with collection_type 'sample_sheet' or 'sample_sheet:<type>', not '{collection_type}'"
181+
)
182+
183+
# Validate column definitions structure
184+
if validated_value.column_definitions is not None:
185+
validate_column_definitions(validated_value.column_definitions)
186+
187+
# Validate rows against column definitions and element identifiers
188+
if validated_value.rows:
189+
element_identifiers = [elem.identifier for elem in validated_value.elements]
190+
for identifier, row in validated_value.rows.items():
191+
if identifier not in element_identifiers:
192+
raise RequestParameterInvalidException(
193+
f"Row identifier '{identifier}' not found in collection elements"
194+
)
195+
validate_row(row, validated_value.column_definitions, element_identifiers)
196+
197+
request_state[key] = validated_value.model_dump(by_alias=True, exclude_unset=True, mode="json")
132198
except ValidationError:
133199
pass
134200
return request_state

lib/galaxy/model/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,15 +182,17 @@
182182
DatasetValidatedState,
183183
InvocationsStateCounts,
184184
JobState,
185-
SampleSheetColumnDefinitions,
186-
SampleSheetRow,
187185
ToolRequestState,
188186
)
189187
from galaxy.schema.workflow.comments import WorkflowCommentModel
190188
from galaxy.security import get_permitted_actions
191189
from galaxy.security.idencoding import IdEncodingHelper
192190
from galaxy.security.validate_user_input import validate_password_str
193191
from galaxy.tool_util.output_checker import AnyJobMessage
192+
from galaxy.tool_util_models.sample_sheet import (
193+
SampleSheetColumnDefinitions,
194+
SampleSheetRow,
195+
)
194196
from galaxy.util import (
195197
directory_hash_id,
196198
enum_values,

lib/galaxy/model/dataset_collections/builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
BaseDatasetCollectionType,
2121
DatasetInstanceMapping,
2222
)
23-
from galaxy.schema.schema import SampleSheetRow
23+
from galaxy.tool_util_models.sample_sheet import SampleSheetRow
2424
from galaxy.tool_util_models.tool_source import FieldDict
2525

2626

lib/galaxy/model/dataset_collections/types/sample_sheet_util.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414
from typing_extensions import Self
1515

1616
from galaxy.exceptions import RequestParameterInvalidException
17-
from galaxy.schema.schema import (
17+
from galaxy.tool_util_models.parameter_validators import AnySafeValidatorModel
18+
from galaxy.tool_util_models.sample_sheet import (
1819
SampleSheetColumnDefinition,
1920
SampleSheetColumnDefinitions,
2021
SampleSheetColumnType,
2122
SampleSheetColumnValueT,
2223
SampleSheetRow,
2324
)
24-
from galaxy.tool_util_models.parameter_validators import AnySafeValidatorModel
25+
from galaxy.util import strip_control_characters
2526

2627
SampleSheetRows = dict[str, SampleSheetRow]
2728
OptionalSampleSheetRows = Optional[SampleSheetRows]
@@ -98,6 +99,10 @@ def validate_row(
9899
):
99100
if column_definitions is None:
100101
return
102+
if row is None:
103+
raise RequestParameterInvalidException(
104+
"Sample sheet row is missing. Ensure all element names in 'elements' have corresponding entries in 'rows'."
105+
)
101106
if len(row) != len(column_definitions):
102107
raise RequestParameterInvalidException(
103108
"Sample sheet row validation failed, incorrect number of columns specified."
@@ -141,7 +146,7 @@ def validate_column_value(
141146
elif column_type == "string":
142147
if not isinstance(column_value, (str,)):
143148
raise RequestParameterInvalidException(f"{column_value} was not a string as expected")
144-
validate_no_special_characters(column_value)
149+
strip_control_characters(column_value)
145150
elif column_type == "boolean":
146151
if not isinstance(column_value, (bool,)):
147152
raise RequestParameterInvalidException(f"{column_value} was not a boolean as expected")

lib/galaxy/model/dataset_collections/types/sample_sheet_workbook.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
ReadOnlyWorkbook,
4848
set_column_width,
4949
)
50-
from galaxy.schema.schema import SampleSheetColumnValueT
50+
from galaxy.tool_util_models.sample_sheet import SampleSheetColumnValueT
5151
from galaxy.util import (
5252
string_as_bool,
5353
string_as_bool_or_none,

lib/galaxy/model/dereference.py

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
Session,
1717
)
1818

19+
from galaxy.exceptions import RequestParameterInvalidException
1920
from galaxy.model import (
2021
Dataset,
2122
DatasetCollection,
@@ -29,6 +30,10 @@
2930
REQUESTED_TRANSFORM_ACTIONS,
3031
User,
3132
)
33+
from galaxy.model.dataset_collections.types.sample_sheet_util import (
34+
validate_column_definitions,
35+
validate_row,
36+
)
3237
from galaxy.model.scoped_session import galaxy_scoped_session
3338
from galaxy.tool_util_models.parameters import (
3439
CollectionElementCollectionRequestUri,
@@ -37,6 +42,7 @@
3742
DataRequestUri,
3843
FileRequestUri,
3944
)
45+
from galaxy.tool_util_models.sample_sheet import SampleSheetRow
4046

4147
log = logging.getLogger(__name__)
4248

@@ -97,13 +103,21 @@ def derefence_collection_element(
97103
element: CollectionElementCollectionRequestUri,
98104
parent_dataset_collection: DatasetCollection,
99105
element_index: int,
106+
rows: Optional[dict[str, SampleSheetRow]] = None,
100107
):
101108
child_dataset_collection = DatasetCollection(collection_type=element.collection_type)
109+
110+
# Extract row for this element if present
111+
columns = None
112+
if rows and element.identifier in rows:
113+
columns = rows[element.identifier]
114+
102115
DatasetCollectionElement(
103116
collection=parent_dataset_collection,
104117
element=child_dataset_collection,
105118
element_identifier=element.identifier,
106119
element_index=element_index,
120+
columns=columns,
107121
)
108122
sa_session.add(child_dataset_collection)
109123
for index, child_element in enumerate(element.elements):
@@ -126,39 +140,93 @@ def dereference_collection_dataset_element(
126140
element: CollectionElementDataRequestUri,
127141
parent_dataset_collection: DatasetCollection,
128142
element_index: int,
143+
rows: Optional[dict[str, SampleSheetRow]] = None,
129144
):
130145
hda = dereference_to_model(sa_session, user, history, element, add_to_history=False, visible=False)
131146
history.stage_addition(hda)
147+
148+
# Extract row for this element if present
149+
columns = None
150+
if rows and element.identifier in rows:
151+
columns = rows[element.identifier]
152+
132153
dce = DatasetCollectionElement(
133154
collection=parent_dataset_collection,
134155
element=hda,
135156
element_identifier=element.identifier,
136157
element_index=element_index,
158+
columns=columns,
137159
)
138160
parent_dataset_collection.elements.append(dce)
139161

140162

163+
def _validate_sample_sheet_metadata(
    data_request_uri: DataRequestCollectionUri,
):
    """Validate the sample sheet metadata carried by a collection request.

    Checks, in order:

    1. ``column_definitions`` / ``rows`` are only supplied when the
       ``collection_type`` starts with ``sample_sheet``.
    2. ``column_definitions`` (when given) pass ``validate_column_definitions``.
    3. Every key of ``rows`` matches the identifier of one of the request's
       top-level elements, and each row passes ``validate_row``.

    :param data_request_uri: the collection request to inspect; only its
        ``collection_type``, ``column_definitions``, ``rows`` and ``elements``
        attributes are read.
    :raises RequestParameterInvalidException: if sample sheet metadata is
        attached to a non sample sheet collection type, or if a row key does
        not correspond to any element identifier. The ``validate_*`` helpers
        presumably raise the same exception type for malformed definitions or
        rows - confirm against sample_sheet_util.
    """
    collection_type = data_request_uri.collection_type
    column_definitions = data_request_uri.column_definitions
    rows = data_request_uri.rows

    # An explicitly provided (even empty) value counts as "metadata present".
    has_sample_sheet_metadata = column_definitions is not None or rows is not None
    if has_sample_sheet_metadata and not collection_type.startswith("sample_sheet"):
        raise RequestParameterInvalidException(
            f"Sample sheet metadata (column_definitions, rows) can only be used with collection_type 'sample_sheet' or 'sample_sheet:<type>', not '{collection_type}'"
        )

    if column_definitions is not None:
        validate_column_definitions(column_definitions)

    # Nothing further to check when no rows were supplied (None or empty).
    if not rows:
        return

    # Keep the ordered list for validate_row, but test membership against a
    # set built once so the loop does not rescan the list for every row.
    element_identifiers = [elem.identifier for elem in data_request_uri.elements]
    known_identifiers = set(element_identifiers)

    for identifier, row in rows.items():
        if identifier not in known_identifiers:
            raise RequestParameterInvalidException(
                f"Row identifier '{identifier}' not found in collection elements"
            )
        validate_row(row, column_definitions, element_identifiers)
196+
197+
141198
def derefence_collection_to_model(
142199
sa_session: galaxy_scoped_session,
143200
user: User,
144201
history: History,
145202
data_request_uri: DataRequestCollectionUri,
146203
collection_name: str = "Collection",
147204
) -> HistoryDatasetCollectionAssociation:
205+
# Validate sample sheet metadata before creating any objects
206+
_validate_sample_sheet_metadata(data_request_uri)
207+
148208
name = data_request_uri.name or collection_name
149209
hdca = HistoryDatasetCollectionAssociation(
150210
name=name,
151211
history=history,
152212
)
153213
sa_session.add(hdca)
154-
dc = DatasetCollection(collection_type=data_request_uri.collection_type)
214+
dc = DatasetCollection(
215+
collection_type=data_request_uri.collection_type,
216+
column_definitions=data_request_uri.column_definitions,
217+
)
155218
sa_session.add(dc)
156219
hdca.collection = dc
220+
221+
# Extract rows for passing to element creation
222+
rows = data_request_uri.rows
223+
157224
for i, element in enumerate(data_request_uri.elements):
158225
if element.class_ == "File":
159-
dereference_collection_dataset_element(sa_session, user, history, element, dc, element_index=i)
226+
dereference_collection_dataset_element(sa_session, user, history, element, dc, element_index=i, rows=rows)
160227
elif element.class_ == "Collection":
161-
derefence_collection_element(sa_session, user, history, element, dc, i)
228+
derefence_collection_element(sa_session, user, history, element, dc, i, rows=rows)
229+
162230
dc.populated_state = "ok"
163231
dc.element_count = len(data_request_uri.elements)
164232
history.stage_addition(hdca)

0 commit comments

Comments
 (0)