Skip to content

Commit 1f01517

Browse files
committed
Add database operation tool to convert sample sheets to list collections
This tool converts sample sheet collections back to their corresponding non-sample-sheet types by stripping the column_definitions and row column metadata:

- sample_sheet -> list
- sample_sheet:paired -> list:paired
- sample_sheet:paired_or_unpaired -> list:paired_or_unpaired
- sample_sheet:record -> list:record

Useful when a sample sheet needs to be passed to a tool that expects a regular list collection, or when discarding sample sheet metadata.
1 parent d8d7280 commit 1f01517

7 files changed

Lines changed: 259 additions & 0 deletions

File tree

lib/galaxy/config/sample/tool_conf.xml.sample

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
<tool file="${model_tools_path}/build_list.xml" />
5454
<tool file="${model_tools_path}/build_list_1.2.0.xml" />
5555
<tool file="${model_tools_path}/sample_sheet_to_tabular.xml" />
56+
<tool file="${model_tools_path}/convert_sample_sheet.xml" />
5657
<tool file="${model_tools_path}/extract_dataset.xml" />
5758
<tool file="${model_tools_path}/duplicate_file_to_collection.xml" />
5859
</section>

lib/galaxy/model/dataset_collections/types/sample_sheet_workbook.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,5 +618,24 @@ def _list_to_sample_sheet_collection_type(input_collection_type: str) -> SampleS
618618
)
619619

620620

621+
def _sample_sheet_to_list_collection_type(input_collection_type: str) -> str:
622+
"""Convert sample_sheet collection types to corresponding list collection types.
623+
624+
Converts sample_sheet types to list types (e.g., sample_sheet:paired -> list:paired).
625+
"""
626+
if input_collection_type == "sample_sheet":
627+
return "list"
628+
elif input_collection_type == "sample_sheet:paired":
629+
return "list:paired"
630+
elif input_collection_type == "sample_sheet:paired_or_unpaired":
631+
return "list:paired_or_unpaired"
632+
elif input_collection_type == "sample_sheet:record":
633+
return "list:record"
634+
else:
635+
raise RequestParameterInvalidException(
636+
f"Invalid collection type for sample sheet conversion: {input_collection_type}"
637+
)
638+
639+
621640
def _prefix_column_to_column_target(column_header: FetchPrefixColumn) -> ColumnTarget:
622641
return target_model_by_type(column_header.type)

lib/galaxy/tools/__init__.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
StoredWorkflow,
5858
)
5959
from galaxy.model.dataset_collections.matching import MatchingCollections
60+
from galaxy.model.dataset_collections.types.sample_sheet_workbook import _sample_sheet_to_list_collection_type
6061
from galaxy.schema.credentials import CredentialsContext
6162
from galaxy.tool_shed.util.repository_util import get_installed_repository
6263
from galaxy.tool_shed.util.shed_util_common import set_image_paths
@@ -4671,6 +4672,66 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
46714672
)
46724673

46734674

4675+
class ConvertSampleSheetTool(DatabaseOperationTool):
    """Database operation tool that turns a sample sheet collection into a list collection.

    The output holds copies of the input's datasets under the same element
    identifiers, but is created with the matching list collection type and
    without the sample sheet metadata (column_definitions and row columns).
    """

    tool_type = "convert_sample_sheet"
    require_terminal_states = False
    require_dataset_ok = False

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Copy the input collection's datasets into a new list-typed output collection.

        ``incoming["input"]`` is unwrapped to its underlying collection, the
        collection type is translated from its sample_sheet variant to the list
        variant, and every dataset (including those one level down in nested
        collections) is copied and added to ``history`` before the output
        collection is created.
        """
        source = incoming["input"]
        # Only a DCE has ``element_type``; anything else is treated as an HDCA.
        collection = source.element_object if hasattr(source, "element_type") else source.collection

        output_collection_type = _sample_sheet_to_list_collection_type(collection.collection_type)

        copied_datasets = []

        def copy_dataset(dataset):
            # Copy without flushing and remember the copy so it can be added to the history below.
            duplicate = dataset.copy(copy_tags=dataset.tags, flush=False)
            copied_datasets.append(duplicate)
            return duplicate

        new_elements = {}
        for dce in collection.elements:
            identifier = dce.element_identifier
            element_object = dce.element_object
            if not dce.is_collection:
                new_elements[identifier] = copy_dataset(element_object)
                continue
            # Nested collection (e.g. the pair inside sample_sheet:paired): describe it as a
            # new collection of the same type built from copies of its datasets.
            nested: dict[str, Any] = {
                "src": "new_collection",
                "collection_type": element_object.collection_type,
                "elements": {
                    sub_dce.element_identifier: copy_dataset(sub_dce.element_object)
                    for sub_dce in element_object.elements
                },
            }
            new_elements[identifier] = nested

        self._add_datasets_to_history(history, copied_datasets)
        output_collections.create_collection(
            next(iter(self.outputs.values())),
            "output",
            collection_type=output_collection_type,
            elements=new_elements,
            propagate_hda_tags=False,
        )
4733+
4734+
46744735
# Populate tool_type to ToolClass mappings
46754736
TOOL_CLASSES: list[type[Tool]] = [
46764737
Tool,
@@ -4690,6 +4751,7 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
46904751
BuildListCollectionTool,
46914752
ExtractDatasetCollectionTool,
46924753
DataDestinationTool,
4754+
ConvertSampleSheetTool,
46934755
]
46944756
tool_types = {tool_class.tool_type: tool_class for tool_class in TOOL_CLASSES}
46954757

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<tool id="__CONVERT_SAMPLE_SHEET__"
      name="Convert sample sheet"
      version="1.0.0"
      tool_type="convert_sample_sheet">
    <description>to list collection</description>
    <type class="ConvertSampleSheetTool" module="galaxy.tools" />
    <action module="galaxy.tools.actions.model_operations"
            class="ModelOperationToolAction"/>
    <edam_operations>
        <edam_operation>operation_2409</edam_operation>
    </edam_operations>
    <macros>
        <import>model_operation_macros.xml</import>
    </macros>
    <inputs>
        <!-- Accepted types: every sample sheet variant the tool converts, including sample_sheet:record. -->
        <param type="data_collection" collection_type="sample_sheet,sample_sheet:paired,sample_sheet:paired_or_unpaired,sample_sheet:record" name="input" label="Sample sheet to convert" />
    </inputs>
    <outputs>
        <!-- type_source is a lie, we don't have a way to encode the transform that is actually happening:
             the real output type is the list variant chosen by ConvertSampleSheetTool. -->
        <collection name="output" format_source="input" type_source="input" label="${on_string} (converted)" >
        </collection>
    </outputs>
    <help><![CDATA[

========
Synopsis
========

Converts a sample sheet collection back to its corresponding list collection type.

===========
Description
===========

This tool takes a sample sheet collection and produces a regular list collection, removing all sample sheet metadata (column definitions and row values).

The conversion follows this mapping:

- ``sample_sheet`` becomes ``list``
- ``sample_sheet:paired`` becomes ``list:paired``
- ``sample_sheet:paired_or_unpaired`` becomes ``list:paired_or_unpaired``
- ``sample_sheet:record`` becomes ``list:record``

Use this tool when you need to pass a sample sheet to a tool that expects a regular list collection, or when you want to discard the sample sheet metadata.

----

.. class:: infomark

@QUOTA_USAGE_NOTE@

    ]]></help>
</tool>

lib/galaxy_test/api/test_tools.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,89 @@ def test_extract_dataset_invalid_element_identifier(self):
751751
assert run_response.status_code == 400
752752
assert run_response.json()["err_msg"] == "Dataset collection has no element_index with key 100."
753753

754+
@skip_without_tool("__CONVERT_SAMPLE_SHEET__")
def test_convert_sample_sheet_to_list(self):
    """A flat sample_sheet converts to a plain list with no column definitions."""
    columns = [
        {"type": "int", "name": "replicate", "optional": False},
        {"type": "string", "name": "treatment", "optional": False},
    ]
    row_values = {"sample1": [1, "control"], "sample2": [2, "treatment"]}
    with self.dataset_populator.test_history(require_new=False) as history_id:
        # Build the sample_sheet input, complete with column_definitions and rows.
        created = self.dataset_collection_populator.create_sample_sheet(
            history_id,
            contents=[("sample1", "content1"), ("sample2", "content2")],
            column_definitions=columns,
            rows=row_values,
        )
        self._assert_status_code_is(created, 200)
        source_hdca = created.json()
        assert source_hdca["collection_type"] == "sample_sheet"
        assert source_hdca["column_definitions"] is not None

        # Convert it once the history has settled.
        tool_inputs = {"input": {"src": "hdca", "id": source_hdca["id"]}}
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)
        run_result = self._run("__CONVERT_SAMPLE_SHEET__", history_id, tool_inputs, assert_ok=True)

        # Exactly one output collection: a list without sample sheet metadata.
        produced = run_result["output_collections"]
        assert len(produced) == 1
        self.dataset_populator.wait_for_job(run_result["jobs"][0]["id"], assert_ok=True)
        converted = self.dataset_populator.get_history_collection_details(history_id, hid=produced[0]["hid"])
        assert converted["collection_type"] == "list"
        assert converted.get("column_definitions") is None
        assert len(converted["elements"]) == 2
        identifiers = [element["element_identifier"] for element in converted["elements"]]
        for expected_identifier in ("sample1", "sample2"):
            assert expected_identifier in identifiers
790+
791+
@skip_without_tool("__CONVERT_SAMPLE_SHEET__")
def test_convert_sample_sheet_paired_to_list_paired(self):
    """A sample_sheet:paired converts to list:paired and keeps its nested pair."""
    with self.dataset_populator.test_history(require_new=False) as history_id:
        # Build a sample_sheet:paired input holding a single pair named "sample1".
        forward_reverse = self.dataset_collection_populator.pair_identifiers(history_id, ["forward", "reverse"])
        paired_sample = {
            "name": "sample1",
            "collection_type": "paired",
            "src": "new_collection",
            "element_identifiers": forward_reverse,
        }
        created = self.dataset_collection_populator.create_sample_sheet(
            history_id,
            contents=[paired_sample],
            column_definitions=[{"type": "int", "name": "replicate", "default_value": 0, "optional": False}],
            rows={"sample1": [42]},
            collection_type="sample_sheet:paired",
        )
        self._assert_status_code_is(created, 200)
        source_hdca = created.json()
        assert source_hdca["collection_type"] == "sample_sheet:paired"
        assert source_hdca["column_definitions"] is not None

        # Convert it once the history has settled.
        tool_inputs = {"input": {"src": "hdca", "id": source_hdca["id"]}}
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)
        run_result = self._run("__CONVERT_SAMPLE_SHEET__", history_id, tool_inputs, assert_ok=True)

        # Exactly one output collection: a list:paired without sample sheet metadata.
        produced = run_result["output_collections"]
        assert len(produced) == 1
        self.dataset_populator.wait_for_job(run_result["jobs"][0]["id"], assert_ok=True)
        converted = self.dataset_populator.get_history_collection_details(history_id, hid=produced[0]["hid"])
        assert converted["collection_type"] == "list:paired"
        assert converted.get("column_definitions") is None
        assert len(converted["elements"]) == 1

        # The single element must still be a two-member paired collection.
        only_element = converted["elements"][0]
        assert only_element["element_type"] == "dataset_collection"
        nested_pair = only_element["object"]
        assert nested_pair["collection_type"] == "paired"
        assert len(nested_pair["elements"]) == 2
754837
@skip_without_tool("__FILTER_FAILED_DATASETS__")
755838
def test_filter_failed_list(self):
756839
with self.dataset_populator.test_history(require_new=False) as history_id:

lib/galaxy_test/base/populators.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3555,6 +3555,47 @@ def hda_to_identifier(i, hda):
35553555
element_identifiers = [hda_to_identifier(i, hda) for (i, hda) in enumerate(hdas)]
35563556
return element_identifiers
35573557

3558+
def create_sample_sheet(
    self,
    history_id: str,
    contents: list,
    column_definitions: list,
    rows: dict,
    name: str = "test sample sheet",
    collection_type: str = "sample_sheet",
):
    """Create a sample sheet collection, with its metadata, in a history.

    Args:
        history_id: The history ID to create the collection in.
        contents: Either a list of ``(name, dataset_content)`` 2-tuples for a flat
            sample sheet, or a ready-made list of element identifier dicts for
            nested collections.
        column_definitions: List of column definition dicts.
        rows: Dict mapping element identifiers to row values.
        name: Name for the collection.
        collection_type: The collection type (sample_sheet, sample_sheet:paired, etc).

    Returns:
        Response from creating the collection.
    """
    # Only tuple contents need turning into element identifiers; anything else
    # (including an empty list) is passed through as-is.
    is_flat = bool(contents) and isinstance(contents[0], tuple)
    element_identifiers = self.list_identifiers(history_id, contents) if is_flat else contents

    payload = {
        "name": name,
        "instance_type": "history",
        "history_id": history_id,
        "element_identifiers": element_identifiers,
        "collection_type": collection_type,
        "column_definitions": column_definitions,
        "rows": rows,
    }
    return self._create_collection(payload)
3598+
35583599
def __create(self, payload, wait=False):
35593600
# Create a collection - either from existing datasets using collection creation API
35603601
# or from direct uploads with the fetch API. Dispatch on "targets" keyword in payload

test/functional/tools/sample_tool_conf.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@
338338
<tool file="${model_tools_path}/build_list.xml" />
339339
<tool file="${model_tools_path}/build_list_1.2.0.xml" />
340340
<tool file="${model_tools_path}/sample_sheet_to_tabular.xml" />
341+
<tool file="${model_tools_path}/convert_sample_sheet.xml" />
341342
<tool file="${model_tools_path}/extract_dataset.xml" />
342343
<tool file="${model_tools_path}/duplicate_file_to_collection.xml" />
343344
<tool file="${model_tools_path}/split_paired_and_unpaired.xml" />

0 commit comments

Comments
 (0)