Skip to content

Commit ac21af0

Browse files
committed
Add flag to selectively tolerate missing data during exports
1 parent 35b7c42 commit ac21af0

4 files changed

Lines changed: 51 additions & 16 deletions

File tree

lib/galaxy/managers/model_stores.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,11 @@ def write_history_to(self, request: WriteHistoryTo):
264264
uri: Optional[str] = None
265265
try:
266266
export_store = model.store.get_export_store_factory(
267-
self._app, model_store_format, export_files=export_files, user_context=user_context
267+
self._app,
268+
model_store_format,
269+
export_files=export_files,
270+
user_context=user_context,
271+
tolerate_missing_data=True,
268272
)(request.target_uri)
269273
with export_store:
270274
history = self._history_manager.by_id(request.history_id)

lib/galaxy/model/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ def __init__(
459459
serialize_dataset_objects: Optional[bool] = None,
460460
serialize_files_handler: Optional[SerializeFilesHandler] = None,
461461
strip_metadata_files: Optional[bool] = None,
462+
tolerate_missing_data: bool = False,
462463
) -> None:
463464
self.for_edit = for_edit
464465
if serialize_dataset_objects is None:
@@ -470,6 +471,10 @@ def __init__(
470471
# expect metadata tool to be rerun.
471472
strip_metadata_files = not for_edit
472473
self.strip_metadata_files = strip_metadata_files
474+
# When True, serializers emit best-effort output for histories whose imports left
475+
# unresolved references (orphan ImplicitCollectionJobsJobAssociation rows, null-id
476+
# job param refs) instead of raising. Intended for background archival exports.
477+
self.tolerate_missing_data = tolerate_missing_data
473478

474479
def attach_identifier(self, id_encoder, obj, ret_val):
475480
if self.for_edit and obj.id:
@@ -492,7 +497,11 @@ def get_identifier(self, id_encoder, obj):
492497
return obj.temp_id
493498

494499
def get_identifier_for_id(self, id_encoder, obj_id):
495-
if not obj_id or self.for_edit:
500+
if not obj_id:
501+
if self.tolerate_missing_data:
502+
return obj_id
503+
raise NotImplementedError()
504+
if self.for_edit:
496505
return obj_id
497506
return id_encoder.encode_id(obj_id, kind="model_export")
498507

@@ -2950,7 +2959,9 @@ def _serialize(self, id_encoder, serialization_options):
29502959
self,
29512960
populated_state=self.populated_state,
29522961
jobs=[
2953-
serialization_options.get_identifier(id_encoder, j_a.job) for j_a in self.jobs if j_a.job is not None
2962+
serialization_options.get_identifier(id_encoder, j_a.job)
2963+
for j_a in self.jobs
2964+
if j_a.job is not None or not serialization_options.tolerate_missing_data
29542965
],
29552966
)
29562967
serialization_options.attach_identifier(id_encoder, self, rval)

lib/galaxy/model/store/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1965,6 +1965,7 @@ def __init__(
19651965
strip_metadata_files: bool = True,
19661966
serialize_jobs: bool = True,
19671967
user_context=None,
1968+
tolerate_missing_data: bool = False,
19681969
) -> None:
19691970
"""
19701971
:param export_directory: path to export directory. Will be created if it does not exist.
@@ -2001,6 +2002,7 @@ def __init__(
20012002
serialize_dataset_objects=serialize_dataset_objects,
20022003
strip_metadata_files=strip_metadata_files,
20032004
serialize_files_handler=self,
2005+
tolerate_missing_data=tolerate_missing_data,
20042006
)
20052007
self.export_files = export_files
20062008
self.included_datasets: dict[model.DatasetInstance, tuple[model.DatasetInstance, bool]] = {}
@@ -3040,13 +3042,15 @@ def get_export_store_factory(
30403042
export_files=None,
30413043
bco_export_options: Optional[BcoExportOptions] = None,
30423044
user_context=None,
3045+
tolerate_missing_data: bool = False,
30433046
) -> Callable[[StrPath], FileSourceModelExportStore]:
30443047
export_store_class: type[FileSourceModelExportStore]
30453048
export_store_class_kwds = {
30463049
"app": app,
30473050
"export_files": export_files,
30483051
"serialize_dataset_objects": False,
30493052
"user_context": user_context,
3053+
"tolerate_missing_data": tolerate_missing_data,
30503054
}
30513055
if download_format in ["tar.gz", "tgz"]:
30523056
export_store_class = TarModelExportStore

test/unit/data/model/test_model.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ def test_io_dicts_excludes_implicit_output_collections():
5858
assert "paired_output" in io.out_collections
5959

6060

61-
def test_implicit_collection_jobs_serialize_skips_orphan_associations():
62-
"""ImplicitCollectionJobsJobAssociation.job_id is nullable; orphan rows
63-
(e.g. produced by partial imports) must be skipped on export instead of
64-
crashing in SerializationOptions.get_identifier on a None job."""
61+
def test_implicit_collection_jobs_serialize_orphan_associations():
62+
"""ImplicitCollectionJobsJobAssociation.job_id is nullable; orphan rows can
63+
be persisted by the import path. Strict mode (default) lets the AttributeError
64+
raised for the None job propagate; tolerate_missing_data mode skips the orphan."""
6565
icj = model.ImplicitCollectionJobs()
6666
job = model.Job()
6767
job.id = 42
@@ -73,15 +73,31 @@ def test_implicit_collection_jobs_serialize_skips_orphan_associations():
7373
orphan.order_index = 1 # job left as None
7474
icj.jobs = [linked, orphan]
7575

76-
rval = icj._serialize(id_encoder=None, serialization_options=model.SerializationOptions(for_edit=True))
76+
strict = model.SerializationOptions(for_edit=True)
77+
try:
78+
icj._serialize(id_encoder=None, serialization_options=strict)
79+
except AttributeError:
80+
pass
81+
else:
82+
raise AssertionError("strict mode should have raised on orphan ICJJA")
83+
84+
tolerant = model.SerializationOptions(for_edit=True, tolerate_missing_data=True)
85+
rval = icj._serialize(id_encoder=None, serialization_options=tolerant)
7786
assert rval["jobs"] == [42]
7887

7988

80-
def test_job_serialize_passes_through_null_param_ids():
81-
"""Job params can contain {"src": "hda"|"hdca"|"dce", "id": null} references
82-
(e.g. from corrupted state); export must pass null ids through instead of
83-
raising NotImplementedError in get_identifier_for_id."""
84-
options = model.SerializationOptions(for_edit=True)
85-
assert options.get_identifier_for_id(id_encoder=None, obj_id=None) is None
86-
assert options.get_identifier_for_id(id_encoder=None, obj_id=0) == 0
87-
assert options.get_identifier_for_id(id_encoder=None, obj_id=42) == 42
89+
def test_get_identifier_for_id_null_handling():
90+
"""Null job-param ids raise in strict mode and pass through in
91+
tolerate_missing_data mode. Because the check is `not obj_id`, an id of 0 is treated as missing too."""
92+
strict = model.SerializationOptions(for_edit=True)
93+
try:
94+
strict.get_identifier_for_id(id_encoder=None, obj_id=None)
95+
except NotImplementedError:
96+
pass
97+
else:
98+
raise AssertionError("strict mode should have raised on null obj_id")
99+
100+
tolerant = model.SerializationOptions(for_edit=True, tolerate_missing_data=True)
101+
assert tolerant.get_identifier_for_id(id_encoder=None, obj_id=None) is None
102+
assert tolerant.get_identifier_for_id(id_encoder=None, obj_id=0) == 0
103+
assert tolerant.get_identifier_for_id(id_encoder=None, obj_id=42) == 42

0 commit comments

Comments
 (0)