Skip to content

Commit 2b30c9f

Browse files
authored
Merge pull request galaxyproject#22563 from guerler/fix_22562
Skip missing history item ids in job exports
2 parents f2d23fb + 39787ac commit 2b30c9f

6 files changed

Lines changed: 113 additions & 7 deletions

File tree

client/src/api/schema/schema.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12108,6 +12108,11 @@ export interface components {
1210812108
* @description Data of an export record associated with a history that was archived.
1210912109
*/
1211012110
ExportRecordData: {
12111+
/**
12112+
* Ignore Errors
12113+
* @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt.
12114+
*/
12115+
ignore_errors?: boolean | null;
1211112116
/**
1211212117
* Include deleted
1211312118
* @description Include file contents for deleted datasets (if include_files is True).
@@ -25909,6 +25914,11 @@ export interface components {
2590925914
* @description Override xref for 'description domain' when generating BioCompute object.
2591025915
*/
2591125916
bco_override_xref?: components["schemas"]["XrefItem"][] | null;
25917+
/**
25918+
* Ignore Errors
25919+
* @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt.
25920+
*/
25921+
ignore_errors?: boolean | null;
2591225922
/**
2591325923
* Include deleted
2591425924
* @description Include file contents for deleted datasets (if include_files is True).
@@ -25940,6 +25950,11 @@ export interface components {
2594025950
};
2594125951
/** WriteStoreToPayload */
2594225952
WriteStoreToPayload: {
25953+
/**
25954+
* Ignore Errors
25955+
* @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt.
25956+
*/
25957+
ignore_errors?: boolean | null;
2594325958
/**
2594425959
* Include deleted
2594525960
* @description Include file contents for deleted datasets (if include_files is True).

lib/galaxy/managers/model_stores.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,11 @@ def write_history_to(self, request: WriteHistoryTo):
267267
uri: Optional[str] = None
268268
try:
269269
export_store = model.store.get_export_store_factory(
270-
self._app, model_store_format, export_files=export_files, user_context=user_context
270+
self._app,
271+
model_store_format,
272+
export_files=export_files,
273+
user_context=user_context,
274+
ignore_errors=request.ignore_errors,
271275
)(request.target_uri)
272276
with export_store:
273277
history = self._history_manager.by_id(request.history_id)

lib/galaxy/model/__init__.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ def __init__(
461461
serialize_dataset_objects: Optional[bool] = None,
462462
serialize_files_handler: Optional[SerializeFilesHandler] = None,
463463
strip_metadata_files: Optional[bool] = None,
464+
ignore_errors: Optional[bool] = False,
464465
) -> None:
465466
self.for_edit = for_edit
466467
if serialize_dataset_objects is None:
@@ -472,6 +473,10 @@ def __init__(
472473
# expect metadata tool to be rerun.
473474
strip_metadata_files = not for_edit
474475
self.strip_metadata_files = strip_metadata_files
476+
# When True, serializers emit best-effort output for histories whose imports left
477+
# unresolved references (orphan ImplicitCollectionJobsJobAssociation rows, null-id
478+
# job param refs) instead of raising. Intended for background archival exports.
479+
self.ignore_errors = ignore_errors
475480

476481
def attach_identifier(self, id_encoder, obj, ret_val):
477482
if self.for_edit and obj.id:
@@ -494,12 +499,13 @@ def get_identifier(self, id_encoder, obj):
494499
return obj.temp_id
495500

496501
def get_identifier_for_id(self, id_encoder, obj_id):
497-
if self.for_edit and obj_id:
498-
return obj_id
499-
elif obj_id:
500-
return id_encoder.encode_id(obj_id, kind="model_export")
501-
else:
502+
if not obj_id:
503+
if self.ignore_errors:
504+
return obj_id
502505
raise NotImplementedError()
506+
if self.for_edit:
507+
return obj_id
508+
return id_encoder.encode_id(obj_id, kind="model_export")
503509

504510
def serialize_files(self, dataset, as_dict):
505511
if self.serialize_files_handler is not None:
@@ -2959,7 +2965,11 @@ def _serialize(self, id_encoder, serialization_options):
29592965
rval = dict_for(
29602966
self,
29612967
populated_state=self.populated_state,
2962-
jobs=[serialization_options.get_identifier(id_encoder, j_a.job) for j_a in self.jobs],
2968+
jobs=[
2969+
serialization_options.get_identifier(id_encoder, j_a.job)
2970+
for j_a in self.jobs
2971+
if j_a.job is not None or not serialization_options.ignore_errors
2972+
],
29632973
)
29642974
serialization_options.attach_identifier(id_encoder, self, rval)
29652975
return rval

lib/galaxy/model/store/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,6 +1968,7 @@ def __init__(
19681968
strip_metadata_files: bool = True,
19691969
serialize_jobs: bool = True,
19701970
user_context=None,
1971+
ignore_errors: Optional[bool] = False,
19711972
) -> None:
19721973
"""
19731974
:param export_directory: path to export directory. Will be created if it does not exist.
@@ -2004,6 +2005,7 @@ def __init__(
20042005
serialize_dataset_objects=serialize_dataset_objects,
20052006
strip_metadata_files=strip_metadata_files,
20062007
serialize_files_handler=self,
2008+
ignore_errors=ignore_errors,
20072009
)
20082010
self.export_files = export_files
20092011
self.included_datasets: dict[model.DatasetInstance, tuple[model.DatasetInstance, bool]] = {}
@@ -3046,13 +3048,15 @@ def get_export_store_factory(
30463048
export_files=None,
30473049
bco_export_options: Optional[BcoExportOptions] = None,
30483050
user_context=None,
3051+
ignore_errors: Optional[bool] = False,
30493052
) -> Callable[[StrPath], FileSourceModelExportStore]:
30503053
export_store_class: type[FileSourceModelExportStore]
30513054
export_store_class_kwds = {
30523055
"app": app,
30533056
"export_files": export_files,
30543057
"serialize_dataset_objects": False,
30553058
"user_context": user_context,
3059+
"ignore_errors": ignore_errors,
30563060
}
30573061
if download_format in ["tar.gz", "tgz"]:
30583062
export_store_class = TarModelExportStore

lib/galaxy/schema/schema.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1926,6 +1926,15 @@ class WriteStoreToPayload(StoreExportPayload):
19261926
title="Target URI",
19271927
description="Galaxy Files URI to write model store content to.",
19281928
)
1929+
ignore_errors: Optional[bool] = Field(
1930+
default=None,
1931+
description=(
1932+
"Last resort. If True, skip serialization errors caused by missing "
1933+
"provenance (e.g. orphan implicit collection job associations, null "
1934+
"job param refs from older histories that pre-date collections) "
1935+
"instead of failing. Exported data may be incomplete or corrupt."
1936+
),
1937+
)
19291938

19301939

19311940
class ObjectExportResponseBase(Model):

test/unit/data/model/test_model_store.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,70 @@ def test_import_traceback_handling():
789789
assert exc.value.traceback == traceback_message
790790

791791

792+
def test_export_history_with_orphan_icjja(tmp_path):
793+
"""Orphan ImplicitCollectionJobsJobAssociation rows (job_id NULL) are
794+
persisted by the import path when an ICJ references a job key not in
795+
object_import_tracker.jobs_by_key. The next export crashes in
796+
get_identifier(j_a.job=None); ignore_errors skips the orphan."""
797+
app = _mock_app()
798+
u, h, _d1, _d2, j = _setup_simple_cat_job(app)
799+
800+
icj = model.ImplicitCollectionJobs()
801+
linked = model.ImplicitCollectionJobsJobAssociation()
802+
linked.order_index = 0
803+
linked.implicit_collection_jobs = icj
804+
linked.job = j
805+
to_orphan = model.ImplicitCollectionJobsJobAssociation()
806+
to_orphan.order_index = 1
807+
to_orphan.implicit_collection_jobs = icj
808+
to_orphan.job = j
809+
app.add_and_commit(icj, linked, to_orphan)
810+
811+
# Mimic the post-import state: drop the FK so the row becomes an orphan.
812+
to_orphan.job = None # type: ignore[assignment]
813+
app.commit()
814+
815+
with pytest.raises(AttributeError):
816+
with store.TarModelExportStore(str(tmp_path / "strict.tgz"), app=app, export_files="copy") as export_store:
817+
export_store.export_history(h)
818+
819+
tolerant_archive = str(tmp_path / "tolerant.tgz")
820+
with store.TarModelExportStore(tolerant_archive, app=app, export_files="copy", ignore_errors=True) as export_store:
821+
export_store.export_history(h)
822+
823+
imported_history = import_archive(tolerant_archive, app, u)
824+
imported_job = imported_history.datasets[1].creating_job
825+
imported_icj = imported_job.implicit_collection_jobs_association.implicit_collection_jobs
826+
assert len(imported_icj.jobs) == 1
827+
828+
829+
def test_export_history_with_null_param_id(tmp_path):
830+
"""Job params shaped {"src": "hda"|"hdca"|"dce", "id": null} are persisted
831+
by the import path at model/store/__init__.py:1860-1888 when a referenced
832+
HDA/HDCA/DCE can't be resolved. Strict export raises in
833+
get_identifier_for_id; ignore_errors passes the null through.
834+
835+
Reproducing the on-disk state directly: the only producer is the import
836+
path itself, so deleting the referenced HDA wouldn't null the persisted
837+
param JSON."""
838+
app = _mock_app()
839+
u, h, _d1, _d2, j = _setup_simple_cat_job(app)
840+
j.parameters = [model.JobParameter(name="input1", value=json.dumps({"src": "hda", "id": None}))]
841+
app.commit()
842+
843+
with pytest.raises(NotImplementedError):
844+
with store.TarModelExportStore(str(tmp_path / "strict.tgz"), app=app, export_files="copy") as export_store:
845+
export_store.export_history(h)
846+
847+
tolerant_archive = str(tmp_path / "tolerant.tgz")
848+
with store.TarModelExportStore(tolerant_archive, app=app, export_files="copy", ignore_errors=True) as export_store:
849+
export_store.export_history(h)
850+
851+
imported_history = import_archive(tolerant_archive, app, u)
852+
imported_job = imported_history.datasets[1].creating_job
853+
assert json.loads(imported_job.raw_param_dict()["input1"]) == {"src": "hda", "id": None}
854+
855+
792856
def test_import_export_edit_datasets():
793857
"""Test modifying existing HDA and dataset metadata with import."""
794858
app, h, temp_directory, import_history = _setup_simple_export({"for_edit": True})

0 commit comments

Comments (0)