diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index 943dabe0561d..b171e21e0f37 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -12108,6 +12108,11 @@ export interface components { * @description Data of an export record associated with a history that was archived. */ ExportRecordData: { + /** + * Ignore Errors + * @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt. + */ + ignore_errors?: boolean | null; /** * Include deleted * @description Include file contents for deleted datasets (if include_files is True). @@ -25909,6 +25914,11 @@ export interface components { * @description Override xref for 'description domain' when generating BioCompute object. */ bco_override_xref?: components["schemas"]["XrefItem"][] | null; + /** + * Ignore Errors + * @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt. + */ + ignore_errors?: boolean | null; /** * Include deleted * @description Include file contents for deleted datasets (if include_files is True). @@ -25940,6 +25950,11 @@ export interface components { }; /** WriteStoreToPayload */ WriteStoreToPayload: { + /** + * Ignore Errors + * @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt. + */ + ignore_errors?: boolean | null; /** * Include deleted * @description Include file contents for deleted datasets (if include_files is True). diff --git a/lib/galaxy/managers/model_stores.py b/lib/galaxy/managers/model_stores.py index 77f7bf624d9a..0c111d822fcc 100644 --- a/lib/galaxy/managers/model_stores.py +++ b/lib/galaxy/managers/model_stores.py @@ -267,7 +267,11 @@ def write_history_to(self, request: WriteHistoryTo): uri: Optional[str] = None try: export_store = model.store.get_export_store_factory( - self._app, model_store_format, export_files=export_files, user_context=user_context + self._app, + model_store_format, + export_files=export_files, + user_context=user_context, + ignore_errors=request.ignore_errors, )(request.target_uri) with export_store: history = self._history_manager.by_id(request.history_id) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 5c5a22ee86a0..249b9a5aadc4 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -461,6 +461,7 @@ def __init__( serialize_dataset_objects: Optional[bool] = None, serialize_files_handler: Optional[SerializeFilesHandler] = None, strip_metadata_files: Optional[bool] = None, + ignore_errors: Optional[bool] = False, ) -> None: self.for_edit = for_edit if serialize_dataset_objects is None: @@ -472,6 +473,10 @@ def __init__( # expect metadata tool to be rerun. strip_metadata_files = not for_edit self.strip_metadata_files = strip_metadata_files + # When True, serializers emit best-effort output for histories whose imports left + # unresolved references (orphan ImplicitCollectionJobsJobAssociation rows, null-id + # job param refs) instead of raising. Intended for background archival exports. + self.ignore_errors = ignore_errors def attach_identifier(self, id_encoder, obj, ret_val): if self.for_edit and obj.id: @@ -494,12 +499,13 @@ def get_identifier(self, id_encoder, obj): return obj.temp_id def get_identifier_for_id(self, id_encoder, obj_id): - if self.for_edit and obj_id: - return obj_id - elif obj_id: - return id_encoder.encode_id(obj_id, kind="model_export") - else: + if not obj_id: + if self.ignore_errors: + return obj_id raise NotImplementedError() + if self.for_edit: + return obj_id + return id_encoder.encode_id(obj_id, kind="model_export") def serialize_files(self, dataset, as_dict): if self.serialize_files_handler is not None: @@ -2959,7 +2965,11 @@ def _serialize(self, id_encoder, serialization_options): rval = dict_for( self, populated_state=self.populated_state, - jobs=[serialization_options.get_identifier(id_encoder, j_a.job) for j_a in self.jobs], + jobs=[ + serialization_options.get_identifier(id_encoder, j_a.job) + for j_a in self.jobs + if j_a.job is not None or not serialization_options.ignore_errors + ], ) serialization_options.attach_identifier(id_encoder, self, rval) return rval diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index 2a7cb1386a5c..61a5d4bfd58b 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -1968,6 +1968,7 @@ def __init__( strip_metadata_files: bool = True, serialize_jobs: bool = True, user_context=None, + ignore_errors: Optional[bool] = False, ) -> None: """ :param export_directory: path to export directory. Will be created if it does not exist. @@ -2004,6 +2005,7 @@ def __init__( serialize_dataset_objects=serialize_dataset_objects, strip_metadata_files=strip_metadata_files, serialize_files_handler=self, + ignore_errors=ignore_errors, ) self.export_files = export_files self.included_datasets: dict[model.DatasetInstance, tuple[model.DatasetInstance, bool]] = {} @@ -3046,6 +3048,7 @@ def get_export_store_factory( export_files=None, bco_export_options: Optional[BcoExportOptions] = None, user_context=None, + ignore_errors: Optional[bool] = False, ) -> Callable[[StrPath], FileSourceModelExportStore]: export_store_class: type[FileSourceModelExportStore] export_store_class_kwds = { @@ -3053,6 +3056,7 @@ def get_export_store_factory( "export_files": export_files, "serialize_dataset_objects": False, "user_context": user_context, + "ignore_errors": ignore_errors, } if download_format in ["tar.gz", "tgz"]: export_store_class = TarModelExportStore diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py index 60fe1588d489..2e6da397e84f 100644 --- a/lib/galaxy/schema/schema.py +++ b/lib/galaxy/schema/schema.py @@ -1926,6 +1926,15 @@ class WriteStoreToPayload(StoreExportPayload): title="Target URI", description="Galaxy Files URI to write mode store content to.", ) + ignore_errors: Optional[bool] = Field( + default=None, + description=( + "Last resort. If True, skip serialization errors caused by missing " + "provenance (e.g. orphan implicit collection job associations, null " + "job param refs from older histories that pre-date collections) " + "instead of failing. Exported data may be incomplete or corrupt." + ), + ) class ObjectExportResponseBase(Model): diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index b5200b04fcb0..75128667f103 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -789,6 +789,70 @@ def test_import_traceback_handling(): assert exc.value.traceback == traceback_message +def test_export_history_with_orphan_icjja(tmp_path): + """Orphan ImplicitCollectionJobsJobAssociation rows (job_id NULL) are + persisted by the import path when an ICJ references a job key not in + object_import_tracker.jobs_by_key. The next export crashes in + get_identifier(j_a.job=None); ignore_errors skips the orphan.""" + app = _mock_app() + u, h, _d1, _d2, j = _setup_simple_cat_job(app) + + icj = model.ImplicitCollectionJobs() + linked = model.ImplicitCollectionJobsJobAssociation() + linked.order_index = 0 + linked.implicit_collection_jobs = icj + linked.job = j + to_orphan = model.ImplicitCollectionJobsJobAssociation() + to_orphan.order_index = 1 + to_orphan.implicit_collection_jobs = icj + to_orphan.job = j + app.add_and_commit(icj, linked, to_orphan) + + # Mimic the post-import state: drop the FK so the row becomes an orphan. + to_orphan.job = None # type: ignore[assignment] + app.commit() + + with pytest.raises(AttributeError): + with store.TarModelExportStore(str(tmp_path / "strict.tgz"), app=app, export_files="copy") as export_store: + export_store.export_history(h) + + tolerant_archive = str(tmp_path / "tolerant.tgz") + with store.TarModelExportStore(tolerant_archive, app=app, export_files="copy", ignore_errors=True) as export_store: + export_store.export_history(h) + + imported_history = import_archive(tolerant_archive, app, u) + imported_job = imported_history.datasets[1].creating_job + imported_icj = imported_job.implicit_collection_jobs_association.implicit_collection_jobs + assert len(imported_icj.jobs) == 1 + + +def test_export_history_with_null_param_id(tmp_path): + """Job params shaped {"src": "hda"|"hdca"|"dce", "id": null} are persisted + by the import path at model/store/__init__.py:1860-1888 when a referenced + HDA/HDCA/DCE can't be resolved. Strict export raises in + get_identifier_for_id; ignore_errors passes the null through. + + Reproducing the on-disk state directly: the only producer is the import + path itself, so deleting the referenced HDA wouldn't null the persisted + param JSON.""" + app = _mock_app() + u, h, _d1, _d2, j = _setup_simple_cat_job(app) + j.parameters = [model.JobParameter(name="input1", value=json.dumps({"src": "hda", "id": None}))] + app.commit() + + with pytest.raises(NotImplementedError): + with store.TarModelExportStore(str(tmp_path / "strict.tgz"), app=app, export_files="copy") as export_store: + export_store.export_history(h) + + tolerant_archive = str(tmp_path / "tolerant.tgz") + with store.TarModelExportStore(tolerant_archive, app=app, export_files="copy", ignore_errors=True) as export_store: + export_store.export_history(h) + + imported_history = import_archive(tolerant_archive, app, u) + imported_job = imported_history.datasets[1].creating_job + assert json.loads(imported_job.raw_param_dict()["input1"]) == {"src": "hda", "id": None} + + def test_import_export_edit_datasets(): """Test modifying existing HDA and dataset metadata with import.""" app, h, temp_directory, import_history = _setup_simple_export({"for_edit": True})