Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions client/src/api/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12108,6 +12108,11 @@ export interface components {
* @description Data of an export record associated with a history that was archived.
*/
ExportRecordData: {
/**
* Ignore Errors
* @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt.
*/
ignore_errors?: boolean | null;
/**
* Include deleted
* @description Include file contents for deleted datasets (if include_files is True).
Expand Down Expand Up @@ -25909,6 +25914,11 @@ export interface components {
* @description Override xref for 'description domain' when generating BioCompute object.
*/
bco_override_xref?: components["schemas"]["XrefItem"][] | null;
/**
* Ignore Errors
* @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt.
*/
ignore_errors?: boolean | null;
/**
* Include deleted
* @description Include file contents for deleted datasets (if include_files is True).
Expand Down Expand Up @@ -25940,6 +25950,11 @@ export interface components {
};
/** WriteStoreToPayload */
WriteStoreToPayload: {
/**
* Ignore Errors
* @description Last resort. If True, skip serialization errors caused by missing provenance (e.g. orphan implicit collection job associations, null job param refs from older histories that pre-date collections) instead of failing. Exported data may be incomplete or corrupt.
*/
ignore_errors?: boolean | null;
/**
* Include deleted
* @description Include file contents for deleted datasets (if include_files is True).
Expand Down
6 changes: 5 additions & 1 deletion lib/galaxy/managers/model_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,11 @@ def write_history_to(self, request: WriteHistoryTo):
uri: Optional[str] = None
try:
export_store = model.store.get_export_store_factory(
self._app, model_store_format, export_files=export_files, user_context=user_context
self._app,
model_store_format,
export_files=export_files,
user_context=user_context,
ignore_errors=request.ignore_errors,
)(request.target_uri)
with export_store:
history = self._history_manager.by_id(request.history_id)
Expand Down
22 changes: 16 additions & 6 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ def __init__(
serialize_dataset_objects: Optional[bool] = None,
serialize_files_handler: Optional[SerializeFilesHandler] = None,
strip_metadata_files: Optional[bool] = None,
ignore_errors: Optional[bool] = False,
) -> None:
self.for_edit = for_edit
if serialize_dataset_objects is None:
Expand All @@ -472,6 +473,10 @@ def __init__(
# expect metadata tool to be rerun.
strip_metadata_files = not for_edit
self.strip_metadata_files = strip_metadata_files
# When True, serializers emit best-effort output for histories whose imports left
# unresolved references (orphan ImplicitCollectionJobsJobAssociation rows, null-id
# job param refs) instead of raising. Intended for background archival exports.
self.ignore_errors = ignore_errors

def attach_identifier(self, id_encoder, obj, ret_val):
if self.for_edit and obj.id:
Expand All @@ -494,12 +499,13 @@ def get_identifier(self, id_encoder, obj):
return obj.temp_id

def get_identifier_for_id(self, id_encoder, obj_id):
    """Return the export identifier for a raw database id.

    A falsy ``obj_id`` (``None``/``0``) means the reference points at
    missing provenance. With ``self.ignore_errors`` set, the falsy value
    is passed through unchanged so a best-effort export can proceed;
    otherwise the export fails loudly.

    :param id_encoder: encoder providing ``encode_id`` for opaque ids.
    :param obj_id: raw database id, possibly ``None``.
    :raises NotImplementedError: falsy ``obj_id`` without ``ignore_errors``.
    """
    if not obj_id:
        if self.ignore_errors:
            # Best-effort export: emit the null reference instead of failing.
            return obj_id
        raise NotImplementedError()
    if self.for_edit:
        # Edit payloads keep raw database ids so re-import can match rows.
        return obj_id
    return id_encoder.encode_id(obj_id, kind="model_export")

def serialize_files(self, dataset, as_dict):
if self.serialize_files_handler is not None:
Expand Down Expand Up @@ -2959,7 +2965,11 @@ def _serialize(self, id_encoder, serialization_options):
rval = dict_for(
self,
populated_state=self.populated_state,
jobs=[serialization_options.get_identifier(id_encoder, j_a.job) for j_a in self.jobs],
jobs=[
serialization_options.get_identifier(id_encoder, j_a.job)
for j_a in self.jobs
if j_a.job is not None or not serialization_options.ignore_errors
],
)
serialization_options.attach_identifier(id_encoder, self, rval)
return rval
Expand Down
4 changes: 4 additions & 0 deletions lib/galaxy/model/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1968,6 +1968,7 @@ def __init__(
strip_metadata_files: bool = True,
serialize_jobs: bool = True,
user_context=None,
ignore_errors: Optional[bool] = False,
) -> None:
"""
:param export_directory: path to export directory. Will be created if it does not exist.
Expand Down Expand Up @@ -2004,6 +2005,7 @@ def __init__(
serialize_dataset_objects=serialize_dataset_objects,
strip_metadata_files=strip_metadata_files,
serialize_files_handler=self,
ignore_errors=ignore_errors,
)
self.export_files = export_files
self.included_datasets: dict[model.DatasetInstance, tuple[model.DatasetInstance, bool]] = {}
Expand Down Expand Up @@ -3046,13 +3048,15 @@ def get_export_store_factory(
export_files=None,
bco_export_options: Optional[BcoExportOptions] = None,
user_context=None,
ignore_errors: Optional[bool] = False,
) -> Callable[[StrPath], FileSourceModelExportStore]:
export_store_class: type[FileSourceModelExportStore]
export_store_class_kwds = {
"app": app,
"export_files": export_files,
"serialize_dataset_objects": False,
"user_context": user_context,
"ignore_errors": ignore_errors,
}
if download_format in ["tar.gz", "tgz"]:
export_store_class = TarModelExportStore
Expand Down
9 changes: 9 additions & 0 deletions lib/galaxy/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1926,6 +1926,15 @@ class WriteStoreToPayload(StoreExportPayload):
title="Target URI",
description="Galaxy Files URI to write mode store content to.",
)
ignore_errors: Optional[bool] = Field(
default=None,
description=(
"Last resort. If True, skip serialization errors caused by missing "
"provenance (e.g. orphan implicit collection job associations, null "
"job param refs from older histories that pre-date collections) "
"instead of failing. Exported data may be incomplete or corrupt."
),
)


class ObjectExportResponseBase(Model):
Expand Down
64 changes: 64 additions & 0 deletions test/unit/data/model/test_model_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,70 @@ def test_import_traceback_handling():
assert exc.value.traceback == traceback_message


def test_export_history_with_orphan_icjja(tmp_path):
    """Orphan ImplicitCollectionJobsJobAssociation rows (job_id NULL) can be
    left behind by the import path when an ICJ references a job key that is
    absent from object_import_tracker.jobs_by_key. A subsequent strict export
    crashes in get_identifier(j_a.job=None); ignore_errors skips the orphan.
    """
    app = _mock_app()
    u, h, _d1, _d2, j = _setup_simple_cat_job(app)

    collection_jobs = model.ImplicitCollectionJobs()
    kept_assoc = model.ImplicitCollectionJobsJobAssociation()
    kept_assoc.order_index = 0
    kept_assoc.implicit_collection_jobs = collection_jobs
    kept_assoc.job = j
    orphan_assoc = model.ImplicitCollectionJobsJobAssociation()
    orphan_assoc.order_index = 1
    orphan_assoc.implicit_collection_jobs = collection_jobs
    orphan_assoc.job = j
    app.add_and_commit(collection_jobs, kept_assoc, orphan_assoc)

    # Mimic the post-import state: null the FK so the row becomes an orphan.
    orphan_assoc.job = None  # type: ignore[assignment]
    app.commit()

    # Strict export must fail on the orphan association.
    strict_archive = str(tmp_path / "strict.tgz")
    with pytest.raises(AttributeError):
        with store.TarModelExportStore(strict_archive, app=app, export_files="copy") as export_store:
            export_store.export_history(h)

    # Tolerant export drops the orphan and succeeds.
    tolerant_archive = str(tmp_path / "tolerant.tgz")
    with store.TarModelExportStore(tolerant_archive, app=app, export_files="copy", ignore_errors=True) as export_store:
        export_store.export_history(h)

    # Round-trip: only the intact association survives.
    reimported = import_archive(tolerant_archive, app, u)
    reimported_job = reimported.datasets[1].creating_job
    reimported_icj = reimported_job.implicit_collection_jobs_association.implicit_collection_jobs
    assert len(reimported_icj.jobs) == 1


def test_export_history_with_null_param_id(tmp_path):
    """Job params shaped {"src": "hda"|"hdca"|"dce", "id": null} are persisted
    by the import path at model/store/__init__.py:1860-1888 when a referenced
    HDA/HDCA/DCE can't be resolved. Strict export raises in
    get_identifier_for_id; ignore_errors passes the null through.

    The on-disk state is reproduced directly here: the only producer is the
    import path itself, so deleting the referenced HDA wouldn't null the
    persisted param JSON.
    """
    app = _mock_app()
    u, h, _d1, _d2, j = _setup_simple_cat_job(app)

    # Persist a param referencing a source with no resolvable id.
    null_ref = json.dumps({"src": "hda", "id": None})
    j.parameters = [model.JobParameter(name="input1", value=null_ref)]
    app.commit()

    # Strict export refuses to encode the null id.
    strict_archive = str(tmp_path / "strict.tgz")
    with pytest.raises(NotImplementedError):
        with store.TarModelExportStore(strict_archive, app=app, export_files="copy") as export_store:
            export_store.export_history(h)

    # Tolerant export passes the null through unchanged.
    tolerant_archive = str(tmp_path / "tolerant.tgz")
    with store.TarModelExportStore(tolerant_archive, app=app, export_files="copy", ignore_errors=True) as export_store:
        export_store.export_history(h)

    reimported = import_archive(tolerant_archive, app, u)
    reimported_job = reimported.datasets[1].creating_job
    assert json.loads(reimported_job.raw_param_dict()["input1"]) == {"src": "hda", "id": None}


def test_import_export_edit_datasets():
"""Test modifying existing HDA and dataset metadata with import."""
app, h, temp_directory, import_history = _setup_simple_export({"for_edit": True})
Expand Down
Loading