diff --git a/src/iohub/ngff/__init__.py b/src/iohub/ngff/__init__.py
index 776d6624..6cc55a74 100644
--- a/src/iohub/ngff/__init__.py
+++ b/src/iohub/ngff/__init__.py
@@ -1,6 +1,7 @@
 from iohub.core import NGFFArray
 from iohub.ngff.models import TransformationMeta
 from iohub.ngff.nodes import (
+    Bioformats2RawSeries,
     ImageArray,
     NGFFNode,
     Plate,
@@ -11,6 +12,7 @@
 )
 
 __all__ = [
+    "Bioformats2RawSeries",
     "ImageArray",
     "NGFFArray",
     "NGFFNode",
diff --git a/src/iohub/ngff/models.py b/src/iohub/ngff/models.py
index b17476c5..4b58bb0f 100644
--- a/src/iohub/ngff/models.py
+++ b/src/iohub/ngff/models.py
@@ -480,3 +480,16 @@ class WellGroupMeta(VersionMeta):
 
     # MUST
     images: list[ImageMeta]
+
+
+class Bioformats2RawMeta(MetaBase):
+    """Transitional bioformats2raw layout marker.
+
+    https://ngff.openmicroscopy.org/specifications/0.5/index.html#bf2raw
+    """
+
+    # MUST: layout marker, currently always 3
+    layout: Literal[3] = Field(alias="bioformats2raw.layout")
+    # MAY: ordered list of series paths (typically lives under the OME/ subgroup,
+    # but some implementations duplicate it on the root)
+    series: list[str] | None = None
diff --git a/src/iohub/ngff/nodes.py b/src/iohub/ngff/nodes.py
index 2459ae11..d6a92290 100644
--- a/src/iohub/ngff/nodes.py
+++ b/src/iohub/ngff/nodes.py
@@ -22,6 +22,7 @@
 import numpy as np
 import xarray as xr
 import zarr
+from natsort import natsorted
 from numpy.typing import ArrayLike, DTypeLike, NDArray
 from pydantic import ValidationError
 
@@ -36,6 +37,7 @@
 from iohub.ngff.models import (
     AcquisitionMeta,
     AxisMeta,
+    Bioformats2RawMeta,
     ChannelAxisMeta,
     DatasetMeta,
     ImageMeta,
@@ -2888,6 +2890,134 @@ def rename_well(self, old: str, new: str):
         self.dump_meta()
 
 
+class Bioformats2RawSeries(NGFFNode):
+    """A bioformats2raw-layout multi-image collection (read-only).
+
+    The root group is marked with ``"bioformats2raw.layout": 3`` and contains
+    numbered child groups (``0/``, ``1/``, ...), each a full multiscale image.
+    An optional ``OME/`` subgroup carries an OME-XML blob and a ``series``
+    array listing image paths in canonical order.
+
+    https://ngff.openmicroscopy.org/specifications/0.5/index.html#bf2raw
+    """
+
+    _MEMBER_TYPE = Position
+
+    def __init__(
+        self,
+        group: zarr.Group,
+        parse_meta: bool = True,
+        channel_names: list[str] | None = None,
+        axes: list[AxisMeta] | None = None,
+        version: Literal["0.4", "0.5"] = "0.5",
+        overwriting_creation: bool = False,
+        impl: ZarrImplementation | None = None,
+    ):
+        super().__init__(
+            group=group,
+            parse_meta=parse_meta,
+            channel_names=channel_names,
+            axes=axes,
+            version=version,
+            overwriting_creation=overwriting_creation,
+            impl=impl,
+        )
+
+    def _parse_meta(self):
+        ome_attrs = dict(self.maybe_wrapped_ome_attrs)
+        try:
+            self.metadata = Bioformats2RawMeta(**ome_attrs)
+        except ValidationError as e:
+            _logger.warning(str(e))
+            self._warn_invalid_meta()
+            return
+        # If the root doesn't carry a `series` list, fall back to the canonical
+        # location at OME/.zattrs (v0.4) or OME/zarr.json (v0.5).
+        if self.metadata.series is None and "OME" in self.zgroup:
+            try:
+                ome_grp_attrs = get_ome_attrs(self.zgroup["OME"].attrs)
+                series = ome_grp_attrs.get("series")
+                if isinstance(series, list):
+                    self.metadata.series = [str(s) for s in series]
+            except (KeyError, AttributeError):
+                pass
+
+    @property
+    def _member_names(self):
+        if hasattr(self, "metadata") and self.metadata.series:
+            return list(self.metadata.series)  # declared order is canonical; re-sorting would discard it
+        # Fall back to numeric-named child groups (exclude the OME/ sidecar).
+        return natsorted(k for k in self.group_keys() if k != "OME" and k.isdigit())
+
+    @cached_property
+    def _first_series(self) -> Position | None:
+        """First series Position by direct lookup; mirrors Plate._first_pos."""
+        try:
+            name = self._member_names[0]
+            return Position(
+                group=self.zgroup[name],
+                parse_meta=True,
+                version=self._version,
+                impl=self._impl,
+            )
+        except (IndexError, KeyError, AttributeError):
+            _logger.warning("Cannot read first series metadata.")
+            return None
+
+    @cached_property
+    def channel_names(self):
+        if pos := self._first_series:
+            return pos.channel_names
+        raise AttributeError("No series found to read channel names from.")
+
+    @cached_property
+    def axes(self):
+        if pos := self._first_series:
+            return pos.axes
+        return self._DEFAULT_AXES
+
+    def positions(self) -> Generator[tuple[str, Position]]:
+        """Iterate over ``(name, Position)`` pairs of all series.
+
+        Matches :py:meth:`Plate.positions` and :py:meth:`Well.positions` so
+        downstream code can iterate any FOV-bearing node uniformly.
+        """
+        yield from self.iteritems()
+
+    def print_tree(self, level: int | None = None):
+        """Print hierarchy in series order (e.g. 0, 1, 2, ..., 10).
+
+        Zarr's native tree() walks children in lexicographic order, which
+        groups ``"10"`` between ``"1"`` and ``"2"`` for numeric series. We
+        reorder at the series level and then defer to per-Position trees
+        for the multiscale arrays beneath.
+        """
+        names = self._member_names
+        print("/")
+        for i, name in enumerate(names):
+            is_last_n = i == len(names) - 1
+            print(("└── " if is_last_n else "├── ") + name)
+            if level is not None and level <= 1:
+                continue
+            sub_prefix = "    " if is_last_n else "│   "
+            try:
+                pos = self[name]
+            except (KeyError, ValidationError):
+                continue
+            arr_names = pos.array_keys()
+            for j, ak in enumerate(arr_names):
+                is_last_a = j == len(arr_names) - 1
+                connector = "└── " if is_last_a else "├── "
+                try:
+                    arr = pos[ak]
+                    print(sub_prefix + connector + f"{ak} {tuple(arr.shape)} {arr.dtype}")
+                except (KeyError, AttributeError):
+                    pass
+
+    def dump_meta(self):
+        raise NotImplementedError("Writing bioformats2raw-layout stores is not supported.")
+
+
 def _check_file_mode(
     store_path,
     mode: Literal["r", "r+", "a", "w", "w-"],
@@ -2922,15 +3052,19 @@ def _check_file_mode(
     return parse_meta
 
 
-def _detect_layout(meta_keys: list[str]) -> Literal["fov", "hcs"]:
+def _detect_layout(meta_keys: list[str]) -> Literal["fov", "hcs", "bf2raw"]:
+    # Order matters: per the NGFF spec, "plate" takes precedence over
+    # "bioformats2raw.layout" when both are present at the root.
     if "plate" in meta_keys:
         return "hcs"
+    elif "bioformats2raw.layout" in meta_keys:
+        return "bf2raw"
     elif "multiscales" in meta_keys:
         return "fov"
     else:
         raise KeyError(
-            "Dataset metadata keys ('plate'/'multiscales') not in "
-            f"the found store metadata keys: {meta_keys}. "
+            "Dataset metadata keys ('plate'/'bioformats2raw.layout'/'multiscales') "
+            f"not in the found store metadata keys: {meta_keys}. "
             "Is this a valid OME-Zarr dataset?"
         )
 
@@ -2947,7 +3081,22 @@ def open_ome_zarr(
     implementation: str | None = None,
     implementation_config: ImplementationConfig | None = None,
     **kwargs,
-) -> Plate | Position | TiledPosition: ...
+) -> Plate | Position | TiledPosition | Bioformats2RawSeries: ...
+
+
+@overload
+def open_ome_zarr(
+    store_path: StorePath,
+    layout: Literal["bf2raw"],
+    mode: Literal["r", "r+", "a", "w", "w-"] = "r",
+    channel_names: list[str] | None = None,
+    axes: list[AxisMeta] | None = None,
+    version: Literal["0.4", "0.5"] = "0.5",
+    disable_path_checking: bool = False,
+    implementation: str | None = None,
+    implementation_config: ImplementationConfig | None = None,
+    **kwargs,
+) -> Bioformats2RawSeries: ...
 
 
 @overload
@@ -2997,7 +3146,7 @@ def open_ome_zarr(
 
 def open_ome_zarr(
     store_path: StorePath,
-    layout: Literal["auto", "fov", "hcs", "tiled"] = "auto",
+    layout: Literal["auto", "fov", "hcs", "tiled", "bf2raw"] = "auto",
     mode: Literal["r", "r+", "a", "w", "w-"] = "r",
     channel_names: list[str] | None = None,
     axes: list[AxisMeta] | None = None,
@@ -3006,7 +3155,7 @@ def open_ome_zarr(
     implementation: str | None = None,
     implementation_config: ImplementationConfig | None = None,
     **kwargs,
-) -> Plate | Position | TiledPosition:
+) -> Plate | Position | TiledPosition | Bioformats2RawSeries:
     """Convenience method to open OME-Zarr stores.
 
     Parameters
@@ -3091,7 +3240,7 @@ def open_ome_zarr(
     if "ome" in meta_keys:
         meta_keys = root.attrs["ome"].keys()
         version = root.attrs["ome"].get("version", version)
-    elif parse_meta and ("multiscales" in meta_keys or "plate" in meta_keys):
+    elif parse_meta and ("multiscales" in meta_keys or "plate" in meta_keys or "bioformats2raw.layout" in meta_keys):
         # v0.4 stores have flat metadata (no "ome" wrapper)
         version = "0.4"
     if layout == "auto":
@@ -3108,6 +3257,12 @@ def open_ome_zarr(
         if parse_meta and "plate" not in meta_keys:
             raise ValueError(msg)
         node = Plate
+    elif layout == "bf2raw":
+        if mode in ("w", "w-"):
+            raise NotImplementedError("Creating bioformats2raw-layout stores is not supported.")
+        if parse_meta and "bioformats2raw.layout" not in meta_keys:
+            raise ValueError(msg)
+        node = Bioformats2RawSeries
     else:
         raise ValueError(f"Unknown layout: {layout}")
     return node(
diff --git a/src/iohub/ngff/utils.py b/src/iohub/ngff/utils.py
index 44650743..5b0a7015 100644
--- a/src/iohub/ngff/utils.py
+++ b/src/iohub/ngff/utils.py
@@ -19,7 +19,6 @@
 from iohub.ngff import open_ome_zarr
 from iohub.ngff.nodes import TransformationMeta
 
-
 #: Default ZYX chunk size for OME-Zarr v0.5 stores: ~2 MB at uint16 / ~4 MB at float32.
 _V05_DEFAULT_ZYX_CHUNKS: tuple[int, int, int] = (16, 256, 256)
 
@@ -430,10 +429,7 @@ def process_single_position(
     elif use_threads:
         click.echo(f"\nStarting thread pool with {num_workers} threads")
         with ThreadPoolExecutor(max_workers=num_workers) as p:
-            futures = [
-                p.submit(partial_apply_transform_to_czyx_and_save, *args)
-                for args in flat_iterable
-            ]
+            futures = [p.submit(partial_apply_transform_to_czyx_and_save, *args) for args in flat_iterable]
             for fut in as_completed(futures):
                 fut.result()
         click.echo("Shut down thread pool")
@@ -446,13 +442,8 @@ def process_single_position(
         # (e.g. cgroup OOM-kill) surfaces as BrokenProcessPool instead
         # of hanging indefinitely on pool.starmap.
         context = mp.get_context("spawn")
-        with ProcessPoolExecutor(
-            max_workers=num_workers, mp_context=context
-        ) as p:
-            futures = [
-                p.submit(partial_apply_transform_to_czyx_and_save, *args)
-                for args in flat_iterable
-            ]
+        with ProcessPoolExecutor(max_workers=num_workers, mp_context=context) as p:
+            futures = [p.submit(partial_apply_transform_to_czyx_and_save, *args) for args in flat_iterable]
             for fut in as_completed(futures):
                 fut.result()
         click.echo("Shut down multiprocess pool")
diff --git a/src/iohub/reader.py b/src/iohub/reader.py
index 7223a98b..1a280272 100644
--- a/src/iohub/reader.py
+++ b/src/iohub/reader.py
@@ -11,7 +11,14 @@
 from iohub.fov import BaseFOVMapping
 from iohub.mmstack import MMStack
 from iohub.ndtiff import NDTiffDataset
-from iohub.ngff.nodes import NGFFNode, Plate, Position, TiledPosition, open_ome_zarr
+from iohub.ngff.nodes import (
+    Bioformats2RawSeries,
+    NGFFNode,
+    Plate,
+    Position,
+    TiledPosition,
+    open_ome_zarr,
+)
 
 if TYPE_CHECKING:
     from _typeshed import StrOrBytesPath
@@ -31,16 +38,33 @@ def _find_ngff_version_in_zarr_group(group: zarr.Group) -> str | None:
     return None
 
 
+def _has_bf2raw_marker(group: zarr.Group) -> bool:
+    """Detect ``bioformats2raw.layout`` marker at root, v0.4 or v0.5."""
+    if "bioformats2raw.layout" in group.attrs:
+        return True
+    ome = group.attrs.get("ome")
+    return isinstance(ome, dict) and "bioformats2raw.layout" in ome
+
+
 def _check_zarr_data_type(src: Path):
     try:
         root = zarr.open(src, mode="r")
         if version := _find_ngff_version_in_zarr_group(root):
             return version
-        else:
-            for _, row in root.groups():
-                for _, well in row.groups():
-                    if version := _find_ngff_version_in_zarr_group(well):
-                        return version
+        # bf2raw root carries no version; recurse into first child series
+        if _has_bf2raw_marker(root):
+            for name, child in root.groups():
+                if name == "OME":
+                    continue
+                if version := _find_ngff_version_in_zarr_group(child):
+                    return version
+            # fall back to zarr format → ngff version mapping
+            zarr_format = getattr(root.metadata, "zarr_format", None)
+            return "0.5" if zarr_format == 3 else "0.4"
+        for _, row in root.groups():
+            for _, well in row.groups():
+                if version := _find_ngff_version_in_zarr_group(well):
+                    return version
     except Exception:  # noqa: BLE001 — version detection may fail in many ways
         return False
     return "unknown"
@@ -255,6 +279,17 @@ def print_info(path: StrOrBytesPath, verbose=False):
                     f"No. bytes decompressed:\t\t, {total_bytes_uncompressed}"
                     f" [{sizeof_fmt(total_bytes_uncompressed)}]"
                 )
+        elif isinstance(reader, Bioformats2RawSeries):
+            positions = list(reader.positions())
+            msgs.append(f"Positions:\t\t {len(positions)}")
+            if positions:
+                name, first = positions[0]
+                msgs.append(f"First position:\t\t '{name}'")
+                if "0" in first:
+                    msgs.append(f"Chunk size:\t\t {first['0'].chunks}")
+            if verbose:
+                print("Zarr hierarchy:")
+                reader.print_tree()
         else:
             total_bytes_uncompressed = reader["0"].nbytes
             msgs.append(f"(Z, Y, X) scale (um):\t {tuple(reader.scale[2:])}")
diff --git a/tests/ngff/test_ngff.py b/tests/ngff/test_ngff.py
index fddf6fdb..ba53d08d 100644
--- a/tests/ngff/test_ngff.py
+++ b/tests/ngff/test_ngff.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 import os
 import platform
 import shutil
@@ -29,6 +30,7 @@
 from iohub.core.utils import pad_shape
 from iohub.ngff.models import TO_DICT_SETTINGS
 from iohub.ngff.nodes import (
+    Bioformats2RawSeries,
     Plate,
     Position,
     TransformationMeta,
@@ -1964,3 +1966,99 @@ def test_sharding_raises_on_v04(tmp_path):
                 np.zeros((1, 1, 1, 64, 64)),
                 shards_ratio=(1, 1, 1, 2, 2),
             )
+
+
+def _make_bf2raw_fixture(
+    tmp_path: Path,
+    version: Literal["0.4", "0.5"],
+    series_names: tuple[str, ...] = ("0", "1"),
+    write_root_series: bool = False,
+) -> Path:
+    """Build a synthetic bioformats2raw-layout store at ``tmp_path/series.ome.zarr``.
+
+    Marker is written at the root; the canonical ``series`` list is written
+    under the ``OME/`` subgroup. Each series is a real iohub-written FOV with
+    a single ``"DAPI"`` channel and a (1, 1, 2, 4, 4) ``uint8`` array at level "0".
+    """
+    root_path = tmp_path / "series.ome.zarr"
+    root_path.mkdir()
+    if version == "0.5":
+        root_attrs: dict[str, object] = {"bioformats2raw.layout": 3}
+        if write_root_series:
+            root_attrs["series"] = list(series_names)
+        (root_path / "zarr.json").write_text(
+            json.dumps({"zarr_format": 3, "node_type": "group", "attributes": {"ome": root_attrs}})
+        )
+        ome_dir = root_path / "OME"
+        ome_dir.mkdir()
+        (ome_dir / "zarr.json").write_text(
+            json.dumps(
+                {
+                    "zarr_format": 3,
+                    "node_type": "group",
+                    "attributes": {"ome": {"series": list(series_names)}},
+                }
+            )
+        )
+    else:  # v0.4
+        (root_path / ".zgroup").write_text(json.dumps({"zarr_format": 2}))
+        root_attrs_v04: dict[str, object] = {"bioformats2raw.layout": 3}
+        if write_root_series:
+            root_attrs_v04["series"] = list(series_names)
+        (root_path / ".zattrs").write_text(json.dumps(root_attrs_v04))
+        ome_dir = root_path / "OME"
+        ome_dir.mkdir()
+        (ome_dir / ".zgroup").write_text(json.dumps({"zarr_format": 2}))
+        (ome_dir / ".zattrs").write_text(json.dumps({"series": list(series_names)}))
+
+    for idx, name in enumerate(series_names):
+        with open_ome_zarr(
+            root_path / name,
+            layout="fov",
+            mode="w-",
+            channel_names=["DAPI"],
+            version=version,
+        ) as fov:
+            shape = (1, 1, 2, 4, 4)
+            arr = fov.create_zeros("0", shape=shape, dtype=np.uint8)
+            arr[...] = (np.arange(int(np.prod(shape)), dtype=np.uint8) + idx).reshape(shape)
+    return root_path
+
+
+@pytest.mark.parametrize("version", ["0.4", "0.5"])
+def test_bf2raw_open_auto(tmp_path, version):
+    """``open_ome_zarr`` auto-detects bf2raw layout and returns the right node."""
+    store_path = _make_bf2raw_fixture(tmp_path, version)
+    with open_ome_zarr(store_path, mode="r") as node:
+        assert isinstance(node, Bioformats2RawSeries)
+        assert node.version == version
+        assert len(node) == 2
+        assert list(node) == ["0", "1"]
+        names = [n for n, _ in node.positions()]
+        assert names == ["0", "1"]
+        assert node["0"].channel_names == ["DAPI"]
+        assert node.channel_names == ["DAPI"]
+        assert [a.name for a in node.axes] == ["T", "C", "Z", "Y", "X"]
+        assert node["1"]["0"].shape == (1, 1, 2, 4, 4)
+
+
+@pytest.mark.parametrize("version", ["0.4", "0.5"])
+def test_bf2raw_explicit_layout(tmp_path, version):
+    """``layout='bf2raw'`` succeeds; mismatched layout raises."""
+    store_path = _make_bf2raw_fixture(tmp_path, version)
+    with open_ome_zarr(store_path, layout="bf2raw", mode="r") as node:
+        assert isinstance(node, Bioformats2RawSeries)
+    with pytest.raises(ValueError, match="does not match"):
+        open_ome_zarr(store_path, layout="hcs", mode="r")
+
+
+def test_bf2raw_create_not_supported(tmp_path):
+    """Creating a bf2raw store via open_ome_zarr is explicitly rejected."""
+    with pytest.raises(NotImplementedError, match="not supported"):
+        open_ome_zarr(
+            tmp_path / "new.zarr",
+            layout="bf2raw",
+            mode="w-",
+            channel_names=["DAPI"],
+            version="0.5",
+        )
diff --git a/tests/ngff/test_ngff_utils.py b/tests/ngff/test_ngff_utils.py
index 11398dbe..bf7c7550 100644
--- a/tests/ngff/test_ngff_utils.py
+++ b/tests/ngff/test_ngff_utils.py
@@ -14,10 +14,10 @@
 from iohub.core.compat import V04_MAX_CHUNK_SIZE_BYTES
 from iohub.ngff import open_ome_zarr
 from iohub.ngff.utils import (
+    _V05_DEFAULT_ZYX_CHUNKS,
     _available_cpus,
     _indices_to_shard_aligned_batches,
     _match_indices_to_batches,
-    _V05_DEFAULT_ZYX_CHUNKS,
     apply_transform_to_tczyx_and_save,
     create_empty_plate,
     process_single_position,
diff --git a/tests/test_reader.py b/tests/test_reader.py
index dafeac72..8bed44ba 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -2,7 +2,8 @@
 
 from iohub.mmstack import MMStack
 from iohub.ndtiff import NDTiffDataset
-from iohub.reader import read_images, sizeof_fmt
+from iohub.ngff.nodes import Bioformats2RawSeries
+from iohub.reader import print_info, read_images, sizeof_fmt
 from tests.conftest import (
     mm2gamma_ome_tiffs,
     mm2gamma_singlepage_tiffs,
@@ -10,6 +11,7 @@
     ndtiff_v2_datasets,
     ndtiff_v3_labeled_positions,
 )
+from tests.ngff.test_ngff import _make_bf2raw_fixture
 
 
 def test_unsupported_datatype(tmpdir):
@@ -43,3 +45,25 @@ def test_detect_single_page_tiff(data_path):
 )
 def test_sizeof_fmt(num_bytes, expected):
     assert sizeof_fmt(num_bytes) == expected
+
+
+@pytest.mark.parametrize("version", ["0.4", "0.5"])
+def test_read_images_bf2raw(tmp_path, version):
+    """``read_images`` returns a Bioformats2RawSeries for bf2raw stores."""
+    store_path = _make_bf2raw_fixture(tmp_path, version)
+    reader = read_images(store_path)
+    try:
+        assert isinstance(reader, Bioformats2RawSeries)
+        assert len(reader) == 2
+    finally:
+        reader.close()
+
+
+@pytest.mark.parametrize("version", ["0.4", "0.5"])
+def test_print_info_bf2raw(tmp_path, capsys, version):
+    """``print_info`` runs and reports series count for bf2raw stores."""
+    store_path = _make_bf2raw_fixture(tmp_path, version)
+    print_info(store_path)
+    captured = capsys.readouterr().out
+    assert "Positions:" in captured
+    assert f"v{version}" in captured