Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/iohub/ngff/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from iohub.core import NGFFArray
from iohub.ngff.models import TransformationMeta
from iohub.ngff.nodes import (
Bioformats2RawSeries,
ImageArray,
NGFFNode,
Plate,
Expand All @@ -11,6 +12,7 @@
)

__all__ = [
"Bioformats2RawSeries",
"ImageArray",
"NGFFArray",
"NGFFNode",
Expand Down
13 changes: 13 additions & 0 deletions src/iohub/ngff/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,3 +480,16 @@ class WellGroupMeta(VersionMeta):

# MUST
images: list[ImageMeta]


class Bioformats2RawMeta(MetaBase):
    """Transitional bioformats2raw layout marker.

    Models the root-level attributes of a bioformats2raw-layout store: the
    mandatory layout marker and an optional list of series paths.

    https://ngff.openmicroscopy.org/specifications/0.5/index.html#bf2raw
    """

    # MUST: layout marker, currently always 3.
    # The attribute key contains a dot, so it is mapped onto the ``layout``
    # field through a pydantic alias.
    layout: Literal[3] = Field(alias="bioformats2raw.layout")
    # MAY: ordered list of series paths (typically lives under the OME/ subgroup,
    # but some implementations duplicate it on the root).
    # ``None`` means the list was not provided alongside the layout marker.
    series: list[str] | None = None
169 changes: 162 additions & 7 deletions src/iohub/ngff/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import numpy as np
import xarray as xr
import zarr
from natsort import natsorted
from numpy.typing import ArrayLike, DTypeLike, NDArray
from pydantic import ValidationError

Expand All @@ -36,6 +37,7 @@
from iohub.ngff.models import (
AcquisitionMeta,
AxisMeta,
Bioformats2RawMeta,
ChannelAxisMeta,
DatasetMeta,
ImageMeta,
Expand Down Expand Up @@ -2888,6 +2890,134 @@ def rename_well(self, old: str, new: str):
self.dump_meta()


class Bioformats2RawSeries(NGFFNode):
    """A bioformats2raw-layout multi-image collection (read-only).

    The root group is marked with ``"bioformats2raw.layout": 3`` and contains
    numbered child groups (``0/``, ``1/``, ...), each a full multiscale image.
    An optional ``OME/`` subgroup carries an OME-XML blob and a ``series``
    array listing image paths in canonical order.

    Series are exposed as :py:class:`Position` members. When a ``series``
    list is available its order is preserved as-is; otherwise the
    numeric-named child groups are used in natural sort order.

    https://ngff.openmicroscopy.org/specifications/0.5/index.html#bf2raw
    """

    _MEMBER_TYPE = Position

    def __init__(
        self,
        group: zarr.Group,
        parse_meta: bool = True,
        channel_names: list[str] | None = None,
        axes: list[AxisMeta] | None = None,
        version: Literal["0.4", "0.5"] = "0.5",
        overwriting_creation: bool = False,
        impl: ZarrImplementation | None = None,
    ):
        """Wrap ``group`` as a bioformats2raw series collection.

        All arguments are forwarded unchanged to the parent node initializer.
        """
        super().__init__(
            group=group,
            parse_meta=parse_meta,
            channel_names=channel_names,
            axes=axes,
            version=version,
            overwriting_creation=overwriting_creation,
            impl=impl,
        )

    def _parse_meta(self):
        """Validate the root attributes and resolve the ``series`` list.

        On a validation failure the error is logged, the invalid-metadata
        warning is emitted, and ``self.metadata`` is left unset.
        """
        ome_attrs = dict(self.maybe_wrapped_ome_attrs)
        try:
            self.metadata = Bioformats2RawMeta(**ome_attrs)
        except ValidationError as e:
            _logger.warning(str(e))
            self._warn_invalid_meta()
            return
        # If the root doesn't carry a `series` list, fall back to the canonical
        # location at OME/.zattrs (v0.4) or OME/zarr.json (v0.5).
        if self.metadata.series is not None or "OME" not in self.zgroup:
            return
        try:
            ome_grp_attrs = get_ome_attrs(self.zgroup["OME"].attrs)
            series = ome_grp_attrs.get("series")
        except (KeyError, AttributeError) as e:
            # Best effort: the sidecar is optional, so keep going without it,
            # but leave a trace instead of swallowing the failure silently.
            _logger.debug("Could not read 'series' from the OME/ subgroup: %s", e)
            return
        if isinstance(series, list):
            self.metadata.series = [str(s) for s in series]

    @property
    def _member_names(self):
        """Names of the series groups, in iteration order."""
        metadata = getattr(self, "metadata", None)
        series = getattr(metadata, "series", None)
        if series:
            # The `series` list defines the canonical order of the images;
            # re-sorting it would break the correspondence with that order.
            return list(series)
        # Fall back to numeric-named child groups (exclude the OME/ sidecar).
        return natsorted(k for k in self.group_keys() if k != "OME" and k.isdigit())

    @cached_property
    def _first_series(self) -> Position | None:
        """First series Position by direct lookup; mirrors Plate._first_pos.

        Returns ``None`` (after logging a warning) when there is no series or
        the first one cannot be opened.
        """
        try:
            name = self._member_names[0]
            return Position(
                group=self.zgroup[name],
                parse_meta=True,
                version=self._version,
                impl=self._impl,
            )
        except (IndexError, KeyError, AttributeError):
            _logger.warning("Cannot read first series metadata.")
            return None

    @cached_property
    def channel_names(self):
        """Channel names of the first series.

        Raises
        ------
        AttributeError
            If no series could be read.
        """
        if pos := self._first_series:
            return pos.channel_names
        raise AttributeError("No series found to read channel names from.")

    @cached_property
    def axes(self):
        """Axes of the first series, or the default axes if none is readable."""
        if pos := self._first_series:
            return pos.axes
        return self._DEFAULT_AXES

    def positions(self) -> Generator[tuple[str, Position]]:
        """Iterate over ``(name, Position)`` pairs of all series.

        Matches :py:meth:`Plate.positions` and :py:meth:`Well.positions` so
        downstream code can iterate any FOV-bearing node uniformly.
        """
        yield from self.iteritems()

    def print_tree(self, level: int | None = None):
        """Print the hierarchy with series in member order (e.g. 0, 1, ..., 10).

        Zarr's native tree() walks children in lexicographic order, which
        groups ``"10"`` between ``"1"`` and ``"2"`` for numeric series. We
        reorder at the series level and then list the arrays beneath each
        series.

        Parameters
        ----------
        level : int | None, optional
            When given and ``<= 1``, only the series names are printed and
            the arrays beneath them are omitted.
        """
        names = self._member_names
        series_only = level is not None and level <= 1
        print("/")
        for i, name in enumerate(names):
            is_last_n = i == len(names) - 1
            print(("└── " if is_last_n else "├── ") + name)
            if series_only:
                continue
            # Keep the nested prefix as wide as the 4-column connectors so
            # that children line up under their series.
            sub_prefix = "    " if is_last_n else "│   "
            try:
                pos = self[name]
            except (KeyError, ValidationError):
                continue
            # Collect the readable arrays first: the "last" connector must go
            # to the last entry actually printed, and array_keys() is only
            # assumed to be iterable, not sized.
            entries = []
            for ak in pos.array_keys():
                try:
                    arr = pos[ak]
                    entries.append(f"{ak} {tuple(arr.shape)} {arr.dtype}")
                except (KeyError, AttributeError):
                    continue
            for j, entry in enumerate(entries):
                connector = "└── " if j == len(entries) - 1 else "├── "
                print(sub_prefix + connector + entry)

    def dump_meta(self):
        """Always raise: this node type is read-only."""
        raise NotImplementedError("Writing bioformats2raw-layout stores is not supported.")


def _check_file_mode(
store_path,
mode: Literal["r", "r+", "a", "w", "w-"],
Expand Down Expand Up @@ -2922,15 +3052,19 @@ def _check_file_mode(
return parse_meta


def _detect_layout(meta_keys: list[str]) -> Literal["fov", "hcs"]:
def _detect_layout(meta_keys: list[str]) -> Literal["fov", "hcs", "bf2raw"]:
# Order matters: per the NGFF spec, "plate" takes precedence over
# "bioformats2raw.layout" when both are present at the root.
if "plate" in meta_keys:
return "hcs"
elif "bioformats2raw.layout" in meta_keys:
return "bf2raw"
elif "multiscales" in meta_keys:
return "fov"
else:
raise KeyError(
"Dataset metadata keys ('plate'/'multiscales') not in "
f"the found store metadata keys: {meta_keys}. "
"Dataset metadata keys ('plate'/'bioformats2raw.layout'/'multiscales') "
f"not in the found store metadata keys: {meta_keys}. "
"Is this a valid OME-Zarr dataset?"
)

Expand All @@ -2947,7 +3081,22 @@ def open_ome_zarr(
implementation: str | None = None,
implementation_config: ImplementationConfig | None = None,
**kwargs,
) -> Plate | Position | TiledPosition: ...
) -> Plate | Position | TiledPosition | Bioformats2RawSeries: ...


@overload
def open_ome_zarr(
store_path: StorePath,
layout: Literal["bf2raw"],
mode: Literal["r", "r+", "a", "w", "w-"] = "r",
channel_names: list[str] | None = None,
axes: list[AxisMeta] | None = None,
version: Literal["0.4", "0.5"] = "0.5",
disable_path_checking: bool = False,
implementation: str | None = None,
implementation_config: ImplementationConfig | None = None,
**kwargs,
) -> Bioformats2RawSeries: ...


@overload
Expand Down Expand Up @@ -2997,7 +3146,7 @@ def open_ome_zarr(

def open_ome_zarr(
store_path: StorePath,
layout: Literal["auto", "fov", "hcs", "tiled"] = "auto",
layout: Literal["auto", "fov", "hcs", "tiled", "bf2raw"] = "auto",
mode: Literal["r", "r+", "a", "w", "w-"] = "r",
channel_names: list[str] | None = None,
axes: list[AxisMeta] | None = None,
Expand All @@ -3006,7 +3155,7 @@ def open_ome_zarr(
implementation: str | None = None,
implementation_config: ImplementationConfig | None = None,
**kwargs,
) -> Plate | Position | TiledPosition:
) -> Plate | Position | TiledPosition | Bioformats2RawSeries:
"""Convenience method to open OME-Zarr stores.

Parameters
Expand Down Expand Up @@ -3091,7 +3240,7 @@ def open_ome_zarr(
if "ome" in meta_keys:
meta_keys = root.attrs["ome"].keys()
version = root.attrs["ome"].get("version", version)
elif parse_meta and ("multiscales" in meta_keys or "plate" in meta_keys):
elif parse_meta and ("multiscales" in meta_keys or "plate" in meta_keys or "bioformats2raw.layout" in meta_keys):
# v0.4 stores have flat metadata (no "ome" wrapper)
version = "0.4"
if layout == "auto":
Expand All @@ -3108,6 +3257,12 @@ def open_ome_zarr(
if parse_meta and "plate" not in meta_keys:
raise ValueError(msg)
node = Plate
elif layout == "bf2raw":
if mode in ("w", "w-"):
raise NotImplementedError("Creating bioformats2raw-layout stores is not supported.")
if parse_meta and "bioformats2raw.layout" not in meta_keys:
raise ValueError(msg)
node = Bioformats2RawSeries
else:
raise ValueError(f"Unknown layout: {layout}")
return node(
Expand Down
15 changes: 3 additions & 12 deletions src/iohub/ngff/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from iohub.ngff import open_ome_zarr
from iohub.ngff.nodes import TransformationMeta


#: Default ZYX chunk size for OME-Zarr v0.5 stores: ~2 MB at uint16 / ~4 MB at float32.
_V05_DEFAULT_ZYX_CHUNKS: tuple[int, int, int] = (16, 256, 256)

Expand Down Expand Up @@ -430,10 +429,7 @@ def process_single_position(
elif use_threads:
click.echo(f"\nStarting thread pool with {num_workers} threads")
with ThreadPoolExecutor(max_workers=num_workers) as p:
futures = [
p.submit(partial_apply_transform_to_czyx_and_save, *args)
for args in flat_iterable
]
futures = [p.submit(partial_apply_transform_to_czyx_and_save, *args) for args in flat_iterable]
for fut in as_completed(futures):
fut.result()
click.echo("Shut down thread pool")
Expand All @@ -446,13 +442,8 @@ def process_single_position(
# (e.g. cgroup OOM-kill) surfaces as BrokenProcessPool instead
# of hanging indefinitely on pool.starmap.
context = mp.get_context("spawn")
with ProcessPoolExecutor(
max_workers=num_workers, mp_context=context
) as p:
futures = [
p.submit(partial_apply_transform_to_czyx_and_save, *args)
for args in flat_iterable
]
with ProcessPoolExecutor(max_workers=num_workers, mp_context=context) as p:
futures = [p.submit(partial_apply_transform_to_czyx_and_save, *args) for args in flat_iterable]
for fut in as_completed(futures):
fut.result()
click.echo("Shut down multiprocess pool")
Expand Down
47 changes: 41 additions & 6 deletions src/iohub/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@
from iohub.fov import BaseFOVMapping
from iohub.mmstack import MMStack
from iohub.ndtiff import NDTiffDataset
from iohub.ngff.nodes import NGFFNode, Plate, Position, TiledPosition, open_ome_zarr
from iohub.ngff.nodes import (
Bioformats2RawSeries,
NGFFNode,
Plate,
Position,
TiledPosition,
open_ome_zarr,
)

if TYPE_CHECKING:
from _typeshed import StrOrBytesPath
Expand All @@ -31,16 +38,33 @@ def _find_ngff_version_in_zarr_group(group: zarr.Group) -> str | None:
return None


def _has_bf2raw_marker(group: zarr.Group) -> bool:
"""Detect ``bioformats2raw.layout`` marker at root, v0.4 or v0.5."""
if "bioformats2raw.layout" in group.attrs:
return True
ome = group.attrs.get("ome")
return isinstance(ome, dict) and "bioformats2raw.layout" in ome


def _check_zarr_data_type(src: Path):
    """Probe the Zarr store at ``src`` for its NGFF version.

    The root group is checked first. A bioformats2raw root is then searched
    through its child groups (skipping ``OME``), falling back to a version
    derived from the Zarr format. Otherwise the groups two levels below the
    root are searched.

    Returns
    -------
    str | bool
        The detected version string, ``"unknown"`` if the search finds
        nothing, or ``False`` if any exception is raised along the way.
    """
    try:
        root = zarr.open(src, mode="r")
        found = _find_ngff_version_in_zarr_group(root)
        if found:
            return found
        # bf2raw root carries no version; recurse into first child series
        if _has_bf2raw_marker(root):
            for name, child in root.groups():
                if name != "OME":
                    found = _find_ngff_version_in_zarr_group(child)
                    if found:
                        return found
            # fall back to zarr format → ngff version mapping
            is_zarr_v3 = getattr(root.metadata, "zarr_format", None) == 3
            return "0.5" if is_zarr_v3 else "0.4"
        for _, row in root.groups():
            for _, well in row.groups():
                found = _find_ngff_version_in_zarr_group(well)
                if found:
                    return found
    except Exception:  # noqa: BLE001 — version detection may fail in many ways
        return False
    return "unknown"
Expand Down Expand Up @@ -255,6 +279,17 @@ def print_info(path: StrOrBytesPath, verbose=False):
f"No. bytes decompressed:\t\t, {total_bytes_uncompressed}"
f" [{sizeof_fmt(total_bytes_uncompressed)}]"
)
elif isinstance(reader, Bioformats2RawSeries):
positions = list(reader.positions())
msgs.append(f"Positions:\t\t {len(positions)}")
if positions:
name, first = positions[0]
msgs.append(f"First position:\t\t '{name}'")
if "0" in first:
msgs.append(f"Chunk size:\t\t {first['0'].chunks}")
if verbose:
print("Zarr hierarchy:")
reader.print_tree()
else:
total_bytes_uncompressed = reader["0"].nbytes
msgs.append(f"(Z, Y, X) scale (um):\t {tuple(reader.scale[2:])}")
Expand Down
Loading
Loading