Skip to content

Commit b8da623

Browse files
ziw-liu and aliddell authored
Support reading OME-Zarr 0.5 (#295)
* wip
* wip: 2025-02-27
* Undo reformatting
* Remove unused imports
* Fix miscellaneous failing tests.
* Get rename_well working.
* sort import
* Read MMStack with Zarr v3 (#293)
* use fsspec as a wrapper for ZarrTiffStore
* update deprecated readers and tests
* document the helper function
* add back the binary check after a flag
* use kwarg for file mode in reader
* restore dtype attribute
* restore opening zarr with dask
* stringify path name
* remove redundant dtype
* add rich for zarr v3 print tree
* skip tests that require ome-zarr-py
* fix file list checking it doesn't matter what the order is
* fix commas
* stringify path names
* detect ome key in reader
* allow multiscales to not have version
* wip: allow reading v0.5 images
* normalize store path
* update docstring
* remove unused import
* remove zarr-python synchronizer zarr-developers/zarr-python#1596
* allow empty zarr examples
* add example json files
* fixture for example v0.5 HCS dataset
* unify ome container unwrapping
* run PR tests for all target branches
* test with absolute paths zarr-python 3.0.8 breakage
* avoid big endian test cases
* avoid yanked versions
* fix zarr-python breakage
* revert keyword argument name
* fix version detection after zarr-python breakage
* test with absolute path
* Revert "allow empty zarr examples"
  This reverts commit 77146dd.
* Test reading NGFF v0.5 with acquire-zarr
* check binary values
* simplify file mode switch
  Co-authored-by: Alan Liddell <alan.c.liddell@gmail.com>
* remove redundant deletion

---------

Co-authored-by: Alan Liddell <alan.c.liddell@gmail.com>
Co-authored-by: Alan Liddell <aliddell@chanzuckerberg.com>
1 parent a8fd837 commit b8da623

12 files changed

Lines changed: 574 additions & 76 deletions

File tree

.github/workflows/pr.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ name: lint, style, and tests
66
on:
77
pull_request:
88
branches:
9-
- main
9+
- "*"
1010

1111
jobs:
1212
style:

iohub/ngff/models.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
"""
44
Data model classes with validation for OME-NGFF metadata.
5-
Developed against OME-NGFF v0.4 and ome-zarr v0.9
5+
Developed against OME-NGFF v0.4/0.5.2 and ome-zarr v0.9.
66
77
Attributes are 'snake_case' with aliases to match NGFF names in JSON output.
88
See https://ngff.openmicroscopy.org/0.4/index.html#naming-style
@@ -219,7 +219,9 @@ class VersionMeta(MetaBase):
219219
"""OME-NGFF spec version. Default is the current version (0.4)."""
220220

221221
# SHOULD
222-
version: Literal["0.1", "0.2", "0.3", "0.4"] = "0.4"
222+
version: Literal["0.1", "0.2", "0.3", "0.4", "0.5"] | None = Field(
223+
default=None, exclude=lambda v: v is None
224+
)
223225

224226

225227
class MultiScaleMeta(VersionMeta):

iohub/ngff/nodes.py

Lines changed: 75 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,6 @@
1616

1717
import numpy as np
1818
import zarr.codecs
19-
import zarr.storage
20-
from numcodecs import Blosc
2119
from numpy.typing import ArrayLike, DTypeLike, NDArray
2220
from pydantic import ValidationError
2321
from zarr.core.group import normalize_path
@@ -57,27 +55,28 @@ def _pad_shape(shape: tuple[int, ...], target: int = 5):
5755

5856

5957
def _open_store(
60-
store_path: StrOrBytesPath,
58+
store_path: StrOrBytesPath | Path,
6159
mode: Literal["r", "r+", "a", "w", "w-"],
6260
version: Literal["0.1", "0.4", "0.5"],
6361
):
64-
if not os.path.isdir(store_path) and mode in ("r", "r+"):
62+
store_path = Path(store_path).resolve()
63+
if not store_path.exists() and mode in ("r", "r+"):
6564
raise FileNotFoundError(
66-
f"Dataset directory not found at {store_path}."
65+
f"Dataset directory not found at {str(store_path)}."
6766
)
6867
if version not in ("0.4", "0.5"):
6968
_logger.warning(
7069
"IOHub is only tested against OME-NGFF v0.4 and v0.5. "
7170
f"Requested version {version} may not work properly."
7271
)
7372
try:
74-
store = zarr.storage.LocalStore(store_path)
75-
root = zarr.open_group(
76-
store, mode=mode, zarr_format=(3 if version == "0.5" else 2)
77-
)
73+
zarr_format = None
74+
if mode in ("w", "w-") or (mode == "a" and not store_path.exists()):
75+
zarr_format = 3 if version == "0.5" else 2
76+
root = zarr.open_group(store_path, mode=mode, zarr_format=zarr_format)
7877
except Exception as e:
7978
raise RuntimeError(
80-
f"Cannot open Zarr root group at {store_path}"
79+
f"Cannot open Zarr root group at {str(store_path)}"
8180
) from e
8281
return root
8382

@@ -141,6 +140,11 @@ def zattrs(self):
141140
Assignments will modify the metadata file."""
142141
return self._group.attrs
143142

143+
@property
144+
def maybe_wrapped_ome_attrs(self):
145+
"""Container of OME metadata attributes."""
146+
return self.zattrs.get("ome") or self.zattrs
147+
144148
@property
145149
def version(self):
146150
"""NGFF version"""
@@ -571,12 +575,12 @@ def _set_meta(
571575
)
572576
example_image: ImageArray = self[
573577
self.metadata.multiscales[0].datasets[0].path
574-
].channels
578+
]
575579
self._channel_names = list(range(example_image.channels))
576580

577581
def _parse_meta(self):
578-
multiscales = self.zattrs.get("multiscales")
579-
omero = self.zattrs.get("omero")
582+
multiscales = self.maybe_wrapped_ome_attrs.get("multiscales")
583+
omero = self.maybe_wrapped_ome_attrs.get("omero")
580584
if multiscales:
581585
try:
582586
self._set_meta(multiscales=multiscales, omero=omero)
@@ -802,6 +806,7 @@ def _check_shape(self, data_shape: tuple[int]):
802806
)
803807

804808
def _create_compressor_options(self, chunk_shape: Tuple[int, ...] = None):
809+
shuffle = zarr.codecs.BloscShuffle.bitshuffle
805810
if self._zarr_format == 3:
806811
return {
807812
"codecs": [
@@ -812,17 +817,19 @@ def _create_compressor_options(self, chunk_shape: Tuple[int, ...] = None):
812817
zarr.codecs.BloscCodec(
813818
cname="zstd",
814819
clevel=1,
815-
shuffle=Blosc.BITSHUFFLE,
820+
shuffle=shuffle,
816821
),
817822
],
818823
)
819824
],
820825
}
821826
else:
827+
from numcodecs import Blosc
828+
822829
return {
823830
"compressor": Blosc(
824831
cname="zstd", clevel=1, shuffle=Blosc.BITSHUFFLE
825-
),
832+
)
826833
}
827834

828835
def _create_image_meta(
@@ -1379,7 +1386,7 @@ def __init__(
13791386
)
13801387

13811388
def _parse_meta(self):
1382-
if well_group_meta := self.zattrs.get("well"):
1389+
if well_group_meta := self.maybe_wrapped_ome_attrs.get("well"):
13831390
self.metadata = WellGroupMeta(**well_group_meta)
13841391
else:
13851392
self._warn_invalid_meta()
@@ -1622,7 +1629,7 @@ def __init__(
16221629
)
16231630

16241631
def _parse_meta(self):
1625-
if plate_meta := self.zattrs.get("plate"):
1632+
if plate_meta := self.maybe_wrapped_ome_attrs.get("plate"):
16261633
_logger.debug(f"Loading HCS metadata from file: {plate_meta}")
16271634
self.metadata = PlateMeta(**plate_meta)
16281635
else:
@@ -1930,14 +1937,56 @@ def rename_well(self, old: str, new: str):
19301937
self.dump_meta()
19311938

19321939

1940+
def _check_file_mode(
1941+
store_path: Path,
1942+
mode: Literal["r", "r+", "a", "w", "w-"],
1943+
disable_path_checking: bool,
1944+
) -> bool:
1945+
if mode == "a":
1946+
mode = "r+" if store_path.exists() else "w-"
1947+
parse_meta = False
1948+
if mode in ("r", "r+"):
1949+
parse_meta = True
1950+
elif mode == "w-":
1951+
if store_path.exists():
1952+
raise FileExistsError(store_path)
1953+
elif mode == "w":
1954+
if store_path.exists():
1955+
if (
1956+
".zarr" not in str(store_path.resolve())
1957+
and not disable_path_checking
1958+
):
1959+
raise ValueError(
1960+
"Cannot overwrite a path that does not contain '.zarr', "
1961+
"use `disable_path_checking=True` if you are sure that "
1962+
f"{store_path} should be overwritten."
1963+
)
1964+
_logger.warning(f"Overwriting data at {store_path}")
1965+
else:
1966+
raise ValueError(f"Invalid persistence mode '{mode}'.")
1967+
return parse_meta
1968+
1969+
1970+
def _detect_layout(meta_keys: list[str]) -> Literal["fov", "hcs"]:
1971+
if "plate" in meta_keys:
1972+
return "hcs"
1973+
elif "multiscales" in meta_keys:
1974+
return "fov"
1975+
else:
1976+
raise KeyError(
1977+
"Dataset metadata keys ('plate'/'multiscales') not in "
1978+
f"the found store metadata keys: {meta_keys}. "
1979+
"Is this a valid OME-Zarr dataset?"
1980+
)
1981+
1982+
19331983
def open_ome_zarr(
19341984
store_path: StrOrBytesPath | Path,
19351985
layout: Literal["auto", "fov", "hcs", "tiled"] = "auto",
19361986
mode: Literal["r", "r+", "a", "w", "w-"] = "r",
19371987
channel_names: list[str] | None = None,
19381988
axes: list[AxisMeta] | None = None,
19391989
version: Literal["0.1", "0.4", "0.5"] = "0.4",
1940-
synchronizer: zarr.ThreadSynchronizer | zarr.ProcessSynchronizer = None,
19411990
disable_path_checking: bool = False,
19421991
**kwargs,
19431992
) -> Plate | Position | TiledPosition:
@@ -2003,42 +2052,17 @@ def open_ome_zarr(
20032052
or :py:class:`iohub.ngff.TiledPosition`)
20042053
"""
20052054
store_path = Path(store_path)
2006-
if mode == "a":
2007-
mode = ("w-", "r+")[int(store_path.exists())]
2008-
parse_meta = False
2009-
if mode in ("r", "r+"):
2010-
parse_meta = True
2011-
elif mode == "w-":
2012-
if store_path.exists():
2013-
raise FileExistsError(store_path)
2014-
elif mode == "w":
2015-
if store_path.exists():
2016-
if (
2017-
".zarr" not in str(store_path.resolve())
2018-
and not disable_path_checking
2019-
):
2020-
raise ValueError(
2021-
"Cannot overwrite a path that does not contain '.zarr', "
2022-
"use `disable_path_checking=True` if you are sure that "
2023-
f"{store_path} should be overwritten."
2024-
)
2025-
_logger.warning(f"Overwriting data at {store_path}")
2026-
else:
2027-
raise ValueError(f"Invalid persistence mode '{mode}'.")
2055+
parse_meta = _check_file_mode(
2056+
store_path, mode, disable_path_checking=disable_path_checking
2057+
)
20282058
root = _open_store(store_path, mode, version)
20292059
meta_keys = root.attrs.keys() if parse_meta else []
2060+
if "ome" in meta_keys:
2061+
meta_keys = root.attrs["ome"].keys()
2062+
version = root.attrs["ome"].get("version", version)
20302063
if layout == "auto":
20312064
if parse_meta:
2032-
if "plate" in meta_keys:
2033-
layout = "hcs"
2034-
elif "multiscales" in meta_keys:
2035-
layout = "fov"
2036-
else:
2037-
raise KeyError(
2038-
"Dataset metadata keys ('plate'/'multiscales') not in "
2039-
f"the found store metadata keys: {meta_keys}. "
2040-
"Is this a valid OME-Zarr dataset?"
2041-
)
2065+
layout = _detect_layout(meta_keys)
20422066
else:
20432067
raise ValueError(
20442068
"Store layout must be specified when creating a new dataset."
@@ -2059,5 +2083,6 @@ def open_ome_zarr(
20592083
parse_meta=parse_meta,
20602084
channel_names=channel_names,
20612085
axes=axes,
2086+
version=version,
20622087
**kwargs,
20632088
)

iohub/reader.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424

2525

2626
def _find_ngff_version_in_zarr_group(group: zarr.Group) -> str | None:
27-
for key in ["plate", "well"]:
27+
for key in ["plate", "well", "ome"]:
2828
if key in group.attrs:
2929
if v := group.attrs[key].get("version"):
3030
return v
@@ -200,8 +200,8 @@ def print_info(path: StrOrBytesPath, verbose=False):
200200
path = Path(path).resolve()
201201
try:
202202
fmt, extra_info = _infer_format(path)
203-
if fmt == "omezarr" and extra_info == "0.4":
204-
reader = open_ome_zarr(path, mode="r")
203+
if fmt == "omezarr" and extra_info in ("0.4", "0.5"):
204+
reader = open_ome_zarr(path, mode="r", version=extra_info)
205205
else:
206206
reader = read_images(path, data_type=fmt)
207207
except (ValueError, RuntimeError):

setup.cfg

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ install_requires =
3838
tifffile>=2024.1.30, <2025.5.21
3939
natsort>=7.1.1
4040
ndtiff>=2.2.1
41-
zarr>=3.0.0
41+
zarr>=3.0.8
4242
rich
4343
tqdm
4444
pillow>=9.4.0
@@ -48,6 +48,7 @@ install_requires =
4848

4949
[options.extras_require]
5050
dev =
51+
acquire-zarr
5152
black
5253
flake8
5354
pytest>=5.0.0

0 commit comments

Comments
 (0)