Skip to content

Commit 985b4ea

Browse files
authored
Merge pull request #301 from czbiohub-sf/zarr3-dev
OME-Zarr v0.5 (Zarr v3) support
2 parents bd77a43 + 2d1c5fb commit 985b4ea

27 files changed

Lines changed: 1877 additions & 479 deletions

.github/workflows/pr.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ name: lint, style, and tests
66
on:
77
pull_request:
88
branches:
9-
- main
9+
- "*"
1010

1111
jobs:
1212
style:
@@ -94,7 +94,7 @@ jobs:
9494
- name: Install dependencies
9595
run: |
9696
python -m pip install --upgrade pip
97-
pip install ".[dev]"
97+
pip install ".[dev,acquire-zarr]"
9898
9999
- name: Test with pytest
100100
env:

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
- name: Install dependencies
2525
run: |
2626
python -m pip install --upgrade pip
27-
pip install ".[dev]"
27+
pip install ".[dev,acquire-zarr]"
2828
2929
- name: Test with pytest
3030
env:

CONTRIBUTING.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,13 @@ git clone https://github.com/czbiohub-sf/iohub.git
9191
Otherwise, you can follow [these instructions](https://docs.github.com/en/get-started/quickstart/fork-a-repo)
9292
to [fork](https://github.com/czbiohub-sf/iohub/fork) the repository.
9393

94-
Then install the package in editable mode with the development dependencies:
94+
Then install the package in editable mode with the development dependencies.
95+
Omit the `acquire-zarr` extra if your system's glibc is older than version 2.35,
96+
for example on the Bruno cluster (Rocky Linux 8).
9597

9698
```sh
9799
cd iohub/ # or the renamed project root directory
98-
pip install -e ".[dev]"
100+
pip install -e ".[dev,acquire-zarr]"
99101
```
100102

101103
Then make the changes and [track them with Git](https://docs.github.com/en/get-started/using-git/about-git#example-contribute-to-an-existing-repository).

docs/examples/run_coordinate_transform.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@
5858
) as dataset:
5959
# Create and write to positions
6060
# This affects the tile arrangement in visualization
61-
position = dataset.create_position(0, 0, 0)
61+
position = dataset.create_position("0", "0", "0")
6262
position.create_image("0", tczyx_1, transform=[translation[0]])
63-
position = dataset.create_position(0, 0, 1)
63+
position = dataset.create_position("0", "0", "1")
6464
position.create_image("0", tczyx_2, transform=[translation[1], scaling[0]])
6565
# Print dataset summary
6666
dataset.print_tree()
@@ -72,4 +72,4 @@
7272

7373
# %%
7474
# Clean up
75-
tmp_dir.cleanup()
75+
tmp_dir.cleanup()

docs/examples/run_multi_fov_hcs_ome_zarr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@
3232

3333
position_list = (
3434
("A", "1", "0"),
35-
("H", 1, "0"),
35+
("H", "1", "0"),
3636
("H", "12", "CannotVisualize"),
37-
("Control", "Blank", 0),
37+
("Control", "Blank", "0"),
3838
)
3939

4040
with open_ome_zarr(
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""
2+
Update OME-Zarr Version
3+
=======================
4+
5+
This script shows how to write the same OME-Zarr image
6+
using a newer version of the OME-Zarr specification (0.4 to 0.5).
7+
"""
8+
9+
# %%
10+
from pathlib import Path
11+
from tempfile import TemporaryDirectory
12+
13+
import numpy as np
14+
15+
from iohub.ngff import TransformationMeta, open_ome_zarr
16+
17+
# %%
18+
# Set storage path
19+
tmp_dir = TemporaryDirectory()
20+
old_store_path = Path(tmp_dir.name) / "old.zarr"
21+
new_store_path = Path(tmp_dir.name) / "new.zarr"
22+
23+
# %%
24+
# Create a version 0.4 OME-Zarr dataset
25+
random_image = np.random.randint(
26+
0, np.iinfo(np.uint16).max, size=(10, 2, 32, 128, 128), dtype=np.uint16
27+
)
28+
scale = [2.0, 3.0, 4.0, 5.0, 6.0]
29+
30+
31+
with open_ome_zarr(
32+
old_store_path,
33+
layout="hcs",
34+
mode="w-",
35+
channel_names=["DAPI", "GFP"],
36+
version="0.4",
37+
) as old_dataset:
38+
position = old_dataset.create_position("A", "1", "0")
39+
image = position.create_image(
40+
"0",
41+
random_image,
42+
chunks=(1, 1, 4, 32, 32),
43+
transform=[TransformationMeta(type="scale", scale=scale)],
44+
)
45+
46+
# %%
47+
# Write the same image with version 0.5 and sharding
48+
49+
with open_ome_zarr(old_store_path, mode="r", layout="hcs") as old_dataset:
50+
with open_ome_zarr(
51+
new_store_path,
52+
layout="hcs",
53+
mode="w",
54+
channel_names=old_dataset.channel_names,
55+
version="0.5",
56+
) as new_dataset:
57+
for name, old_position in old_dataset.positions():
58+
row, col, fov = name.split("/")
59+
new_position = new_dataset.create_position(row, col, fov)
60+
old_image = old_position["0"]
61+
new_image = new_position.create_image(
62+
"0",
63+
data=old_image.numpy(),
64+
chunks=(1, 1, 4, 32, 32),
65+
shards_ratio=(2, 1, 8, 4, 4),
66+
transform=old_position.metadata.multiscales[0]
67+
.datasets[0]
68+
.coordinate_transformations,
69+
)
70+
71+
# %%
72+
# Read the new FOV to verify it was written correctly
73+
with open_ome_zarr(new_store_path / "A/1/0", mode="r") as dataset:
74+
assert dataset.scale == scale
75+
image = dataset["0"]
76+
assert image.shards == (2, 1, 32, 128, 128)
77+
assert np.array_equal(image.numpy(), random_image)
78+
79+
# %%
80+
# Clean up
81+
tmp_dir.cleanup()

iohub/_deprecated/singlepagetiff.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@
33
import json
44
import logging
55
import os
6+
from pathlib import Path
67

78
import natsort
89
import numpy as np
910
import tifffile as tiff
1011
import zarr
1112

1213
from iohub._deprecated.reader_base import ReaderBase
14+
from iohub.mmstack import _tiff_to_fsspec_store
1315

1416

1517
class MicromanagerSequenceReader(ReaderBase):
16-
def __init__(self, folder, extract_data=False):
18+
def __init__(self, folder, extract_data=False, strict=False):
1719
super().__init__()
1820

1921
"""
@@ -32,13 +34,16 @@ def __init__(self, folder, extract_data=False):
3234
which contain singlepage tiff sequences
3335
extract_data (bool)
3436
True if zarr arrays should be extracted immediately
37+
strict (bool)
38+
True if failures in getting images should raise exceptions
3539
"""
3640

3741
if not os.path.isdir(folder):
3842
raise NotImplementedError(
3943
"supplied path for singlepage tiff sequence reader "
4044
"is not a folder"
4145
)
46+
self._strict = strict
4247

4348
self.log = logging.getLogger(__name__)
4449
self.positions = {}
@@ -206,20 +211,32 @@ def _create_stores(self, p):
206211
if c[0] == p:
207212
self.log.info(f"reading coord = {c} from filename = {fn}")
208213
with tiff.imread(fn, aszarr=True) as store:
209-
z[c[1], c[2], c[3]] = zarr.open(store)
214+
try:
215+
array = zarr.open(
216+
_tiff_to_fsspec_store(
217+
store, root_uri=Path(fn).parent.as_uri()
218+
),
219+
mode="r",
220+
)[:]
221+
z[c[1], c[2], c[3]] = array
222+
except Exception:
223+
self.log.error(
224+
f"error reading file {fn} for coordinate {c}"
225+
)
210226

211227
# check that the array was assigned
212-
if z == zarr.zeros(
213-
shape=(
214-
self.frames,
215-
self.channels,
216-
self.slices,
217-
self.height,
218-
self.width,
219-
),
220-
chunks=(1, 1, 1, self.height, self.width),
221-
):
222-
raise IOError(f"array at position {p} can not be found")
228+
if self._strict:
229+
if z == zarr.zeros(
230+
shape=(
231+
self.frames,
232+
self.channels,
233+
self.slices,
234+
self.height,
235+
self.width,
236+
),
237+
chunks=(1, 1, 1, self.height, self.width),
238+
):
239+
raise IOError(f"array at position {p} can not be found")
223240

224241
self.positions[p] = z
225242

iohub/_deprecated/zarrfile.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import numpy as np
1010
import zarr
11+
import zarr.storage
1112

1213
from iohub._deprecated.reader_base import ReaderBase
1314

@@ -27,7 +28,7 @@ class ZarrReader(ReaderBase):
2728
"""
2829

2930
def __init__(
30-
self, store_path: str, version: Literal["0.1", "0.4"] = "0.1"
31+
self, store_path: str, version: Literal["0.1", "0.4", "0.5"] = "0.1"
3132
):
3233
super().__init__()
3334

@@ -43,17 +44,11 @@ def __init__(
4344
# zarr files (.zarr) are directories
4445
if not os.path.isdir(store_path):
4546
raise ValueError("file does not exist")
46-
if version == "0.4":
47-
dimension_separator = "/"
48-
elif version == "0.1":
49-
dimension_separator = "."
50-
else:
47+
if version not in ("0.1", "0.4", "0.5"):
5148
raise ValueError(f"Invalid NGFF version: {version}")
5249
try:
53-
self.store = zarr.DirectoryStore(
54-
store_path, dimension_separator=dimension_separator
55-
)
56-
self.root = zarr.open(self.store, "r")
50+
self.store = zarr.storage.LocalStore(store_path)
51+
self.root = zarr.open(self.store, mode="r")
5752
except Exception:
5853
raise FileNotFoundError("Supplies path is not a valid zarr root")
5954
try:

iohub/convert.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,9 @@ def _init_hcs_arrays(self, arr_kwargs):
309309
def _init_grid_arrays(self, arr_kwargs):
310310
for row, columns in enumerate(self.position_grid):
311311
for column in columns:
312-
self._create_zeros_array(row, column, "0", arr_kwargs)
312+
self._create_zeros_array(
313+
str(row), str(column), "0", arr_kwargs
314+
)
313315

314316
def _create_zeros_array(
315317
self, row_name: str, col_name: str, pos_name: str, arr_kwargs: dict

iohub/mmstack.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
from __future__ import annotations
22

3+
import io
4+
import json
35
import logging
46
from copy import copy
57
from pathlib import Path
68
from typing import TYPE_CHECKING, Iterable
79
from warnings import catch_warnings, filterwarnings
810

911
import dask.array as da
12+
import fsspec
1013
import numpy as np
1114
import zarr
15+
import zarr.storage
1216
from natsort import natsorted
1317
from numpy.typing import ArrayLike
14-
from tifffile import TiffFile
18+
from tifffile import TiffFile, ZarrTiffStore
1519
from xarray import DataArray
1620

1721
from iohub.mm_fov import MicroManagerFOV, MicroManagerFOVMapping
@@ -31,6 +35,34 @@ def _normalize_mm_pos_key(key: str | int) -> int:
3135
raise TypeError("Micro-Manager position keys must be integers.")
3236

3337

38+
def _tiff_to_fsspec_store(
39+
zarr_tiff_store: ZarrTiffStore, root_uri: str
40+
) -> zarr.storage.FsspecStore:
41+
"""Bridge tifffile (zarr-python v2 interface) with zarr-python v3.
42+
43+
Parameters
44+
----------
45+
zarr_tiff_store : ZarrTiffStore
46+
Zarr (v2) wrapper for a TIFF series
47+
root_uri : str
48+
`file://` URI to the directory containing the TIFF files
49+
50+
Returns
51+
-------
52+
zarr.storage.FsspecStore
53+
Zarr (v3) wrapper for a TIFF series
54+
"""
55+
spec_container = io.StringIO()
56+
zarr_tiff_store.write_fsspec(spec_container, url=root_uri)
57+
fs, _ = fsspec.url_to_fs(
58+
"reference://",
59+
fo=json.loads(spec_container.getvalue()),
60+
target_protocol="file",
61+
asynchronous=True,
62+
)
63+
return zarr.storage.FsspecStore(fs=fs)
64+
65+
3466
def find_first_ome_tiff_in_mmstack(data_path: Path) -> Path:
3567
if data_path.is_file():
3668
if "ome.tif" in data_path.name:
@@ -120,9 +152,12 @@ def _parse_data(self):
120152
self.width,
121153
) = dims.values()
122154
self._set_mm_meta(self._first_tif.micromanager_metadata)
123-
self._store = series.aszarr()
155+
zarr_tiff_store = series.aszarr(multiscales=True)
156+
self._store = _tiff_to_fsspec_store(
157+
zarr_tiff_store, root_uri=self._root.as_uri()
158+
)
124159
_logger.debug(f"Opened {self._store}.")
125-
data = da.from_zarr(zarr.open(self._store))
160+
data = da.from_zarr(zarr.open(self._store, mode="r")["0"])
126161
self.dtype = data.dtype
127162
img = DataArray(data, dims=raw_dims, name=self.dirname)
128163
xarr = img.expand_dims(

0 commit comments

Comments
 (0)