Commit 737cedf
Port to iohub 0.3.2 ImageArray API (#407)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Alexandr Kalinin <alxndrkalinin@users.noreply.github.com>
1 parent a10d4c4 commit 737cedf

23 files changed: 239 additions & 815 deletions
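
At a glance, the commit replaces per-call `tensorstore.Context` plumbing with iohub 0.3.2's open-time configuration: stores are opened with `implementation="tensorstore"` plus a `TensorStoreConfig`, and arrays are read through the `.native` handle of the new ImageArray API. A minimal before/after sketch assembled from the calls changed below (the store path and indices are illustrative, not from this diff):

```python
from iohub.core.config import TensorStoreConfig
from iohub.ngff import open_ome_zarr

# Before (old API): build a tensorstore context at every read site.
#   import tensorstore as ts
#   ctx = ts.Context({"data_copy_concurrency": {"limit": 4}})
#   tzyx = position["0"].tensorstore(context=ctx)[:, 0].read().result()

# After (iohub 0.3.2): configure tensorstore once, when the store is opened.
plate = open_ome_zarr(
    "plate.zarr",  # illustrative path
    mode="r",
    implementation="tensorstore",
    implementation_config=TensorStoreConfig(data_copy_concurrency=4),
)
for _, position in plate.positions():
    # position["0"].native is a tensorstore.TensorStore; .read() is async.
    tzyx = position["0"].native[:, 0].read().result()
```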


applications/airtable/pyproject.toml

Lines changed: 1 addition & 2 deletions

@@ -8,14 +8,13 @@ description = "Interface to the Computational Imaging Airtable database"
 keywords = [ "airtable", "metadata", "microscopy", "zarr" ]
 license = "BSD-3-Clause"
 authors = [ { name = "Biohub", email = "compmicro@czbiohub.org" } ]
-requires-python = ">=3.11"
+requires-python = ">=3.12"
 classifiers = [
   "Development Status :: 3 - Alpha",
   "Intended Audience :: Science/Research",
   "License :: OSI Approved :: BSD License",
   "Operating System :: OS Independent",
   "Programming Language :: Python :: 3 :: Only",
-  "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
   "Programming Language :: Python :: 3.14",

applications/cytoland/pyproject.toml

Lines changed: 1 addition & 2 deletions

@@ -14,14 +14,13 @@ keywords = [
 ]
 license = "BSD-3-Clause"
 authors = [ { name = "Biohub", email = "compmicro@czbiohub.org" } ]
-requires-python = ">=3.11"
+requires-python = ">=3.12"
 classifiers = [
   "Development Status :: 4 - Beta",
   "Intended Audience :: Science/Research",
   "License :: OSI Approved :: BSD License",
   "Operating System :: OS Independent",
   "Programming Language :: Python :: 3 :: Only",
-  "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
   "Programming Language :: Python :: 3.14",

applications/dynacell/pyproject.toml

Lines changed: 1 addition & 2 deletions

@@ -14,14 +14,13 @@ keywords = [
 ]
 license = "BSD-3-Clause"
 authors = [ { name = "Biohub", email = "compmicro@czbiohub.org" } ]
-requires-python = ">=3.11"
+requires-python = ">=3.12"
 classifiers = [
   "Development Status :: 3 - Alpha",
   "Intended Audience :: Science/Research",
   "License :: OSI Approved :: BSD License",
   "Operating System :: OS Independent",
   "Programming Language :: Python :: 3 :: Only",
-  "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
   "Programming Language :: Python :: 3.14",

applications/dynaclr/pyproject.toml

Lines changed: 1 addition & 2 deletions

@@ -15,14 +15,13 @@ keywords = [
 ]
 license = "BSD-3-Clause"
 authors = [ { name = "Biohub", email = "compmicro@czbiohub.org" } ]
-requires-python = ">=3.11"
+requires-python = ">=3.12"
 classifiers = [
   "Development Status :: 4 - Beta",
   "Intended Audience :: Science/Research",
   "License :: OSI Approved :: BSD License",
   "Operating System :: OS Independent",
   "Programming Language :: Python :: 3 :: Only",
-  "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
   "Programming Language :: Python :: 3.14",

applications/dynaclr/src/dynaclr/data/datamodule.py

Lines changed: 11 additions & 4 deletions

@@ -12,9 +12,11 @@
 from __future__ import annotations
 
 import logging
+import os
 
 import numpy as np
 import pandas as pd
+from iohub.core.config import TensorStoreConfig
 from lightning.pytorch import LightningDataModule
 from monai.data.thread_buffer import ThreadDataLoader
 from monai.transforms import Compose, MapTransform
@@ -238,6 +240,12 @@ def __init__(
         # Loss hyperparameters (informational)
         # Other
         self.cache_pool_bytes = cache_pool_bytes
+        cpus = os.environ.get("SLURM_CPUS_PER_TASK")
+        cpus = int(cpus) if cpus is not None else (os.cpu_count() or 4)
+        self.tensorstore_config = TensorStoreConfig(
+            data_copy_concurrency=cpus,
+            cache_pool_bytes=cache_pool_bytes or None,
+        )
         self.seed = seed
         self.include_wells = include_wells
         self.exclude_fovs = exclude_fovs
@@ -361,13 +369,13 @@ def _setup_experiment_split(self, registry: ExperimentRegistry) -> None:
             positive_cell_source=self.positive_cell_source,
             positive_match_columns=self.positive_match_columns,
             max_border_shift=self.max_border_shift,
+            tensorstore_config=self.tensorstore_config,
         )
         self.train_dataset = MultiExperimentTripletDataset(
             index=train_index,
             fit=True,
             tau_range_hours=self.tau_range,
             tau_decay_rate=self.tau_decay_rate,
-            cache_pool_bytes=self.cache_pool_bytes,
             channels_per_sample=self.channels_per_sample,
             positive_cell_source=self.positive_cell_source,
             positive_match_columns=self.positive_match_columns,
@@ -388,13 +396,13 @@ def _setup_experiment_split(self, registry: ExperimentRegistry) -> None:
             positive_cell_source=self.positive_cell_source,
             positive_match_columns=self.positive_match_columns,
             max_border_shift=self.max_border_shift,
+            tensorstore_config=self.tensorstore_config,
         )
         self.val_dataset = MultiExperimentTripletDataset(
             index=val_index,
             fit=True,
             tau_range_hours=self.tau_range,
             tau_decay_rate=self.tau_decay_rate,
-            cache_pool_bytes=self.cache_pool_bytes,
             channels_per_sample=self.channels_per_sample,
             positive_cell_source=self.positive_cell_source,
             positive_match_columns=self.positive_match_columns,
@@ -419,6 +427,7 @@ def _setup_fov_split(self, registry: ExperimentRegistry) -> None:
             num_workers=self.num_workers_index,
             positive_cell_source=self.positive_cell_source,
             positive_match_columns=self.positive_match_columns,
+            tensorstore_config=self.tensorstore_config,
         )
 
         rng = np.random.default_rng(self.seed)
@@ -458,7 +467,6 @@ def _setup_fov_split(self, registry: ExperimentRegistry) -> None:
             fit=True,
             tau_range_hours=self.tau_range,
             tau_decay_rate=self.tau_decay_rate,
-            cache_pool_bytes=self.cache_pool_bytes,
             channels_per_sample=self.channels_per_sample,
             positive_cell_source=self.positive_cell_source,
             positive_match_columns=self.positive_match_columns,
@@ -477,7 +485,6 @@ def _setup_fov_split(self, registry: ExperimentRegistry) -> None:
             fit=True,
             tau_range_hours=self.tau_range,
             tau_decay_rate=self.tau_decay_rate,
-            cache_pool_bytes=self.cache_pool_bytes,
             channels_per_sample=self.channels_per_sample,
             positive_cell_source=self.positive_cell_source,
             positive_match_columns=self.positive_match_columns,
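
The SLURM-aware CPU logic above moved verbatim out of the dataset (next file) into the datamodule, so one `TensorStoreConfig` is now shared by the index and both datasets. One nuance: `cache_pool_bytes or None` maps the old default of 0 to `None`, letting iohub's default cache policy apply rather than forcing an explicit zero-byte limit. A sketch of the resulting config, under the same fallback assumptions as the code above:

```python
import os

from iohub.core.config import TensorStoreConfig

# Same fallback chain as the datamodule: SLURM allocation if present,
# else host CPU count, else 4.
cpus = int(os.environ.get("SLURM_CPUS_PER_TASK") or os.cpu_count() or 4)

config = TensorStoreConfig(
    data_copy_concurrency=cpus,
    cache_pool_bytes=None,  # None, not 0: defer to the implementation default
)
```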

applications/dynaclr/src/dynaclr/data/dataset.py

Lines changed: 2 additions & 25 deletions

@@ -17,7 +17,6 @@
 from __future__ import annotations
 
 import logging
-import os
 from collections import defaultdict
 
 import numpy as np
@@ -112,8 +111,6 @@ class MultiExperimentTripletDataset(Dataset):
     return_negative : bool
         Reserved for future use. Currently unused (NTXentLoss uses
         in-batch negatives).
-    cache_pool_bytes : int
-        Tensorstore cache pool size in bytes.
     channels_per_sample : int | list[str] | None
         Controls how many source channels to read per sample.
         ``None`` (default) — read all source channels, output ``(B, C, Z, Y, X)``.
@@ -148,7 +145,6 @@ def __init__(
         tau_range_hours: tuple[float, float] = (0.5, 2.0),
         tau_decay_rate: float = 2.0,
         return_negative: bool = False,
-        cache_pool_bytes: int = 0,
         channels_per_sample: int | list[str] | None = None,
         positive_cell_source: str = "lookup",
         positive_match_columns: list[str] | None = None,
@@ -205,25 +201,9 @@ def __init__(
             _logger.info("Label encoder '%s' (%s): %d classes", batch_key, col, len(encoder))
 
         self._rng = np.random.default_rng()
-        self._setup_tensorstore_context(cache_pool_bytes)
-        self._build_match_lookup()
-
-    # ------------------------------------------------------------------
-    # Initialization helpers
-    # ------------------------------------------------------------------
-
-    def _setup_tensorstore_context(self, cache_pool_bytes: int) -> None:
-        """Configure tensorstore context with CPU limits based on SLURM env."""
-        cpus = os.environ.get("SLURM_CPUS_PER_TASK")
-        cpus = int(cpus) if cpus is not None else (os.cpu_count() or 4)
-        self._ts_context = ts.Context(
-            {
-                "data_copy_concurrency": {"limit": cpus},
-                "cache_pool": {"total_bytes_limit": cache_pool_bytes},
-            }
-        )
         self._tensorstores: dict[str, ts.TensorStore] = {}
         self._norm_meta_cache: dict[str, NormMeta | None] = {}
+        self._build_match_lookup()
 
     def _build_match_lookup(self) -> None:
         """Build lookup structures for O(1) positive candidate lookup.
@@ -531,10 +511,7 @@ def _get_tensorstore(self, position, fov_name: str) -> "ts.TensorStore":
         ts.TensorStore
         """
        if fov_name not in self._tensorstores:
-            self._tensorstores[fov_name] = position["0"].tensorstore(
-                context=self._ts_context,
-                recheck_cached_data="open",
-            )
+            self._tensorstores[fov_name] = position["0"].native
         return self._tensorstores[fov_name]
 
     def _slice_patch(
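
With the per-dataset context gone, `_get_tensorstore` reduces to caching the `.native` handle per FOV; concurrency and cache policy are whatever the plate was opened with (see index.py below). A small sketch of the read contract the dataset now assumes; the function name and slice arguments are illustrative, not from this diff:

```python
import tensorstore as ts


def read_patch(position, t: int, zyx: tuple[slice, slice, slice]):
    """Illustrative read path, assuming the plate was opened with the
    tensorstore implementation so ``position["0"].native`` is a
    ``ts.TensorStore`` (the zarr implementation would not support
    ``.read().result()``)."""
    store: ts.TensorStore = position["0"].native
    z, y, x = zyx
    # Async read of one timepoint, all channels, materialized as numpy.
    return store[t, :, z, y, x].read().result()
```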

applications/dynaclr/src/dynaclr/data/index.py

Lines changed: 13 additions & 1 deletion

@@ -14,6 +14,7 @@
 
 import numpy as np
 import pandas as pd
+from iohub.core.config import TensorStoreConfig
 from iohub.ngff import Plate, Position, open_ome_zarr
 
 from dynaclr.data.experiment import ExperimentRegistry
@@ -188,6 +189,7 @@ def __init__(
         positive_cell_source: str = "lookup",
         positive_match_columns: list[str] | None = None,
         max_border_shift: int = -1,
+        tensorstore_config: TensorStoreConfig | None = None,
     ) -> None:
         self.registry = registry
         self.yx_patch_size = yx_patch_size
@@ -197,6 +199,11 @@ def __init__(
         if max_border_shift < 0:
             max_border_shift = max(yx_patch_size[0] // 4, yx_patch_size[1] // 4)
         self.max_border_shift = max_border_shift
+        # Plates cached here feed Position objects whose arrays the dataset reads
+        # via ``position["0"].native`` (tensorstore handle). The tensorstore impl
+        # must be configured at open-time — default zarr would return a
+        # ``zarr.Array`` that has no ``.read().result()`` method.
+        self.tensorstore_config = tensorstore_config or TensorStoreConfig()
         self._store_cache: dict[str, Plate] = {}
 
         # Merge collection-level exclude_fovs with runtime exclude_fovs
@@ -362,7 +369,12 @@ def _resolve_positions_and_dims(self, tracks: pd.DataFrame) -> tuple[list[Positi
 
         for (store_path, well_name, fov_name), _group in tracks.groupby(["store_path", "well_name", "fov_name"]):
             if store_path not in self._store_cache:
-                self._store_cache[store_path] = open_ome_zarr(store_path, mode="r")
+                self._store_cache[store_path] = open_ome_zarr(
+                    store_path,
+                    mode="r",
+                    implementation="tensorstore",
+                    implementation_config=self.tensorstore_config,
+                )
             plate = self._store_cache[store_path]
             # fov_name may be just the FOV id (e.g. "000000") or the full
             # position path (e.g. "C/1/000000"). Prepend well_name when needed.
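
The comment in the hunk above is the crux of the port: `.native` returns whatever array type the plate was opened with, so the dataset's `.read().result()` chain only works when the index opens every cached plate with the tensorstore implementation. A sketch of the contrast, assuming iohub's default open is zarr-backed (`store_path` is illustrative):

```python
from iohub.core.config import TensorStoreConfig
from iohub.ngff import open_ome_zarr

store_path = "/data/plate.zarr"  # illustrative

# Default open: position["0"].native would be a zarr.Array, which has no
# .read().result(), so the dataset's read path would fail.
# plate = open_ome_zarr(store_path, mode="r")

# Open-time tensorstore configuration, as _resolve_positions_and_dims now does:
plate = open_ome_zarr(
    store_path,
    mode="r",
    implementation="tensorstore",
    implementation_config=TensorStoreConfig(),  # or the datamodule's shared config
)
```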

applications/qc/pyproject.toml

Lines changed: 1 addition & 2 deletions

@@ -9,14 +9,13 @@ readme = "README.md"
 keywords = [ "microscopy", "quality control", "zarr" ]
 license = "BSD-3-Clause"
 authors = [ { name = "Biohub", email = "compmicro@czbiohub.org" } ]
-requires-python = ">=3.11"
+requires-python = ">=3.12"
 classifiers = [
   "Development Status :: 4 - Beta",
   "Intended Audience :: Science/Research",
   "License :: OSI Approved :: BSD License",
   "Operating System :: OS Independent",
   "Programming Language :: Python :: 3 :: Only",
-  "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
   "Programming Language :: Python :: 3.14",

applications/qc/src/qc/focus.py

Lines changed: 7 additions & 9 deletions

@@ -1,7 +1,6 @@
 """In-focus z-slice detection using midband spatial frequency power."""
 
 import numpy as np
-import tensorstore
 import torch
 from waveorder.focus import focus_from_transverse_band
 
@@ -46,17 +45,16 @@ def __init__(
         self.device = torch.device(device)
 
     def channels(self) -> list[str]:
+        """Return the channels this metric is configured for."""
         return self.channel_names
 
     def __call__(self, position, channel_name, channel_index, num_workers=4):
-        tzyx = (
-            position["0"]
-            .tensorstore(context=tensorstore.Context({"data_copy_concurrency": {"limit": num_workers}}))[
-                :, channel_index
-            ]
-            .read()
-            .result()
-        )
+        """Compute focus-slice index per timepoint for one channel of ``position``."""
+        # Tensorstore concurrency is configured on the plate at
+        # open-time (see qc_metrics.generate_qc_metadata); num_workers
+        # is retained here only to match the QCMetric abstract interface.
+        del num_workers
+        tzyx = position["0"].native[:, channel_index].read().result()
 
         T = tzyx.shape[0]
         focus_indices = np.empty(T, dtype=int)
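
The rewritten `__call__` drops the per-call context entirely; concurrency now comes from the plate opened in `generate_qc_metadata`, and `num_workers` survives only for interface compatibility. Assuming the usual 5-D TCZYX layout of the "0" array, the shape bookkeeping behind the read works out as follows (dummy data, illustrative sizes):

```python
import numpy as np

# The "0" array is TCZYX; indexing [:, channel_index] drops the C axis,
# leaving a (T, Z, Y, X) stack, so one focus index is computed per timepoint.
tczyx = np.zeros((3, 2, 5, 64, 64), dtype=np.float32)  # T, C, Z, Y, X
channel_index = 0
tzyx = tczyx[:, channel_index]
assert tzyx.shape == (3, 5, 64, 64)
focus_indices = np.empty(tzyx.shape[0], dtype=int)  # one entry per timepoint
```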

applications/qc/src/qc/qc_metrics.py

Lines changed: 36 additions & 32 deletions

@@ -4,6 +4,7 @@
 from abc import ABC, abstractmethod
 
 import iohub.ngff as ngff
+from iohub.core.config import TensorStoreConfig
 from tqdm import tqdm
 
 from viscy_utils.meta_utils import write_meta_field
@@ -83,42 +84,45 @@ def generate_qc_metadata(
     num_workers : int
         Number of workers for data loading.
     """
-    plate = ngff.open_ome_zarr(zarr_dir, mode="r+")
-    position_map = list(plate.positions())
+    with ngff.open_ome_zarr(
+        zarr_dir,
+        mode="r+",
+        implementation="tensorstore",
+        implementation_config=TensorStoreConfig(data_copy_concurrency=num_workers),
+    ) as plate:
+        position_map = list(plate.positions())
 
-    for metric in metrics:
-        channel_list = metric.channels()
+        for metric in metrics:
+            channel_list = metric.channels()
 
-        for channel_name in channel_list:
-            channel_index = plate.channel_names.index(channel_name)
-            _logger.info(f"Computing {metric.field_name} for channel '{channel_name}'")
+            for channel_name in channel_list:
+                channel_index = plate.channel_names.index(channel_name)
+                _logger.info(f"Computing {metric.field_name} for channel '{channel_name}'")
 
-            position_results = []
+                position_results = []
 
-            for _, pos in tqdm(position_map, desc="Positions"):
-                result = metric(pos, channel_name, channel_index, num_workers)
-                position_results.append((pos, result))
+                for _, pos in tqdm(position_map, desc="Positions"):
+                    result = metric(pos, channel_name, channel_index, num_workers)
+                    position_results.append((pos, result))
 
-            all_results = [r for _, r in position_results]
-            dataset_stats = metric.aggregate_dataset(all_results)
+                all_results = [r for _, r in position_results]
+                dataset_stats = metric.aggregate_dataset(all_results)
 
-            if dataset_stats:
-                write_meta_field(
-                    position=plate,
-                    metadata={"dataset_statistics": dataset_stats},
-                    field_name=metric.field_name,
-                    subfield_name=channel_name,
-                )
-
-            for pos, result in position_results:
-                metadata = {**result}
                 if dataset_stats:
-                    metadata["dataset_statistics"] = dataset_stats
-                write_meta_field(
-                    position=pos,
-                    metadata=metadata,
-                    field_name=metric.field_name,
-                    subfield_name=channel_name,
-                )
-
-    plate.close()
+                    write_meta_field(
+                        position=plate,
+                        metadata={"dataset_statistics": dataset_stats},
+                        field_name=metric.field_name,
+                        subfield_name=channel_name,
+                    )
+
+                for pos, result in position_results:
+                    metadata = {**result}
+                    if dataset_stats:
+                        metadata["dataset_statistics"] = dataset_stats
+                    write_meta_field(
+                        position=pos,
+                        metadata=metadata,
+                        field_name=metric.field_name,
+                        subfield_name=channel_name,
+                    )
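
For callers, the only change is lifecycle: the plate is opened once as a context manager with tensorstore concurrency tied to `num_workers`, and it now closes even if a metric raises (the old trailing `plate.close()` was skipped on error). A hypothetical invocation, where `metrics` stands in for concrete `QCMetric` instances such as the focus metric above and the path is illustrative:

```python
from qc.qc_metrics import generate_qc_metadata

metrics = [...]  # QCMetric instances, e.g. the focus metric from qc.focus

generate_qc_metadata(
    zarr_dir="plate.zarr",  # illustrative path
    metrics=metrics,
    num_workers=8,  # becomes TensorStoreConfig(data_copy_concurrency=8)
)
```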
