Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/iohub/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,26 @@ class ZarrConfig(BaseModel):


class TensorStoreConfig(BaseModel):
"""Config for the TensorStore implementation."""
"""Config for the TensorStore implementation.

Parameters
----------
file_io_concurrency : int or None
Concurrency limit for TensorStore's ``file_io_concurrency``
resource. Raise above the default (32) on high-latency networked
filesystems (e.g. NFS) where the default under-saturates the link.
cache_pool_bytes : int or None
Aggregate byte budget for TensorStore's chunk cache pool. ``None``
disables caching.
recheck_cached_data : bool, "open" or None
Controls whether cached chunk data is re-validated on each read.
``None`` (default) uses the TensorStore driver default, which
revalidates cached chunk data on every read — one stat/GETATTR per
chunk. ``"open"`` checks freshness only when the array is opened
and trusts the cache thereafter — recommended for long-running
read-heavy workloads on NFS/VAST where the underlying zarr files
do not change. ``False`` disables freshness checks entirely.
"""

compressor: CompressorConfig = Field(default_factory=CompressorConfig)
data_copy_concurrency: int = Field(default=4, ge=1)
Expand All @@ -43,6 +62,7 @@ class TensorStoreConfig(BaseModel):
file_io_sync: bool = True
file_io_locking: Literal["auto", "disabled"] = "auto"
cache_pool_bytes: int | None = None
recheck_cached_data: bool | Literal["open"] | None = None
extra_context: dict | None = None


Expand Down
16 changes: 9 additions & 7 deletions src/iohub/core/implementations/tensorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,15 @@ def open_array(self, group: zarr.Group, name: str) -> ts.TensorStore:
"driver": driver,
"kvstore": {"driver": "file", "path": key},
}
self._array_cache[key] = _ts_open(
spec,
open=True,
read=True,
write=writable,
context=self._context(),
)
open_kwargs: dict[str, Any] = {
"open": True,
"read": True,
"write": writable,
"context": self._context(),
}
if self.config.recheck_cached_data is not None:
open_kwargs["recheck_cached_data"] = self.config.recheck_cached_data
self._array_cache[key] = _ts_open(spec, **open_kwargs)
return self._array_cache[key]

# -- Array I/O ---------------------------------------------------------
Expand Down
Loading