diff --git a/src/iohub/core/config.py b/src/iohub/core/config.py index 696d2f12..1724365a 100644 --- a/src/iohub/core/config.py +++ b/src/iohub/core/config.py @@ -34,7 +34,26 @@ class ZarrConfig(BaseModel): class TensorStoreConfig(BaseModel): - """Config for the TensorStore implementation.""" + """Config for the TensorStore implementation. + + Parameters + ---------- + file_io_concurrency : int or None + Concurrency limit for TensorStore's ``file_io_concurrency`` + resource. Raise above the default (32) on high-latency networked + filesystems (e.g. NFS) where the default under-saturates the link. + cache_pool_bytes : int or None + Aggregate byte budget for TensorStore's chunk cache pool. ``None`` + disables caching. + recheck_cached_data : bool, "open" or None + Controls whether cached chunk data is re-validated on each read. + ``None`` (default) uses the TensorStore driver default, which + revalidates cached chunk data on every access — one stat/GETATTR per + chunk. ``"open"`` checks freshness only when the array is opened + and trusts the cache thereafter — recommended for long-running + read-heavy workloads on NFS/VAST where the underlying zarr files + do not change. ``False`` disables freshness checks entirely. 
+ """ compressor: CompressorConfig = Field(default_factory=CompressorConfig) data_copy_concurrency: int = Field(default=4, ge=1) @@ -43,6 +62,7 @@ class TensorStoreConfig(BaseModel): file_io_sync: bool = True file_io_locking: Literal["auto", "disabled"] = "auto" cache_pool_bytes: int | None = None + recheck_cached_data: bool | Literal["open"] | None = None extra_context: dict | None = None diff --git a/src/iohub/core/implementations/tensorstore.py b/src/iohub/core/implementations/tensorstore.py index 081e5b14..2a35022a 100644 --- a/src/iohub/core/implementations/tensorstore.py +++ b/src/iohub/core/implementations/tensorstore.py @@ -193,13 +193,15 @@ def open_array(self, group: zarr.Group, name: str) -> ts.TensorStore: "driver": driver, "kvstore": {"driver": "file", "path": key}, } - self._array_cache[key] = _ts_open( - spec, - open=True, - read=True, - write=writable, - context=self._context(), - ) + open_kwargs: dict[str, Any] = { + "open": True, + "read": True, + "write": writable, + "context": self._context(), + } + if self.config.recheck_cached_data is not None: + open_kwargs["recheck_cached_data"] = self.config.recheck_cached_data + self._array_cache[key] = _ts_open(spec, **open_kwargs) return self._array_cache[key] # -- Array I/O ---------------------------------------------------------