diff --git a/iohub/cli/cli.py b/iohub/cli/cli.py
index d44b9b57..7c9e3cc5 100644
--- a/iohub/cli/cli.py
+++ b/iohub/cli/cli.py
@@ -6,6 +6,7 @@
 from iohub._version import __version__
 from iohub.cli.parsing import input_position_dirpaths
 from iohub.convert import TIFFConverter
+from iohub.ngff.nodes import _is_remote_url
 from iohub.reader import print_info
 from iohub.rename_wells import rename_wells
 
@@ -16,6 +17,24 @@
 )
 
+
+class _PathOrURL(click.ParamType):
+    """Accepts local directory paths or remote URLs."""
+
+    name = "PATH_OR_URL"
+
+    def convert(self, value, param, ctx):
+        if value is None:
+            return None
+        if _is_remote_url(value):
+            return value
+        path = pathlib.Path(value).resolve()
+        if not path.exists():
+            self.fail(f"'{value}' does not exist.", param, ctx)
+        if not path.is_dir():
+            self.fail(f"'{value}' is not a directory.", param, ctx)
+        return path
+
 
 @click.group()
 @click.help_option("-h", "--help")
 @click.version_option(version=VERSION)
@@ -29,7 +48,7 @@ def cli():
     "files",
     nargs=-1,
     required=True,
-    type=_DATASET_PATH,
+    type=_PathOrURL(),
 )
 @click.option(
     "--verbose",
@@ -39,14 +58,13 @@ def cli():
     "and full tree for HCS Plates in OME-Zarr",
 )
 def info(files, verbose):
-    """View basic metadata of a list of FILES.
+    """View basic metadata of a list of FILES or URLs.
 
-    Supported formats are Micro-Manager-acquired TIFF datasets
-    (single-page TIFF, multi-page OME-TIFF, NDTIFF)
-    and OME-Zarr (v0.1 linear HCS layout and all v0.4 layouts).
+    Supported formats: Micro-Manager TIFFs, OME-Zarr (v0.4/v0.5),
+    and remote URLs (HTTP/HTTPS, S3, GCS).
     """
     for file in files:
-        click.echo(f"Reading file:\t {file}")
+        click.echo(f"Reading:\t {file}")
         print_info(file, verbose=verbose)
 
 
diff --git a/iohub/ngff/nodes.py b/iohub/ngff/nodes.py
index 79267a4e..e2580a42 100644
--- a/iohub/ngff/nodes.py
+++ b/iohub/ngff/nodes.py
@@ -18,6 +18,8 @@
 import zarr.codecs
 from numpy.typing import ArrayLike, DTypeLike, NDArray
 from pydantic import ValidationError
+from rich.console import Console
+from rich.tree import Tree as RichTree
 from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
 from zarr.storage._utils import normalize_path
 
@@ -60,16 +62,35 @@ def _pad_shape(shape: tuple[int, ...], target: int = 5):
     return (1,) * pad + shape
 
 
+def _is_remote_url(path: str | Path) -> bool:
+    """Check if path is a remote URL (http, https, s3, gs, az)."""
+    url_str = str(path)
+    return url_str.startswith(
+        ('http://', 'https://', 's3://', 'gs://', 'az://')
+    )
+
+
 def _open_store(
     store_path: str | Path,
     mode: Literal["r", "r+", "a", "w", "w-"],
     version: Literal["0.4", "0.5"],
 ):
-    store_path = Path(store_path).resolve()
-    if not store_path.exists() and mode in ("r", "r+"):
-        raise FileNotFoundError(
-            f"Dataset directory not found at {str(store_path)}."
-        )
+    is_remote = _is_remote_url(store_path)
+
+    if is_remote:
+        if mode not in ("r",):
+            raise NotImplementedError(
+                f"Write mode '{mode}' not supported for remote URLs. "
+                "Use mode='r' for read-only access."
+            )
+    else:
+        # Local path: existing behavior
+        store_path = Path(store_path).resolve()
+        if not store_path.exists() and mode in ("r", "r+"):
+            raise FileNotFoundError(
+                f"Dataset directory not found at {str(store_path)}."
+            )
+
     if version not in ("0.4", "0.5"):
         _logger.warning(
             "IOHub is only tested against OME-NGFF v0.4 and v0.5. "
@@ -77,7 +98,10 @@ def _open_store(
         )
     try:
         zarr_format = None
-        if mode in ("w", "w-") or (mode == "a" and not store_path.exists()):
+        if not is_remote and (
+            mode in ("w", "w-")
+            or (mode == "a" and not Path(store_path).exists())
+        ):
             zarr_format = 3 if version == "0.5" else 2
         root = zarr.open_group(store_path, mode=mode, zarr_format=zarr_format)
     except Exception as e:
@@ -190,11 +214,13 @@ def __len__(self):
     def __getitem__(self, key):
         key = normalize_path(str(key))
         znode = self.zgroup.get(key)
-        if not znode:
+        if znode is None:
             raise KeyError(key)
         levels = len(key.split("/")) - 1
         item_type = self._MEMBER_TYPE
         for _ in range(levels):
+            if issubclass(item_type, ImageArray):
+                break
             item_type = item_type._MEMBER_TYPE
         if issubclass(item_type, ImageArray):
             return item_type.from_zarr_array(znode)
@@ -272,15 +298,34 @@ def is_leaf(self):
         """
         return not self.group_keys()
 
-    def print_tree(self, level: int | None = None):
-        """Print hierarchy of the node to stdout.
+    def _build_tree(
+        self,
+        parent: RichTree | None = None,
+        level: int | None = None,
+        current_level: int = 0,
+    ) -> RichTree:
+        """Build tree using OME-NGFF metadata (works for remote stores)."""
+        name = self.zgroup.basename or self.zgroup.name
+        tree = parent.add(name) if parent else RichTree(name)
+
+        if level is not None and current_level >= level:
+            return tree
+
+        for member_name in self._member_names:
+            member = self[member_name]
+            if isinstance(member, ImageArray):
+                shape = ", ".join(
+                    f"{a.name}: {s}" for a, s in zip(self.axes, member.shape)
+                )
+                tree.add(f"{member_name} ({shape})")
+            else:
+                member._build_tree(tree, level, current_level + 1)
 
-        Parameters
-        ----------
-        level : int, optional
-            Maximum depth to show, by default None
-        """
-        print(self.zgroup.tree(level=level))
+        return tree
+
+    def print_tree(self, level: int | None = None):
+        """Print hierarchy using OME-NGFF metadata"""
+        Console().print(self._build_tree(level=level))
 
     def iteritems(self):
         for key in self._member_names:
@@ -593,6 +638,7 @@ def __init__(
         axes: list[AxisMeta] | None = None,
         version: Literal["0.4", "0.5"] = "0.4",
         overwriting_creation: bool = False,
+        **kwargs,  # Absorbs extra attrs from parent hierarchy
     ):
         super().__init__(
             group=group,
@@ -640,7 +686,8 @@ def _zarr_format(self):
 
     @property
     def _member_names(self):
-        return self.array_keys()
+        """Array names from position metadata."""
+        return sorted(ds.path for ds in self.metadata.multiscales[0].datasets)
 
     @property
     def data(self):
@@ -1429,6 +1476,7 @@ def __init__(
         axes: list[AxisMeta] | None = None,
         version: Literal["0.4", "0.5"] = "0.4",
         overwriting_creation: bool = False,
+        **kwargs,  # Absorbs extra attrs from parent hierarchy
     ):
         super().__init__(
             group=group,
@@ -1467,6 +1515,11 @@ def __getitem__(self, key: str):
         """
         return super().__getitem__(key)
 
+    @property
+    def _member_names(self):
+        """Position names from well metadata."""
+        return sorted(img.path for img in self.metadata.images)
+
     def _create_position_nosync(self, name: str, acquisition: int = 0):
         "create_position, but doesn't write the metadata yet."
         pos_grp = self._group.create_group(name, overwrite=self._overwrite)
@@ -1541,6 +1594,7 @@ def __init__(
         axes: list[AxisMeta] | None = None,
         version: Literal["0.4", "0.5"] = "0.4",
         overwriting_creation: bool = False,
+        _plate_wells: list[WellIndexMeta] | None = None,
     ):
         super().__init__(
             group=group,
@@ -1550,6 +1604,7 @@
             version=version,
             overwriting_creation=overwriting_creation,
         )
+        self._plate_wells = _plate_wells
 
     def __getitem__(self, key: str):
         """Get a well member of the row.
@@ -1566,6 +1621,18 @@
         """
         return super().__getitem__(key)
 
+    @property
+    def _member_names(self):
+        """Column names from plate metadata."""
+        row_name = self.zgroup.basename
+        return sorted(
+            {
+                well.path.split("/")[1]
+                for well in self._plate_wells
+                if well.path.startswith(f"{row_name}/")
+            }
+        )
+
     def wells(self):
         """Returns a generator that iterate over the name and value
         of all the wells in the row.
@@ -1581,6 +1648,13 @@ def _parse_meta(self):
         # this node does not have NGFF metadata
         return
 
+    @property
+    def _child_attrs(self):
+        """Attributes to pass to Well children (exclude _plate_wells)."""
+        attrs = super()._child_attrs.copy()
+        attrs.pop("_plate_wells", None)
+        return attrs
+
 
 class Plate(NGFFNode):
     _MEMBER_TYPE = Row
@@ -1707,9 +1781,10 @@ def _first_pos_attr(self, attr: str):
         name = " ".join(attr.split("_")).strip()
         msg = f"Cannot determine {name}:"
         try:
-            row_grp = next(self.zgroup.groups())[1]
-            well_grp = next(row_grp.groups())[1]
-            pos_grp = next(well_grp.groups())[1]
+            pos_grp = self._get_first_position_group()
+            if pos_grp is None:
+                _logger.warning(f"{msg} No position is found in the dataset.")
+                return
         except StopIteration:
             _logger.warning(f"{msg} No position is found in the dataset.")
             return
@@ -1719,6 +1794,39 @@
         except AttributeError:
             _logger.warning(f"{msg} Invalid metadata at the first position")
 
+    def _get_first_position_group(self):
+        """Get the first position group using metadata."""
+        if not self.metadata or not self.metadata.wells:
+            raise ValueError(
+                "Plate metadata required to determine first position"
+            )
+
+        well_path = self.metadata.wells[0].path
+        well = Well(self.zgroup[well_path], parse_meta=True)
+
+        if not well.metadata or not well.metadata.images:
+            raise ValueError(
+                "Well metadata required to determine first position"
+            )
+
+        return self.zgroup[f"{well_path}/{well.metadata.images[0].path}"]
+
+    @property
+    def _member_names(self):
+        """Row names from plate metadata."""
+        return sorted(row.name for row in self.metadata.rows)
+
+    @property
+    def _child_attrs(self):
+        """Attributes to pass on when constructing child type instances."""
+        attrs = super()._child_attrs
+        # Pass only wells list - Row doesn't need full plate metadata
+        try:
+            attrs["_plate_wells"] = self.metadata.wells
+        except AttributeError:
+            pass
+        return attrs
+
     def dump_meta(self, field_count: bool = False):
         """Dumps metadata JSON to the `.zattrs` file.
 
@@ -1805,8 +1913,10 @@ def create_well(
         # normalize input
         row_name = normalize_path(row_name)
         col_name = normalize_path(col_name)
-        if row_name in self:
-            if col_name in self[row_name]:
+        # Check filesystem directly (not via _member_names/metadata)
+        # to ensure consistency during creation when metadata may be stale
+        if row_name in self.zgroup:
+            if col_name in self.zgroup[row_name]:
                 raise FileExistsError(
                     f"Well '{row_name}/{col_name}' already exists."
                 )
@@ -1824,15 +1934,15 @@
             self._build_meta(row_meta, col_meta, well_index_meta)
         else:
             self.metadata.wells.append(well_index_meta)
-        # create new row if needed
-        if row_name not in self:
+        # create new row if needed (check filesystem directly)
+        if row_name not in self.zgroup:
             row_grp = self.zgroup.create_group(
                 row_meta.name, overwrite=self._overwrite
             )
             if row_meta not in self.metadata.rows:
                 self.metadata.rows.append(row_meta)
         else:
-            row_grp = self[row_name].zgroup
+            row_grp = self.zgroup[row_name]
         if col_meta not in self.metadata.columns:
             self.metadata.columns.append(col_meta)
         # create well
@@ -2100,10 +2210,21 @@ def rename_well(self, old: str, new: str):
 
 
 def _check_file_mode(
-    store_path: Path,
+    store_path: str | Path,
     mode: Literal["r", "r+", "a", "w", "w-"],
     disable_path_checking: bool,
 ) -> bool:
+    if _is_remote_url(store_path):
+        # Remote URLs: only read mode supported, always parse metadata
+        if mode not in ("r",):
+            raise NotImplementedError(
+                f"Write mode '{mode}' not supported for remote URLs. "
+                "Use mode='r' for read-only access."
+            )
+        return True  # parse_meta = True for read mode
+
+    # Local paths: existing behavior
+    store_path = Path(store_path)
     if mode == "a":
         mode = "r+" if store_path.exists() else "w-"
         parse_meta = False
@@ -2265,7 +2386,9 @@ def open_ome_zarr(
         :py:class:`iohub.ngff.Plate`, or :py:class:`iohub.ngff.TiledPosition`)
     """
-    store_path = Path(store_path)
+    # Don't convert to Path for remote URLs
+    if not _is_remote_url(store_path):
+        store_path = Path(store_path)
     parse_meta = _check_file_mode(
         store_path, mode, disable_path_checking=disable_path_checking
     )
 
diff --git a/iohub/reader.py b/iohub/reader.py
index dd02f201..efe36a5f 100644
--- a/iohub/reader.py
+++ b/iohub/reader.py
@@ -8,6 +8,9 @@
 import natsort
 import tifffile as tiff
 import zarr
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
 
 from iohub._deprecated.reader_base import ReaderBase
 from iohub._deprecated.singlepagetiff import MicromanagerSequenceReader
@@ -15,7 +18,14 @@
 from iohub.fov import BaseFOVMapping
 from iohub.mmstack import MMStack
 from iohub.ndtiff import NDTiffDataset
-from iohub.ngff.nodes import NGFFNode, Plate, Position, open_ome_zarr
+from iohub.ngff.models import SpaceAxisMeta
+from iohub.ngff.nodes import (
+    NGFFNode,
+    Plate,
+    Position,
+    _is_remote_url,
+    open_ome_zarr,
+)
 
 if TYPE_CHECKING:
     from _typeshed import StrOrBytesPath
@@ -185,25 +195,69 @@ def read_images(
     raise ValueError(f"Reader of type {data_type} is not implemented")
 
 
-def print_info(path: StrOrBytesPath, verbose=False):
+def _create_summary_table() -> Table:
+    """Create a two-column key-value table for summary display."""
+    table = Table(show_header=False, box=None, padding=(0, 1), expand=True)
+    table.add_column("Property", style="cyan", no_wrap=True, width=22)
+    table.add_column("Value", overflow="fold")
+    return table
+
+
+def _format_axes(axes: list) -> str:
+    """Format axes as comma-separated string with types."""
+    return ", ".join(f"{a.name} ({a.type})" for a in axes)
+
+
+def _format_channels(names: list) -> str:
+    """Format channel names as comma-separated string."""
+    return ", ".join(str(n) for n in names)
+
+
+_UNIT_ABBREV = {
+    "micrometer": "µm",
+    "nanometer": "nm",
+    "millimeter": "mm",
+    "meter": "m",
+}
+
+
+def _get_space_unit(axes: list) -> str | None:
+    """Get the unit from the first space axis, abbreviated for display."""
+    for ax in axes:
+        if isinstance(ax, SpaceAxisMeta):
+            return _UNIT_ABBREV.get(ax.unit, ax.unit)
+    return None
+
+
+def print_info(path: StrOrBytesPath | str, verbose=False):
     """Print summary information for a dataset.
 
     Parameters
     ----------
-    path : StrOrBytesPath
-        Path to the dataset
+    path : StrOrBytesPath | str
+        Path to the dataset (local path or remote URL)
     verbose : bool, optional
         Show usage guide to open dataset in Python
         and full tree for HCS Plates in OME-Zarr, by default False
     """
-    path = Path(path).resolve()
+    is_remote = _is_remote_url(path)
+    if not is_remote:
+        path = Path(path).resolve()
+
     try:
-        fmt, extra_info = _infer_format(path)
-        if fmt == "omezarr" and extra_info in ("0.4", "0.5"):
-            reader = open_ome_zarr(path, mode="r", version=extra_info)
+        if is_remote:
+            # Remote URLs only support OME-Zarr
+            reader = open_ome_zarr(path, mode="r")
+            fmt, extra_info = "omezarr", reader.zattrs.get("ome", {}).get(
+                "version", reader.zattrs.get("plate", {}).get("version")
+            )
         else:
-            reader = read_images(path, data_type=fmt)
+            fmt, extra_info = _infer_format(path)
+            if fmt == "omezarr" and extra_info in ("0.4", "0.5"):
+                reader = open_ome_zarr(path, mode="r", version=extra_info)
+            else:
+                reader = read_images(path, data_type=fmt)
     except (ValueError, RuntimeError):
         print("Error: No compatible dataset is found.")
         return
 
@@ -243,59 +297,64 @@
         )
         print(str.join("\n", msgs))
     elif isinstance(reader, NGFFNode):
-        msgs.extend(
-            [
-                sum_msg,
-                fmt_msg,
-                "".join(
-                    ["Axes:\t\t\t "]
-                    + [f"{a.name} ({a.type}); " for a in reader.axes]
-                ),
-                ch_msg,
-            ]
-        )
+        console = Console()
+
+        # Print tree first (if verbose or Position)
+        if verbose or isinstance(reader, Position):
+            console.print("[bold]Zarr hierarchy:[/bold]")
+            reader.print_tree()
+            console.print()
+
+        # Build summary table
+        table = _create_summary_table()
+        fmt_str = f"{fmt} v{extra_info}" if extra_info else fmt
+        table.add_row("Format", fmt_str)
+        table.add_row("Axes", _format_axes(reader.axes))
+        table.add_row("Channel names", _format_channels(reader.channel_names))
+
         if isinstance(reader, Plate):
             meta = reader.metadata
-            msgs.extend(
-                [
-                    f"Row names:\t\t {[r.name for r in meta.rows]}",
-                    f"Column names:\t\t {[c.name for c in meta.columns]}",
-                    f"Wells:\t\t\t {len(meta.wells)}",
-                ]
-            )
+            table.add_row("Row names", ", ".join(r.name for r in meta.rows))
+            col_names = ", ".join(c.name for c in meta.columns)
+            table.add_row("Column names", col_names)
+            table.add_row("Wells", str(len(meta.wells)))
             if verbose:
-                print("Zarr hierarchy:")
-                reader.print_tree()
                 positions = list(reader.positions())
-                total_bytes_uncompressed = sum(
-                    p["0"].nbytes for _, p in positions
-                )
-                msgs.append(f"Positions:\t\t {len(positions)}")
-                msgs.append(f"Chunk size:\t\t {positions[0][1][0].chunks}")
-                msgs.append(
-                    f"No. bytes decompressed:\t\t {total_bytes_uncompressed} "
-                    f"[{sizeof_fmt(total_bytes_uncompressed)}]"
+                first_pos = positions[0][1]
+                first_array = list(first_pos._member_names)[0]
+                total_bytes = sum(p[first_array].nbytes for _, p in positions)
+                table.add_row("Positions", str(len(positions)))
+                table.add_row("Chunk size", str(first_pos[first_array].chunks))
+                table.add_row(
+                    "Bytes (decompressed)",
+                    f"{total_bytes} [{sizeof_fmt(total_bytes)}]",
                 )
         else:
-            total_bytes_uncompressed = reader["0"].nbytes
-            msgs.append(f"(Z, Y, X) scale (um):\t {tuple(reader.scale[2:])}")
-            msgs.append(f"Chunk size:\t\t {reader['0'].chunks}")
-            msgs.append(
-                f"No. bytes decompressed:\t\t {total_bytes_uncompressed} "
-                f"[{sizeof_fmt(total_bytes_uncompressed)}]"
+            first_array = list(reader._member_names)[0]
+            total_bytes = reader[first_array].nbytes
+            space_unit = _get_space_unit(reader.axes)
+            unit_str = f" ({space_unit})" if space_unit else ""
+            scale_label = f"(Z, Y, X) scale{unit_str}"
+            table.add_row(scale_label, str(tuple(reader.scale[2:])))
+            table.add_row("Chunk size", str(reader[first_array].chunks))
+            table.add_row(
+                "Bytes (decompressed)",
+                f"{total_bytes} [{sizeof_fmt(total_bytes)}]",
            )
+
+        # Print summary panel
+        console.print(
+            Panel(table, title="[bold]Summary[/bold]", border_style="blue")
+        )
+
+        # Print usage code (if verbose)
         if verbose:
-            msgs.extend(
-                [
-                    code_msg,
-                    ">>> from iohub import open_ome_zarr",
-                    f">>> dataset = open_ome_zarr('{path}', mode='r')",
-                ]
-            )
-            if isinstance(reader, Position):
-                print("Zarr hierarchy:")
-                reader.print_tree()
-        print("\n".join(msgs))
+            console.print()
+            console.print("[dim]This dataset can be opened with:[/dim]")
+            console.print("[green]>>> from iohub import open_ome_zarr[/green]")
+            code = f">>> dataset = open_ome_zarr('{path}', mode='r')"
+            console.print(f"[green]{code}[/green]")
     reader.close()
 
diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py
index 9c2feb7c..60576fbd 100644
--- a/tests/cli/test_cli.py
+++ b/tests/cli/test_cli.py
@@ -97,13 +97,13 @@ def test_cli_info_ome_zarr(verbose):
         cmd.append(verbose)
     result = runner.invoke(cli, cmd)
     assert result.exit_code == 0
-    assert re.search(r"Wells:\s+1", result.output)
+    assert re.search(r"Wells\s+1", result.output)
     assert ("Chunk size" in result.output) == bool(verbose)
-    assert ("No. bytes decompressed" in result.output) == bool(verbose)
+    assert ("Bytes (decompressed)" in result.output) == bool(verbose)
     # Test on single position
     result_pos = runner.invoke(cli, ["info", str(hcs_ref / "B" / "03" / "0")])
     assert "Channel names" in result_pos.output
-    assert "scale (um)" in result_pos.output
+    assert "scale (µm)" in result_pos.output
     assert "Chunk size" in result_pos.output
     assert "84.4 MiB" in result_pos.output
 
diff --git a/tests/ngff/test_remote.py b/tests/ngff/test_remote.py
new file mode 100644
index 00000000..ff681b55
--- /dev/null
+++ b/tests/ngff/test_remote.py
@@ -0,0 +1,122 @@
+"""Tests for remote URL access (HTTP/HTTPS, S3, etc.)."""
+
+import numpy as np
+import pytest
+
+from iohub.ngff import open_ome_zarr
+from iohub.ngff.nodes import _is_remote_url, _open_store
+
+# Test datasets
+WAVEORDER_URL = (
+    "https://public.czbiohub.org/comp.micro/neurips_demos/waveorder/20x.zarr/"
+)
+VISCY_URL = (
+    "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/"
+    "20240204_A549_DENV_ZIKV_timelapse/cropped_registered_test.zarr/"
+)
+
+
+class TestRemoteURLDetection:
+    """Test URL scheme detection."""
+
+    @pytest.mark.parametrize(
+        "url,expected",
+        [
+            ("https://example.com/data.zarr", True),
+            ("http://example.com/data.zarr", True),
+            ("s3://bucket/data.zarr", True),
+            ("gs://bucket/data.zarr", True),
+            ("az://container/data.zarr", True),
+            ("/local/path/data.zarr", False),
+            ("relative/path/data.zarr", False),
+            ("C:\\Windows\\path\\data.zarr", False),
+        ],
+    )
+    def test_is_remote_url(self, url, expected):
+        assert _is_remote_url(url) == expected
+
+
+class TestRemoteWriteRestriction:
+    """Test that write modes are blocked for remote URLs."""
+
+    @pytest.mark.parametrize("mode", ["w", "w-", "a", "r+"])
+    def test_remote_write_not_supported(self, mode):
+        with pytest.raises(NotImplementedError, match="not supported for remote"):
+            _open_store("https://example.com/data.zarr", mode=mode, version="0.5")
+
+
+@pytest.mark.network
+class TestHTTPAccess:
+    """Tests requiring network access. Run with: pytest -m network"""
+
+    @pytest.fixture(scope="class")
+    def waveorder_fov(self):
+        """v0.5 single FOV (A/1/005028)."""
+        return open_ome_zarr(WAVEORDER_URL, mode="r")
+
+    @pytest.fixture(scope="class")
+    def viscy_plate(self):
+        """v0.4 plate with wells A/3 and B/4, position 9."""
+        return open_ome_zarr(VISCY_URL, mode="r")
+
+    # v0.5 tests
+    def test_v05_plate_metadata(self, waveorder_fov):
+        """Test v0.5 plate structure and metadata."""
+        ome = waveorder_fov.zattrs["ome"]
+        assert ome["version"] == "0.5"
+        assert len(ome["plate"]["wells"]) == 1
+        assert ome["plate"]["rows"][0]["name"] == "A"
+        assert ome["plate"]["columns"][0]["name"] == "1"
+
+    def test_v05_hierarchy_navigation(self, waveorder_fov):
+        """Test navigating plate -> well -> position -> array."""
+        well = waveorder_fov["A/1"]
+        assert "well" in well.zattrs["ome"]
+
+        pos = waveorder_fov["A/1/005028"]
+        assert "multiscales" in pos.zattrs["ome"]
+
+        arr = pos["0"]
+        assert arr.shape == (1, 1, 7, 512, 512)
+        assert arr.dtype == np.uint16
+
+    def test_v05_read_data(self, waveorder_fov):
+        """Test reading pixel data."""
+        data = waveorder_fov["A/1/005028"].data[0, 0, 0, :5, :5]
+        assert data.shape == (5, 5)
+        assert data.sum() > 0
+
+    # v0.4 tests
+    def test_v04_plate_metadata(self, viscy_plate):
+        """Test v0.4 plate structure and metadata."""
+        plate_meta = viscy_plate.zattrs["plate"]
+        assert plate_meta["version"] == "0.4"
+        assert len(plate_meta["wells"]) == 2
+        assert len(plate_meta["rows"]) == 2
+        assert len(plate_meta["columns"]) == 2
+
+    def test_v04_hierarchy_navigation(self, viscy_plate):
+        """Test navigating plate -> well -> position -> array."""
+        well = viscy_plate["A/3"]
+        assert "well" in well.zattrs
+
+        pos = viscy_plate["A/3/9"]
+        assert "multiscales" in pos.zattrs
+
+        arr = pos["0"]
+        assert arr.shape == (48, 4, 74, 1022, 1020)
+        assert arr.dtype == np.float32
+
+    def test_v04_multiple_wells(self, viscy_plate):
+        """Test accessing multiple wells."""
+        well_paths = [w["path"] for w in viscy_plate.zattrs["plate"]["wells"]]
+        assert set(well_paths) == {"A/3", "B/4"}
+
+        assert "well" in viscy_plate["A/3"].zattrs
+        assert "well" in viscy_plate["B/4"].zattrs
+
+    def test_v04_read_data(self, viscy_plate):
+        """Test reading pixel data."""
+        data = viscy_plate["A/3/9"].data[0, 0, 0, :5, :5]
+        assert data.shape == (5, 5)
+        assert data.dtype == np.float32