WIP: Add functionality to virtualize GeoTIFFs using async_tiff #524

Draft: wants to merge 17 commits into base: develop
Changes from 15 commits
21 changes: 15 additions & 6 deletions pyproject.toml
@@ -66,11 +66,16 @@ fits = [
"kerchunk>=0.2.8",
"astropy",
]
tif = [
"obstore>=0.5.1",
"async-tiff @ git+https://github.com/developmentseed/async-tiff#subdirectory=python",
]
all_readers = [
"virtualizarr[hdf]",
"virtualizarr[hdf5]",
"virtualizarr[netcdf3]",
"virtualizarr[fits]",
"virtualizarr[tif]",
]

# writers
@@ -95,7 +100,6 @@ upstream = [
# optional dependencies
'astropy @ git+https://github.com/astropy/astropy',
'fsspec @ git+https://github.com/fsspec/filesystem_spec',
's3fs @ git+https://github.com/fsspec/s3fs',
'kerchunk @ git+https://github.com/fsspec/kerchunk',
'icechunk @ git+https://github.com/earth-mover/icechunk#subdirectory=icechunk-python',
]
@@ -168,6 +172,9 @@ h5netcdf = ">=1.5.0,<2"
[tool.pixi.feature.icechunk-dev.dependencies]
rust = "*"

[tool.pixi.feature.rio.dependencies]
rioxarray = "*"

# Define commands to run within the test environments
[tool.pixi.feature.test.tasks]
run-mypy = { cmd = "mypy virtualizarr" }
@@ -181,12 +188,12 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov
[tool.pixi.environments]
min-deps = ["dev", "test", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
# Inherit from min-deps to get all the test commands, along with optional dependencies
test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"]
test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11
test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12
test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio"]
test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py311"] # test against python 3.11
test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py312"] # test against python 3.12
minio = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312", "minio"]
upstream = ["dev", "test", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"]
all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "all_readers", "all_writers"]
docs = ["docs"]

# Define commands to run within the docs environment
@@ -222,7 +229,9 @@ module = [
"minio",
"numcodecs.*",
"ujson",
"zarr",
"zarr.*",
"async_tiff.*",
"obstore.*",
]
ignore_missing_imports = true

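Note: the new `tif` extra above pulls in `obstore` and the Python bindings of `async-tiff` (installed from the git subdirectory). A minimal smoke check, assuming the extra is installed with `pip install "virtualizarr[tif]"` and that the packages expose the `async_tiff` and `obstore` module names added to the mypy overrides:

```python
# Minimal sketch: verify the optional "tif" dependencies are importable.
# Assumes `pip install "virtualizarr[tif]"` (or the pixi "tif"/"rio" features).
import async_tiff  # async GeoTIFF reader used by the new virtualization path
import obstore     # object-store layer used for byte-range reads

print("async_tiff:", getattr(async_tiff, "__version__", "unknown"))
print("obstore:", getattr(obstore, "__version__", "unknown"))
```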
36 changes: 16 additions & 20 deletions virtualizarr/manifests/store.py
@@ -108,36 +108,29 @@ def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer:
return dict_to_buffer(metadata, prototype=default_buffer_prototype())


def parse_manifest_index(
key: str, chunk_key_encoding: str = "."
) -> tuple[str, tuple[int, ...]]:
def parse_manifest_index(key: str, chunk_key_encoding: str = ".") -> tuple[int, ...]:
"""
Splits `key` provided to a zarr store into the variable indicated
by the first part and the chunk index from the 3rd through last parts,
which can be used to index into the ndarrays containing paths, offsets,
and lengths in ManifestArrays.

Currently only works for 1d+ arrays with a tree depth of one from the
root Zarr group.

Parameters
----------
key : str
chunk_key_encoding : str

Returns
-------
ManifestIndex
tuple containing chunk indexes
"""
parts = key.split("/")
var = parts[0]
# Assume "c" is the second part
# TODO: Handle scalar array case with "c" holds the data
if chunk_key_encoding == "/":
indexes = tuple(int(ind) for ind in parts[2:])
else:
indexes = tuple(int(ind) for ind in parts[2].split(chunk_key_encoding))
return var, indexes
if key.endswith("c"):
# Scalar arrays hold the data in the "c" key
return (0,)
parts = key.split(
"c/"
) # TODO: Open an issue upstream about the Zarr spec indicating this should be f"c{chunk_key_encoding}" rather than always "c/"
return tuple(int(ind) for ind in parts[1].split(chunk_key_encoding))
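For illustration, a standalone sketch mirroring the parsing above (assumed behavior; it does not import virtualizarr and only covers the "/" separator plus the scalar case):

```python
# Standalone sketch of the chunk-key parsing above (illustrative only).
def parse(key: str, sep: str = "/") -> tuple[int, ...]:
    if key.endswith("c"):
        # Scalar arrays hold the data in the bare "c" key
        return (0,)
    _, chunk_part = key.split("c/")  # see the TODO above about f"c{sep}" vs "c/"
    return tuple(int(i) for i in chunk_part.split(sep))

assert parse("temperature/c/0/1") == (0, 1)  # 2-D chunk at row 0, column 1
assert parse("scalar/c") == (0,)             # scalar array
```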


def find_matching_store(stores: StoreDict, request_key: str) -> StoreRequest:
@@ -263,13 +256,16 @@ async def get(

if key.endswith("zarr.json"):
return get_zarr_metadata(self._group, key)
var, chunk_key = parse_manifest_index(key)
var = key.split("/")[0]
marr = self._group.arrays[var]
manifest = marr.manifest

path = manifest._paths[*chunk_key]
offset = manifest._offsets[*chunk_key]
length = manifest._lengths[*chunk_key]
chunk_indexes = parse_manifest_index(
key, marr.metadata.chunk_key_encoding.separator
)
path = manifest._paths[*chunk_indexes]
offset = manifest._offsets[*chunk_indexes]
length = manifest._lengths[*chunk_indexes]
# Get the configured object store instance that matches the path
store_request = find_matching_store(stores=self._stores, request_key=path)
# Transform the input byte range to account for the chunk location in the file
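Conceptually, `get` now resolves the key against the array's own chunk-key-encoding separator before indexing the manifest arrays. A hedged sketch of that lookup with illustrative data (not the actual ManifestStore internals):

```python
import numpy as np

# Illustrative manifest arrays for one variable with a 2x2 chunk grid.
paths = np.array([["s3://bucket/t.tif"] * 2] * 2, dtype=object)
offsets = np.array([[0, 4096], [8192, 12288]], dtype=np.uint64)
lengths = np.full((2, 2), 4096, dtype=np.uint64)

def resolve(key: str, separator: str = "/") -> tuple[str, int, int]:
    # e.g. key = "temperature/c/1/0"; the first part names the ManifestArray
    chunk_part = key.split("c/")[1]
    idx = tuple(int(i) for i in chunk_part.split(separator))
    return paths[idx], int(offsets[idx]), int(lengths[idx])

print(resolve("temperature/c/1/0"))  # ('s3://bucket/t.tif', 8192, 4096)
```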
11 changes: 9 additions & 2 deletions virtualizarr/manifests/utils.py
@@ -2,6 +2,7 @@

import numpy as np
from zarr import Array
from zarr.core.chunk_key_encodings import ChunkKeyEncodingLike
from zarr.core.metadata.v3 import ArrayV3Metadata

from virtualizarr.codecs import convert_to_codec_pipeline, get_codecs
@@ -14,9 +15,11 @@ def create_v3_array_metadata(
shape: tuple[int, ...],
data_type: np.dtype,
chunk_shape: tuple[int, ...],
chunk_key_encoding: ChunkKeyEncodingLike = {"name": "default"},
fill_value: Any = None,
codecs: Optional[list[Dict[str, Any]]] = None,
attributes: Optional[Dict[str, Any]] = None,
dimension_names: Optional[tuple[str, ...]] = None,
) -> ArrayV3Metadata:
"""
Create an ArrayV3Metadata instance with standard configuration.
@@ -30,12 +33,16 @@
The numpy dtype of the array
chunk_shape : tuple[int, ...]
The shape of each chunk
chunk_key_encoding : ChunkKeyEncodingLike
The mapping from chunk grid cell coordinates to keys.
fill_value : Any, optional
The fill value for the array
codecs : list[Dict[str, Any]], optional
List of codec configurations
attributes : Dict[str, Any], optional
Additional attributes for the array
dimension_names : tuple[str], optional
Names of the dimensions

Returns
-------
@@ -49,14 +56,14 @@
"name": "regular",
"configuration": {"chunk_shape": chunk_shape},
},
chunk_key_encoding={"name": "default"},
chunk_key_encoding=chunk_key_encoding,
fill_value=fill_value,
codecs=convert_to_codec_pipeline(
codecs=codecs or [],
dtype=data_type,
),
attributes=attributes or {},
dimension_names=None,
dimension_names=dimension_names,
storage_transformers=None,
)

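A hedged usage sketch of the extended signature (argument values are illustrative; the dict form of `chunk_key_encoding` is one of the shapes accepted by zarr's `ChunkKeyEncodingLike`):

```python
import numpy as np
from virtualizarr.manifests.utils import create_v3_array_metadata

metadata = create_v3_array_metadata(
    shape=(1024, 2048),
    data_type=np.dtype("uint16"),
    chunk_shape=(256, 256),
    chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}},
    fill_value=0,
    dimension_names=("y", "x"),
)
print(metadata.chunk_key_encoding, metadata.dimension_names)
```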
30 changes: 24 additions & 6 deletions virtualizarr/readers/common.py
@@ -1,17 +1,35 @@
import dataclasses
from collections.abc import Iterable, Mapping
from typing import (
Any,
Hashable,
MutableMapping,
Optional,
)
from typing import Any, Hashable, MutableMapping, Optional, TypedDict

import numpy as np
import xarray as xr
import xarray.indexes
from numcodecs.abc import Codec

from virtualizarr.utils import _FsspecFSFromFilepath


@dataclasses.dataclass
class ZstdProperties:
level: int


@dataclasses.dataclass
class ShuffleProperties:
elementsize: int


@dataclasses.dataclass
class ZlibProperties:
level: int


class CFCodec(TypedDict):
target_dtype: np.dtype
codec: Codec
Review comment (Member): This stuff probably deserves to be in a dedicated codecs.py file (which I think we already have?)



def construct_fully_virtual_dataset(
virtual_vars: Mapping[str, xr.Variable],
coord_names: Iterable[str] | None = None,
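For reference, a hedged sketch of the relocated containers in use (values are illustrative; `Zlib` is the numcodecs codec, not part of this PR):

```python
import numpy as np
from numcodecs import Zlib
from virtualizarr.readers.common import CFCodec, ZlibProperties

zlib_props = ZlibProperties(level=4)
cf_codec: CFCodec = {
    "target_dtype": np.dtype("float32"),
    "codec": Zlib(level=zlib_props.level),
}
print(cf_codec["codec"])
```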
28 changes: 7 additions & 21 deletions virtualizarr/readers/hdf/filters.py
@@ -1,14 +1,20 @@
from __future__ import annotations

import dataclasses
from typing import TYPE_CHECKING, List, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, List, Tuple, Union

import numcodecs.registry as registry
import numpy as np
from numcodecs.abc import Codec
from numcodecs.fixedscaleoffset import FixedScaleOffset
from xarray.coding.variables import _choose_float_dtype

from virtualizarr.readers.common import (
CFCodec,
ShuffleProperties,
ZlibProperties,
ZstdProperties,
)
from virtualizarr.utils import soft_import

h5py = soft_import("h5py", "For reading hdf files", strict=False)
@@ -48,26 +54,6 @@ def __post_init__(self):
self.cname = blosc_compressor_codes[self.cname]


@dataclasses.dataclass
class ZstdProperties:
level: int


@dataclasses.dataclass
class ShuffleProperties:
elementsize: int


@dataclasses.dataclass
class ZlibProperties:
level: int


class CFCodec(TypedDict):
target_dtype: np.dtype
codec: Codec


def _filter_to_codec(
filter_id: str, filter_properties: Union[int, None, Tuple] = None
) -> Codec: