Skip to content

Instantly share code, notes, and snippets.

@d-v-b
Created November 7, 2023 21:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d-v-b/f460c7f673819d431cc958a04acbab8a to your computer and use it in GitHub Desktop.
Save d-v-b/f460c7f673819d431cc958a04acbab8a to your computer and use it in GitHub Desktop.
crappy ome-ngff to xarray
from xarray_ome_ngff.registry import get_adapters
import zarr
from typing import Union
import dask.array as da
from xarray import DataArray
import os
def infer_coords(group: zarr.Group, array: zarr.Array):
# these conditionals should be handled by a lower-level validation function
if 'multiscales' in group.attrs:
multiscales = group.attrs['multiscales']
if len(multiscales) > 0:
# note that technically the spec allows multiple references to the same zarr array
# because multiscales is a list
multiscale = multiscales[0]
ngff_version = multiscale.get("version", None)
# get the appropriate Multiscale model depending on the version
if ngff_version == "0.4":
from pydantic_ome_ngff.v04 import Multiscale
elif ngff_version == "0.5-dev":
from pydantic_ome_ngff.latest import Multiscale
else:
raise ValueError(
"Could not resolve the version of the multiscales metadata ",
f"found in the group metadata {dict(group.attrs)}",
)
else:
raise ValueError("Multiscales attribute was empty.")
else:
raise ValueError("Multiscales attribute not found.")
xarray_adapters = get_adapters(ngff_version)
multiscales_meta = [Multiscale(**entry) for entry in multiscales]
transforms = []
axes = []
matched_multiscale, matched_dataset = None, None
# find the correct element in multiscales.datasets for this array
for multi in multiscales_meta:
for dataset in multi.datasets:
if dataset.path == array.basename:
matched_multiscale = multi
matched_dataset = dataset
if matched_dataset is None or matched_multiscale is None:
raise ValueError(
f"""
Could not find an entry referencing array {array.basename}
in the `multiscales` metadata of the parent group.
"""
)
else:
if matched_multiscale.coordinateTransformations is not None:
transforms.extend(matched_multiscale.coordinateTransformations)
transforms.extend(matched_dataset.coordinateTransformations)
axes.extend(matched_multiscale.axes)
coords = xarray_adapters.transforms_to_coords(axes, transforms, array.shape)
return coords
def read_dataarray(group: zarr.Group, array: zarr.Array, use_dask: bool = True, **kwargs) -> DataArray:
coords = infer_coords(group, array)
if use_dask:
data = da.from_array(array, **kwargs)
else:
data = array
return DataArray(data, coords)
def test_read_dataarray():
path = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr/"
z_group = zarr.open(path, mode='r')
z_array = zarr.open(store=z_group.store, path = '0')
d_array = read_dataarray(z_group, z_array)
print(d_array)
if __name__ == '__main__':
test_read_dataarray()
"""
<xarray.DataArray 'array-0a22e19c51aed195a0b364219bd996aa' (c: 2, z: 236,
y: 275, x: 271)>
dask.array<array, shape=(2, 236, 275, 271), dtype=uint16, chunksize=(2, 236, 275, 271), chunktype=numpy.ndarray>
Coordinates:
* c (c) float64 0.0 1.0
* z (z) float64 0.0 0.5002 1.0 1.501 2.001 ... 116.0 116.5 117.0 117.5
* y (y) float64 0.0 0.3604 0.7208 1.081 ... 97.67 98.03 98.39 98.75
* x (x) float64 0.0 0.3604 0.7208 1.081 ... 96.23 96.59 96.95 97.31
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment