Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save eschalkargans/6c8708370ad6b7b58eebe95aa95084ab to your computer and use it in GitHub Desktop.
Save eschalkargans/6c8708370ad6b7b58eebe95aa95084ab to your computer and use it in GitHub Desktop.
resilience-towards-missing-array-dimensions-zarr-attribute.py
# %%
from pathlib import Path
import json
import numpy as np
import xarray as xr
from datatree import DataTree, open_datatree
# %%
def load_json(path: Path) -> dict:
    """Read the JSON document stored at ``path`` and return the decoded value.

    Args:
        path: Location of a UTF-8 encoded JSON file.

    Returns:
        The object produced by ``json.load``; a ``dict`` when the file's
        top-level value is a JSON object.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # The context manager closes the file even when decoding fails.
    with open(path, encoding="utf-8") as fp:
        return json.load(fp)
# %% [markdown]
# ## Data Creation
#
# %%
# Build a 3 x 18 array holding 0..53, labelled along "label" (a, b, c) and
# "z" (0..17), then split it into chunks of 2 labels by 4 z-values.
xda = xr.DataArray(
    np.arange(3 * 18).reshape(3, 18),
    coords=dict(label=list("abc"), z=list(range(18))),
).chunk({"label": 2, "z": 4})
xda
# %%
# Put the array into a Dataset under the name "my_xda" and hand that
# Dataset to DataTree; the trailing expression evaluates the variable
# through the tree.
xdt = DataTree(xr.Dataset(data_vars={"my_xda": xda}))
xdt.my_xda
# %% [markdown]
# ## Data Writing
# %%
# Target store, relative to the current working directory.
zarr_path = Path() / "../generated/my_array.zarr"
# mode="w" replaces any store left behind by a previous run. Without it the
# write is refused when the store already exists, and the cells below
# deliberately damage the store's metadata, so every run must start from a
# freshly written store.
xdt.to_zarr(zarr_path, mode="w")
print(zarr_path.resolve())
# %% [markdown]
# ## Data Initial Reading
# %%
# Read the store back from disk with the zarr engine and evaluate its
# `my_xda` variable, before any metadata has been tampered with.
open_datatree(zarr_path, engine="zarr").my_xda
# %% [markdown]
# ## Data Alteration
# %%
# Corrupt xarray's `_ARRAY_DIMENSIONS` attribute of the `z` variable by
# overwriting its per-array attribute file with an empty JSON object.
zattrs_path = zarr_path.joinpath("z/.zattrs")
assert zattrs_path.is_file()
zattrs_path.write_text("{}")
# Note: this has no impact; only the root .zmetadata seems to be used.
# %%
# Re-open the store after the change.
open_datatree(zarr_path, engine="zarr")
# %%
# Overwrite the attribute file of the `label` variable with an empty JSON
# object.
zattrs_path = zarr_path.joinpath("label/.zattrs")
assert zattrs_path.is_file()
zattrs_path.write_text("{}")
# %%
# Re-open the store after the change.
open_datatree(zarr_path, engine="zarr")
# %%
# Overwrite the attribute file of the `my_xda` data variable with an empty
# JSON object.
zattrs_path = zarr_path.joinpath("my_xda/.zattrs")
assert zattrs_path.is_file()
zattrs_path.write_text("{}")
# %%
# Re-open the store after the change.
open_datatree(zarr_path, engine="zarr")
# %%
# Edit the `.zmetadata` file at the store root: replace its entry for
# `z/.zattrs` with an empty mapping and write the document back
# (indented by 4 spaces).
zmetadata_path = zarr_path.joinpath(".zmetadata")
assert zmetadata_path.is_file()
zmetadata = load_json(zmetadata_path)
zmetadata["metadata"]["z/.zattrs"] = {}
zmetadata_path.write_text(json.dumps(zmetadata, indent=4))
# %%
# Re-open the store after the change.
open_datatree(zarr_path, engine="zarr")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment