Skip to content

Instantly share code, notes, and snippets.

@d70-t
Last active May 17, 2022 15:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d70-t/52bc0ecfa0d8bffec3c0da620b03891f to your computer and use it in GitHub Desktop.
Save d70-t/52bc0ecfa0d8bffec3c0da620b03891f to your computer and use it in GitHub Desktop.
renumber shards
import re
from typing import Dict, Optional, Sequence
import zarr.storage
class RenumberShardsStore(zarr.storage.Store):
    """Store wrapper that renumbers chunk keys into shard/subchunk keys.

    For every array prefix listed in ``shards``, a chunk key of the form
    ``<prefix>/<c0><sep><c1>...`` is translated to
    ``<prefix>/<shard index>/<subchunk index>`` before it is looked up in
    ``base``, where for each dimension ``shard = c // s`` and
    ``subchunk = c % s`` with ``s`` taken from the configured shard shape.
    Keys that do not look like a chunk key of a configured prefix are passed
    through to ``base`` unchanged.

    Iteration applies the inverse mapping, so callers see the flat chunk keys.

    Example with ``shards={"arr": (2, 2)}`` and the default separator:
    ``arr/3.5`` is stored in ``base`` as ``arr/1.2/1.1``.
    """

    def __init__(
        self,
        base: zarr.storage.BaseStore,
        shards: Dict[str, Sequence[int]],
        dimension_separator: Optional[str] = None,
    ):
        """Wrap ``base`` and precompile the key matchers.

        Args:
            base: The store that actually holds the data, addressed with
                renumbered (shard/subchunk) keys.
            shards: Maps an array prefix to its shard shape, i.e. the divisor
                applied to the chunk index along each dimension.
            dimension_separator: String between the per-dimension indices of
                a chunk key; ``None`` (or an empty string) selects ``"."``.
        """
        self.base = base
        self.shards = shards
        self.dimension_separator = dimension_separator or "."
        # One entry per configured prefix:
        #   (flat chunk-key regex, shard/subchunk-key regex, prefix, shape)
        self.shard_matchers = []
        for prefix, shard_shape in self.shards.items():
            head = re.escape(prefix + "/")
            index_re = self._chunk_re(len(shard_shape))
            self.shard_matchers.append(
                (
                    re.compile(head + index_re),
                    re.compile(head + index_re + "/" + index_re),
                    prefix,
                    shard_shape,
                )
            )

    def _chunk_re(self, n: int) -> str:
        """Return a regex source matching ``n`` separator-joined integers.

        Each integer is captured in its own group. The separator is escaped
        so that the default ``"."`` only matches a literal dot instead of
        acting as a regex wildcard.
        """
        return re.escape(self.dimension_separator).join(["([0-9]+)"] * n)

    def _renumber_key(self, key: str) -> str:
        """Translate a flat chunk key into its shard/subchunk key.

        Returns ``key`` unchanged when it is not a chunk key of any
        configured prefix.
        """
        sep = self.dimension_separator
        for chunk_re, _, prefix, shard_shape in self.shard_matchers:
            # fullmatch: a key with trailing text must not be mistaken for a
            # chunk key and have its suffix silently dropped.
            if m := chunk_re.fullmatch(key):
                chunk_index = [int(s) for s in m.groups()]
                split = [divmod(c, s) for c, s in zip(chunk_index, shard_shape)]
                shard_part = sep.join(str(q) for q, _r in split)
                subchunk_part = sep.join(str(r) for _q, r in split)
                return prefix + "/" + shard_part + "/" + subchunk_part
        return key

    def _inverse_renumber_key(self, key: str) -> str:
        """Translate a shard/subchunk key back into the flat chunk key.

        Returns ``key`` unchanged when it is not a shard/subchunk key of any
        configured prefix.
        """
        sep = self.dimension_separator
        for _, shard_re, prefix, shard_shape in self.shard_matchers:
            if m := shard_re.fullmatch(key):
                indices = [int(s) for s in m.groups()]
                ndim = len(shard_shape)
                shard_index, subchunk_index = indices[:ndim], indices[ndim:]
                chunk_index = [
                    shard * size + sub
                    for size, shard, sub in zip(
                        shard_shape, shard_index, subchunk_index
                    )
                ]
                return prefix + "/" + sep.join(map(str, chunk_index))
        return key

    def __delitem__(self, key):
        """Delete the entry stored in ``base`` under the renumbered key."""
        del self.base[self._renumber_key(key)]

    def __getitem__(self, key):
        """Return the value stored in ``base`` under the renumbered key."""
        return self.base[self._renumber_key(key)]

    def __setitem__(self, key, value):
        """Store ``value`` in ``base`` under the renumbered key."""
        self.base[self._renumber_key(key)] = value

    def __iter__(self):
        """Yield every key of ``base``, mapped back to its flat chunk form."""
        for k in self.base:
            yield self._inverse_renumber_key(k)

    def __len__(self):
        """Return the number of entries in ``base``."""
        return len(self.base)
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment