Skip to content

Instantly share code, notes, and snippets.

@ivirshup
Last active March 2, 2023 18:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivirshup/7ab80c4f1c18461c34093b5c5ace5338 to your computer and use it in GitHub Desktop.
h5py virtual dataset with uneven chunk size
# Docs: https://docs.h5py.org/en/stable/vds.html
import h5py
import numpy as np
def virtual_concatenate(datasets: list[h5py.Dataset]) -> h5py.VirtualLayout:
    """Build a virtual layout that concatenates 1-D datasets along axis 0.

    Parameters
    ----------
    datasets
        Non-empty list of source datasets. The dtype of the first dataset
        is used for the combined layout.

    Returns
    -------
    h5py.VirtualLayout
        A layout mapping each source dataset, in order, onto a contiguous
        slab of the first axis.

    Raises
    ------
    ValueError
        If *datasets* is empty — there is no dtype or length to build
        the layout from.
    """
    if not datasets:
        raise ValueError("datasets must be a non-empty list")
    total = sum(d.shape[0] for d in datasets)
    # h5py documents `shape` as a tuple; pass (total,) rather than a bare int.
    layout = h5py.VirtualLayout(shape=(total,), dtype=datasets[0].dtype)
    offset = 0
    for d in datasets:
        n = d.shape[0]
        # Map this source onto the half-open slab [offset, offset + n).
        layout[offset:offset + n] = h5py.VirtualSource(d)
        offset += n
    return layout
with h5py.File("test_virtual.h5", "w") as f
a = f.create_dataset("a", data=np.arange(3), chunks=None)
b = f.create_dataset("b", data=np.arange(3, 4), chunks=None)
c = f.create_dataset("c", data=np.arange(4, 10), chunks=None)
expected = np.concatenate([a[:], b[:], c[:]])
result = f.create_virtual_dataset("combined", virtual_concatenate([a, b, c]))
np.testing.assert_equal(expected, result[:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment