Skip to content

Instantly share code, notes, and snippets.

@berceanu
Created March 2, 2021 21:25
Show Gist options
  • Save berceanu/e833cdc71683cdcd85930e634fb0e78d to your computer and use it in GitHub Desktop.
Save berceanu/e833cdc71683cdcd85930e634fb0e78d to your computer and use it in GitHub Desktop.
Example usage of h5py virtual datasets with dask arrays
import dask.array as da
# Build one HDF5 virtual dataset (VDS) that concatenates the electron
# "weighting" dataset of every job in `proj`, then reduce it lazily with dask.
#
# NOTE(review): `h5py`, `proj` and `LastH5File` are not defined or imported in
# this snippet; they are assumed to be provided by the surrounding context.
DATASET_PATH = "/data/91800/particles/electrons/weighting"
DATASET_DTYPE = "<f8"

# First pass: resolve each job's file path once and record the length of its
# source dataset.  Each file is opened in a context manager so the handle is
# closed immediately instead of being leaked for every job.
paths = []
chunks = []
for job in proj:
    fpath = LastH5File(job).fpath
    with h5py.File(fpath, "r") as src:
        chunks.append(src[DATASET_PATH].shape[0])
    paths.append(fpath)

# The virtual layout is 1-D and as long as all source datasets put together.
layout = h5py.VirtualLayout(shape=(sum(chunks),), dtype=DATASET_DTYPE)

# Second pass: map every source dataset onto its own contiguous slice of the
# layout, in the same order in which the sizes were collected.
stop = 0
for chunk, fpath in zip(chunks, paths):
    start, stop = stop, stop + chunk
    vsource = h5py.VirtualSource(
        fpath,
        DATASET_PATH,
        shape=(chunk,),
        dtype=DATASET_DTYPE,
    )
    layout[start:stop] = vsource

# Add virtual dataset to output file
with h5py.File("VDS.h5", "w", libver="latest") as f:
    f.create_virtual_dataset("vdata", layout, fillvalue=0.0)

# Read data back: the virtual dataset is transparent for the reader.
# Read-only mode is sufficient because nothing is written here.
with h5py.File("VDS.h5", "r") as f:
    print("Virtual dataset:")
    d = f["vdata"]
    x = da.from_array(d, chunks=(4096,))
    s = x.sum()
    # compute() must run while the file is still open, because the dask
    # graph reads from the h5py dataset lazily.
    print(s.compute())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment