Created
March 2, 2021 21:25
-
-
Save berceanu/e833cdc71683cdcd85930e634fb0e78d to your computer and use it in GitHub Desktop.
Example usage of h5py virtual datasets with dask arrays
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dask.array as da
import h5py

# Example: stitch the same 1-D dataset from several HDF5 files into a single
# h5py virtual dataset, then reduce it lazily with dask.
#
# NOTE(review): `proj` (an iterable of jobs) and `LastH5File` (an object
# exposing the `.fpath` of an HDF5 file for a job) are not defined in this
# snippet; they must be provided by the surrounding session or module.

# Location of the dataset inside every source file, and its on-disk dtype.
DATASET_PATH = "/data/91800/particles/electrons/weighting"
DATASET_DTYPE = "<f8"
VDS_FILENAME = "VDS.h5"

# First pass: record each source file path together with the length of its
# dataset. The file is opened in a `with` block so the handle is released
# immediately instead of staying open until garbage collection.
sources = []
for job in proj:
    fpath = LastH5File(job).fpath
    with h5py.File(fpath, "r") as src:
        sources.append((fpath, src[DATASET_PATH].shape[0]))

# The virtual dataset is the concatenation of all source datasets.
total_length = sum(length for _, length in sources)
layout = h5py.VirtualLayout(shape=(total_length,), dtype=DATASET_DTYPE)

# Second pass: map each source dataset onto its contiguous slice of the layout.
stop = 0
for fpath, length in sources:
    start, stop = stop, stop + length
    layout[start:stop] = h5py.VirtualSource(
        fpath,
        DATASET_PATH,
        shape=(length,),
        dtype=DATASET_DTYPE,
    )

# Add virtual dataset to output file
with h5py.File(VDS_FILENAME, "w", libver="latest") as f:
    f.create_virtual_dataset("vdata", layout, fillvalue=0.0)

# Read data back: the virtual dataset is transparent for the reader.
# Read-only mode is sufficient because nothing is written here.
with h5py.File(VDS_FILENAME, "r") as f:
    print("Virtual dataset:")
    d = f["vdata"]
    # Wrap the on-disk dataset in a dask array with 4096-element chunks.
    x = da.from_array(d, chunks=(4096,))
    s = x.sum()
    # compute() must run while the file is still open, since the data is
    # only read from disk at this point.
    print(s.compute())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment