Skip to content

Instantly share code, notes, and snippets.

@takluyver
Created October 19, 2018 13:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takluyver/0480a74881d84678f48b92c021129cd6 to your computer and use it in GitHub Desktop.
Save takluyver/0480a74881d84678f48b92c021129cd6 to your computer and use it in GitHub Desktop.
Investigating h5py chunked read performance
import h5py
from h5py import h5s, h5t
import h5py._hl.selections as sel
import numpy as np
import time
# One-off setup that generated the two input files read below
# (uncomment to recreate them):
#data = np.random.randn(500, 512, 512)
#with h5py.File('datacube_chunked.h5', 'w') as fid:
#    fid.create_dataset('cube', data=data, chunks=(500, 64, 64))
#with h5py.File('datacube.h5', 'w') as fid:
#    fid.create_dataset('cube', data=data)
# --- Phase 1: build the HDF5 objects reused by both reads below. ---
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
# Selection on a (500, 512, 512) shape: everything along the first axis and
# indices 0..255 along the other two, i.e. the region [:, 0:256, 0:256].
s = sel.select((500, 512, 512), (slice(None), slice(0, 256), slice(0, 256)), None)
# Dataspace created from the selection's shape; passed as the first argument
# of the low-level reads below.
mspace = h5s.create_simple(s.mshape)
# HDF5 type object created from NumPy float64; passed to the reads below.
mtype = h5t.py_create(np.float64)
print("Preparing:", time.perf_counter() - start_time)
# --- Phase 2: timed low-level read from 'datacube.h5'. ---
# The measured interval covers allocating the buffer, opening the file,
# the read itself, and closing the file.
start_time = time.perf_counter()
# Destination buffer; np.empty defaults to float64, matching mtype above.
a = np.empty((500, 256, 256))
with h5py.File('datacube.h5', 'r') as fid:
    ds = fid["cube"]
    # Call the low-level read directly with the pre-built selection and
    # type objects, reusing the dataset's own transfer property list.
    ds.id.read(mspace, s.id, a, mtype, dxpl=ds._dxpl)
    # High-level equivalent, kept for comparison:
    # a = fid["cube"][:,0:256,0:256]
print("Reading contiguous:", time.perf_counter() - start_time)
# --- Phase 3: timed low-level read from 'datacube_chunked.h5'. ---
# The measured interval covers allocating the buffer, opening the file,
# the read itself, and closing the file.
start_time = time.perf_counter()
# Destination buffer; np.empty defaults to float64, matching mtype above.
b = np.empty((500, 256, 256))
with h5py.File('datacube_chunked.h5', 'r') as fid:
    ds = fid["cube"]
    # Read into `b`. The original passed `a` here, which left `b`
    # uninitialised and overwrote the result of the contiguous read.
    ds.id.read(mspace, s.id, b, mtype, dxpl=ds._dxpl)
    # High-level equivalent, kept for comparison:
    # b = fid["cube"][:,0:256,0:256]
print("Reading chunked:", time.perf_counter() - start_time)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment