H5Dget_chunk_info benchmark via h5py
import sys
from timeit import default_timer as timer

import numpy
import h5py

# Command-line arguments: N = number of chunks to write,
# B = how often to report progress while reading chunk info.
N = int(sys.argv[1])
B = int(sys.argv[2])

print("h5py:", h5py.version.version,
      "hdf5:", h5py.version.hdf5_version)
print("Number of 4 kB chunks", N, "printing every", B, "chunkinfos read")

# Write N gzip-compressed chunks of 1024 int32 values (4 kB each).
print("Write data:")
begin = timer()
with h5py.File('demo.h5', 'w') as h:
    d = h.create_dataset(name='data',
                         dtype=numpy.int32,
                         shape=(N, 1024),
                         chunks=(1, 1024),
                         compression='gzip',
                         compression_opts=1)
    for i in range(N):
        d[i, :] = i
    nbytes = d.size * d.dtype.itemsize
    del d
print("Writing time %.6f/s for %.2f MB" % (timer() - begin,
                                           nbytes * (2**(-20))))

with h5py.File("demo.h5", "r") as h:
    print("Read the full dataset:")
    begin = timer()
    data = h['data'][:]
    print("Time to read + decompress full data %.6f /s" % (timer() - begin))

    # Time H5Dget_chunk_info for every chunk, reporting every B calls.
    print("Now reading offsets")
    print("Ninfos time cumulative_time")
    dsid = h['data'].id
    nread = 0
    begin = start = timer()
    for i in range(0, data.shape[0], B):
        for j in range(i, min(i + B, data.shape[0])):
            chunk_info = dsid.get_chunk_info(j)
            nread += 1
        now = timer()
        print("%7d %.6f /s %.6f /s" % (nread, now - start, now - begin))
        start = now
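
To run the benchmark, save the script (as, say, chunkinfo_bench.py; the filename is just an example) and pass N and B on the command line, e.g. to reproduce the first run below:

python chunkinfo_bench.py 10000 2000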
jonwright commented Oct 23, 2021

Output on a Pinebook Pro for a small dataset:

h5py: 3.3.0 hdf5: 1.12.1
Number of 4 kB chunks 10000 printing every 2000 chunkinfos read
Write data:
Writing time 6.909123/s for 39.06 MB
Read the full dataset:
Time to read + decompress full data 0.495148 /s
Now reading offsets
Ninfos    time          cumulative_time
   2000   1.824879 /s   1.824879 /s
   4000   2.119140 /s   3.944019 /s
   6000   2.485861 /s   6.429879 /s
   8000   2.751050 /s   9.180929 /s
  10000   3.053058 /s   12.233987 /s

jonwright commented Oct 23, 2021

Output from a Xeon server (single core):

h5py: 3.2.1 hdf5: 1.10.6
Number of 4 kB chunks 50000 printing every 10000 chunkinfos read
Write data:
Writing time 5.385004/s for 195.31 MB
Read the full dataset:
Time to read + decompress full data 0.665111 /s
Now reading offsets
Ninfos    time          cumulative_time
  10000   0.769757 /s   0.769757 /s
  20000   2.314878 /s   3.084635 /s
  30000   4.178580 /s   7.263215 /s
  40000   5.778362 /s   13.041577 /s
  50000   7.273826 /s   20.315402 /s
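
In both runs the per-batch time grows with the chunk index (1.8 s to 3.1 s per 2000 calls on the Pinebook, 0.8 s to 7.3 s per 10000 calls on the Xeon), so each H5Dget_chunk_info call appears to cost time proportional to how far into the index the chunk sits, making the full loop roughly quadratic in the number of chunks. Newer releases (h5py >= 3.8 built against HDF5 >= 1.12.3) expose H5Dchunk_iter as DatasetID.chunk_iter, which walks all chunks in a single pass. A minimal sketch, assuming those versions:

import h5py

# Assumes h5py >= 3.8 with HDF5 >= 1.12.3, which add DatasetID.chunk_iter
# (wrapping H5Dchunk_iter). The callback is invoked once per stored chunk
# with a record carrying chunk_offset, filter_mask, byte_offset and size.
infos = []
with h5py.File("demo.h5", "r") as h:
    h["data"].id.chunk_iter(infos.append)
print("collected", len(infos), "chunk records")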
