Last active
October 14, 2019 05:49
-
-
Save ivirshup/247ea6fabd3935e12d41551b7252a2f8 to your computer and use it in GitHub Desktop.
hdf5 cache performance testing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.7.4 (default, Sep 7 2019, 18:27:02) | |
Type 'copyright', 'credits' or 'license' for more information | |
IPython 7.8.0 -- An enhanced Interactive Python. Type '?' for help. | |
In [1]: import h5py | |
In [2]: import numpy as np | |
In [3]: indices = np.sort(np.random.choice(int(1e7), int(1e4), replace=False)) | |
In [4]: f = h5py.File("test.h5", "r") | |
...: dset = f["x"] | |
# helper functions | |
def read_dset_by_indices(dset, indices):
    """Read ``dset`` in consecutive slices bounded by neighbouring ``indices``.

    For every adjacent pair ``(indices[i], indices[i + 1])`` the half-open
    range ``dset[indices[i]:indices[i + 1]]`` is read and the result is
    discarded: this is a benchmarking helper, so only the cost of the reads
    matters. Nothing before the first index or after the last index is read,
    and fewer than two indices results in no reads at all.

    Parameters
    ----------
    dset
        Any object supporting slice indexing (``dset[a:b]``). In the session
        this is an h5py dataset.
    indices
        Sliceable sequence of slice boundaries. In the session this is a
        sorted NumPy integer array.

    Returns
    -------
    None
    """
    # Pair each boundary with its successor instead of indexing by position.
    for start, stop in zip(indices[:-1], indices[1:]):
        dset[start:stop]  # read only; the data itself is intentionally unused
def read_dset_by_chunks(dset):
    """Read all of ``dset`` along its first axis, one chunk-length slice at a time.

    The slice length is ``dset.chunks[0]`` and the total length is
    ``dset.shape[0]``. The final slice is clipped to the end of the dataset.
    Each slice is read and the result discarded: this is a benchmarking
    helper, so only the cost of the reads matters.

    Unlike a ``range(total // chunk + 1)`` formulation, no trailing empty
    slice is read when the length is an exact multiple of the chunk length,
    and a zero-length dataset results in no reads.

    Parameters
    ----------
    dset
        Object exposing ``chunks`` and ``shape`` (both indexable, first
        element used) and supporting slice indexing. In the session this is
        an h5py dataset. ``dset.chunks`` must be subscriptable, so a dataset
        whose ``chunks`` is ``None`` is not supported.

    Returns
    -------
    None
    """
    chunk_len = dset.chunks[0]
    total_len = dset.shape[0]
    # Step through the chunk start offsets directly; this never produces an
    # empty slice at the end.
    for start in range(0, total_len, chunk_len):
        stop = min(start + chunk_len, total_len)
        dset[start:stop]  # read only; the data itself is intentionally unused
# File read with default settings | |
In [7]: %%timeit !sync && sudo purge | |
...: read_dset_by_indices(dset, indices) | |
...: | |
...: | |
1.23 s ± 12.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [8]: | |
In [8]: %%timeit | |
...: read_dset_by_indices(dset, indices) | |
...: | |
...: | |
1.13 s ± 19.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [9]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
221 ms ± 14.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [10]: %%timeit | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
141 ms ± 2.49 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [11]: f.close() | |
# Using a large cache | |
In [12]: f = h5py.File("test.h5", "r", rdcc_nbytes=100 * (1024 ** 2), rdcc_nslots=50000, rdcc_w0=.5) | |
...: dset = f["x"] | |
In [13]: %%timeit !sync && sudo purge | |
...: read_dset_by_indices(dset, indices) | |
...: | |
...: | |
1.22 s ± 34.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [14]: %%timeit | |
...: read_dset_by_indices(dset, indices) | |
...: | |
...: | |
1.12 s ± 18.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [15]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
131 ms ± 2.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [16]: %%timeit | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
123 ms ± 502 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [17]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
130 ms ± 1.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [18]: f.close() | |
# Using no cache | |
In [19]: f = h5py.File("test.h5", "r", rdcc_nbytes=0) | |
...: dset = f["x"] | |
In [20]: %%timeit !sync && sudo purge | |
...: read_dset_by_indices(dset, indices) | |
...: | |
...: | |
1.29 s ± 7.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [21]: %%timeit | |
...: read_dset_by_indices(dset, indices) | |
...: | |
...: | |
1.2 s ± 41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [22]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
145 ms ± 1.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [23]: %%timeit | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
135 ms ± 2.63 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
# A couple of tests of the functions (cells In [24]-[25] are not included in this transcript) | |
In [26]: f.close() | |
# Using a larger cache (1 GiB) | |
In [27]: f = h5py.File("test.h5", "r", rdcc_nbytes=(1024 ** 3), rdcc_nslots=50000, rdcc_w0=.5) | |
...: dset = f["x"] | |
In [28]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
155 ms ± 6.25 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [29]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
132 ms ± 3.05 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [30]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
131 ms ± 1.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [31]: %%timeit | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
124 ms ± 1.94 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [32]: %%timeit | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
121 ms ± 738 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [33]: f.close() | |
In [34]: f = h5py.File("test.h5", "r", rdcc_nbytes=0) | |
...: dset = f["x"] | |
In [35]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
197 ms ± 4.44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [36]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
143 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [37]: f.close() | |
In [38]: f = h5py.File("test.h5", "r") | |
...: dset = f["x"] | |
In [39]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
216 ms ± 11.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) | |
In [40]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
153 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | |
In [41]: %%timeit !sync && sudo purge | |
...: read_dset_by_chunks(dset) | |
...: | |
...: | |
213 ms ± 7.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment