Created
January 1, 2015 17:47
-
-
Save joferkington/77edf001b8c699a14e06 to your computer and use it in GitHub Desktop.
Chunked HDF vs Memmapped Array I/O Comparison: http://stackoverflow.com/questions/27710245/is-there-an-analysis-speed-or-memory-usage-advantage-to-using-hdf5-for-large-arr/27713489#27713489
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import h5py | |
def main():
    """Load the volume, then take the slice named on the command line.

    The first command-line argument selects the slice: ``'x'`` calls
    ``x_slice`` and ``'z'`` calls ``z_slice``.  Any other value does nothing.
    The slice itself is discarded; nothing is printed or returned.

    Raises:
        IndexError: if no command-line argument was supplied.
    """
    volume = read()
    axis = sys.argv[1]
    if axis == 'x':
        x_slice(volume)
    elif axis == 'z':
        z_slice(volume)
def read():
    """Open ``/tmp/test.hdf5`` read-only and return its ``seismic_volume`` entry.

    The file object is not closed here; it stays open while the caller
    works with the returned object.
    """
    hdf_file = h5py.File('/tmp/test.hdf5', mode='r')
    return hdf_file['seismic_volume']
def z_slice(data):
    """Return ``data[:, :, 0]``: everything along the first two axes at
    index 0 of the third.

    Indexing is delegated entirely to ``data``.
    """
    everything = slice(None)
    selector = (everything, everything, 0)
    return data[selector]
def x_slice(data):
    """Return ``data[0, :, :]``: index 0 of the first axis, everything
    along the other two.

    Indexing is delegated entirely to ``data``.
    """
    everything = slice(None)
    selector = (0, everything, everything)
    return data[selector]
# Run the benchmark only when executed as a script, so importing this
# module does not trigger a read of the data file.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import sys | |
def main():
    """Map the volume, then copy out the slice named on the command line.

    The first command-line argument selects the slice: ``'x'`` calls
    ``x_slice`` and ``'z'`` calls ``z_slice``.  Any other value does nothing.
    The copied slice is discarded; nothing is printed or returned.

    Raises:
        IndexError: if no command-line argument was supplied.
    """
    volume = read()
    axis = sys.argv[1]
    if axis == 'x':
        x_slice(volume)
    elif axis == 'z':
        z_slice(volume)
def read(filename='/data/nankai/data/Volumes/kumdep01_flipY.3dv.vol',
         shape=(621, 4991, 2600), header_len=3072):
    """Memory-map a raw ``uint8`` volume stored in Fortran order.

    Called with no arguments this maps the same file, shape and header
    offset that were previously hard-coded, so existing callers are
    unaffected.  The parameters allow the same reader to be pointed at
    other files (for example a small test fixture).

    Args:
        filename: Path of the binary file to map.
        shape: Dimensions of the array stored after the header.
        header_len: Number of bytes to skip at the start of the file.

    Returns:
        A read-only ``numpy.memmap`` of dtype ``uint8`` with the given shape.
    """
    return np.memmap(filename=filename, mode='r', offset=header_len,
                     order='F', shape=tuple(shape), dtype=np.uint8)
def z_slice(data):
    """Copy ``data[:, :, 0]`` into a newly allocated array and return it.

    The result has shape ``data.shape[:2]`` and the same dtype as ``data``;
    it is a separate buffer, not a view of ``data``.
    """
    rows, cols = data.shape[0], data.shape[1]
    plane = np.empty((rows, cols), dtype=data.dtype)
    # Fill the preallocated buffer element-for-element from the source.
    np.copyto(plane, data[:, :, 0])
    return plane
def x_slice(data):
    """Copy ``data[0, :, :]`` into a newly allocated array and return it.

    The result has shape ``data.shape[1:]`` and the same dtype as ``data``;
    it is a separate buffer, not a view of ``data``.
    """
    section = np.empty(tuple(data.shape[1:]), dtype=data.dtype)
    # Fill the preallocated buffer element-for-element from the source.
    np.copyto(section, data[0, :, :])
    return section
# Run the benchmark only when executed as a script, so importing this
# module does not trigger mapping and reading of the data file.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geoprobe | |
import h5py | |
# A library is used to open the volume, but "vol.data" is just a memmapped
# array.
vol = geoprobe.volume('/data/nankai/data/Volumes/kumdep01_flipY.3dv.vol')
source = vol.data

with h5py.File('/tmp/test.hdf5', 'w') as f:
    # Create a chunked dataset with the same shape and dtype as the source.
    dset = f.create_dataset(
        'seismic_volume',
        shape=source.shape,
        dtype=source.dtype,
        chunks=True,
    )

    # The entire file can't fit in memory, so copy one slice along the
    # last axis at a time.
    for i in range(vol.nz):
        dset[:, :, i] = source[:, :, i]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment