Chunked HDF5 vs. Memmapped Array I/O Comparison: http://stackoverflow.com/questions/27710245/is-there-an-analysis-speed-or-memory-usage-advantage-to-using-hdf5-for-large-arr/27713489#27713489
# Read an x- or z-slice from the chunked HDF5 copy of the volume.
import sys

import h5py

def main():
    data = read()
    if sys.argv[1] == 'x':
        x_slice(data)
    elif sys.argv[1] == 'z':
        z_slice(data)

def read():
    f = h5py.File('/tmp/test.hdf5', 'r')
    return f['seismic_volume']

def z_slice(data):
    # h5py reads the requested slice into memory eagerly.
    return data[:, :, 0]

def x_slice(data):
    return data[0, :, :]

main()
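Because the comparison hinges on how the HDF5 dataset is laid out on disk, it can help to check what chunk shape h5py's auto-chunker picked (chunks=True in the conversion script further down lets h5py choose). A minimal sketch, assuming the same /tmp/test.hdf5 file already exists:

import h5py

with h5py.File('/tmp/test.hdf5', 'r') as f:
    dset = f['seismic_volume']
    # Dataset.chunks is None for a contiguous dataset, otherwise the chunk shape.
    print('chunk shape:', dset.chunks)
    print('dataset shape:', dset.shape, 'dtype:', dset.dtype)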
# Memmapped-array version of the same slice test, reading straight from the
# raw binary volume.
import sys

import numpy as np

def main():
    data = read()
    if sys.argv[1] == 'x':
        x_slice(data)
    elif sys.argv[1] == 'z':
        z_slice(data)

def read():
    big_binary_filename = '/data/nankai/data/Volumes/kumdep01_flipY.3dv.vol'
    shape = 621, 4991, 2600
    header_len = 3072
    data = np.memmap(filename=big_binary_filename, mode='r', offset=header_len,
                     order='F', shape=shape, dtype=np.uint8)
    return data

def z_slice(data):
    # Copy the slice into a new array so the data is actually read from disk
    # (slicing a memmap alone just returns another memmapped view).
    dat = np.empty(data.shape[:2], dtype=data.dtype)
    dat[:] = data[:, :, 0]
    return dat

def x_slice(data):
    dat = np.empty(data.shape[1:], dtype=data.dtype)
    dat[:] = data[0, :, :]
    return dat

main()
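Neither script times itself; one rough way to compare them is to run each slice direction as a separate process and time it with a small driver like the sketch below. The filenames hdf_test.py and memmap_test.py are hypothetical placeholders for the two scripts above, and the OS page cache will heavily influence the numbers unless it is cleared between runs.

import subprocess
import sys
import time

# Hypothetical filenames for the two benchmark scripts above.
for script in ('hdf_test.py', 'memmap_test.py'):
    for direction in ('x', 'z'):
        t0 = time.perf_counter()
        subprocess.check_call([sys.executable, script, direction])
        elapsed = time.perf_counter() - t0
        print('{} {}-slice: {:.2f} s'.format(script, direction, elapsed))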
# One-time conversion: copy the memmapped volume into a chunked HDF5 file.
import geoprobe
import h5py

# I'm using a library here, but "vol.data" is just a memmapped array.
vol = geoprobe.volume('/data/nankai/data/Volumes/kumdep01_flipY.3dv.vol')

with h5py.File('/tmp/test.hdf5', 'w') as f:
    dset = f.create_dataset('seismic_volume', shape=vol.data.shape,
                            dtype=vol.data.dtype, chunks=True)
    # Can't fit the entire file in memory, so copy it one z-slice at a time.
    for i in range(vol.nz):
        dset[:, :, i] = vol.data[:, :, i]
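After the conversion it may be worth spot-checking that the HDF5 copy matches the memmapped source, for example by comparing a single slice. A minimal sketch using the same paths as above:

import geoprobe
import h5py
import numpy as np

vol = geoprobe.volume('/data/nankai/data/Volumes/kumdep01_flipY.3dv.vol')

with h5py.File('/tmp/test.hdf5', 'r') as f:
    dset = f['seismic_volume']
    # Compare one z-slice from each copy; both sides are read into memory here.
    assert np.array_equal(dset[:, :, 0], np.asarray(vol.data[:, :, 0]))
    print('first z-slice matches')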