"""Translate a csv/npy/csv.gz integer file to efficiently compressed HDF5."""
import sys
import numpy
import h5py
# Command-line entry: convert the integer data file named by the first
# argument into an HDF5 file written next to it.
#
# Usage: <script> <input.npy | input.csv | input.csv.gz>
# The array is stored in a dataset called 'data' inside '<input stem>.h5'.
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('usage: %s <file.npy|file.csv|file.csv.gz>' % sys.argv[0])
filename = sys.argv[1]

# Build the output name by stripping only *trailing* input extensions.
# A plain str.replace() would also delete '.csv' / '.gz' / '.npy' from the
# middle of the path (e.g. from a directory name) and produce a wrong target.
# The order below turns 'x.csv.gz' into 'x' ('.gz' first, then '.csv').
outfilename = filename
for suffix in ('.npy', '.gz', '.csv'):
    if outfilename.endswith(suffix):
        outfilename = outfilename[:-len(suffix)]
outfilename += '.h5'

# Exactly one loader must run: binary NPY input goes through numpy.load,
# everything else is parsed as text.
if filename.endswith('.npy'):
    print('loading NPY...')
    data = numpy.load(filename)
else:
    print('loading CSV...')
    # numpy.loadtxt decompresses '.gz' files transparently.
    # NOTE(review): loadtxt is called with its default (whitespace) delimiter;
    # comma-separated multi-column input would need delimiter=',' -- confirm
    # the expected input format before changing this.
    data = numpy.loadtxt(filename, dtype=int)

print('storing as HDF5...')
# 'w' truncates any existing output file. The shuffle filter is enabled
# together with gzip compression for the stored dataset.
with h5py.File(outfilename, 'w') as f:
    f.create_dataset('data', data=data, compression='gzip', shuffle=True)
