Skip to content

Instantly share code, notes, and snippets.

@jfsantos
Created April 15, 2015 20:00
Show Gist options
  • Save jfsantos/14ae9631716a2aa328c4 to your computer and use it in GitHub Desktop.
Save jfsantos/14ae9631716a2aa328c4 to your computer and use it in GitHub Desktop.
from collections import defaultdict
import h5py, numpy
class HDF5Matrix:
    """Array-like window onto a contiguous row range of an HDF5 dataset.

    An instance exposes rows ``start`` (inclusive) to ``end`` (exclusive) of
    ``dataset`` inside the file at ``datapath``.  Indices passed to
    ``__getitem__`` are relative to the window: index ``0`` maps to row
    ``start`` of the underlying dataset.

    Args:
        datapath: Path of the HDF5 file to open.
        dataset: Name of the dataset inside the file.
        start: First row of the window (inclusive).  Defaults to ``0``.
        end: Row at which the window stops (exclusive).  Defaults to the
            number of rows in the dataset.
        normalizer: Optional callable applied to every piece of data that
            ``__getitem__`` retrieves before it is returned.
    """

    # Open file handles keyed by path, shared by all instances so that each
    # file is opened only once.  The ``int`` default factory is never
    # triggered: membership is always tested before a lookup.
    refs = defaultdict(int)

    def __init__(self, datapath, dataset, start=0, end=None, normalizer=None):
        if datapath not in self.refs:
            # NOTE(review): no mode is passed, so h5py's default file mode
            # applies; that default may differ between h5py versions --
            # confirm which mode is intended.
            # The handle is not closed here; it stays cached in ``refs``.
            self.refs[datapath] = h5py.File(datapath)
        f = self.refs[datapath]
        self.data = f[dataset]
        self.start = start
        self.end = self.data.shape[0] if end is None else end
        self.normalizer = normalizer

    def __len__(self):
        """Return the number of rows in the window."""
        return self.end - self.start

    def __getitem__(self, key):
        """Return the rows selected by ``key``, relative to the window.

        Args:
            key: An ``int`` (or NumPy integer), a ``slice``, a ``list`` of
                integers, or a ``numpy.ndarray`` of integers.  Negative
                indices are not supported.

        Returns:
            The selected data, passed through ``normalizer`` when one was
            supplied.

        Raises:
            IndexError: If any requested index lies outside the window.
            ValueError: If a slice step smaller than 1 is given.
            TypeError: If ``key`` is of an unsupported type.
        """
        length = len(self)
        if isinstance(key, slice):
            if key.step is not None and key.step < 1:
                raise ValueError("slice step must be a positive integer")
            # Open-ended slices (``m[:n]``, ``m[n:]``) cover the window edges.
            lo = 0 if key.start is None else key.start
            hi = length if key.stop is None else key.stop
            if lo < 0 or hi < 0 or hi > length:
                raise IndexError("slice %r is out of range" % (key,))
            idx = slice(lo + self.start, hi + self.start, key.step)
        elif isinstance(key, (int, numpy.integer)):
            if key < 0 or key >= length:
                raise IndexError("index %r is out of range" % (key,))
            idx = int(key) + self.start
        elif isinstance(key, (list, numpy.ndarray)):
            offsets = numpy.asarray(key)
            if numpy.min(offsets) < 0 or numpy.max(offsets) >= length:
                raise IndexError("index list contains out-of-range entries")
            # A concrete list is built (not a lazy ``map`` object) so the
            # result is usable as an index on Python 3 as well.
            idx = (offsets + self.start).tolist()
        else:
            raise TypeError(
                "unsupported index type: %s" % type(key).__name__
            )

        selected = self.data[idx]
        if self.normalizer is not None:
            return self.normalizer(selected)
        return selected

    @property
    def shape(self):
        """Shape of the window: its row count plus the dataset's other dims."""
        return (len(self),) + tuple(self.data.shape[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment