Skip to content

Instantly share code, notes, and snippets.

@veeresht
Created July 21, 2016 23:38
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save veeresht/7bf499ee6d81938f8bbdb3c6ef1855bf to your computer and use it in GitHub Desktop.
Save veeresht/7bf499ee6d81938f8bbdb3c6ef1855bf to your computer and use it in GitHub Desktop.
Read SVHN Dataset mat (version 7.3) files using h5py and numpy
import numpy as np
import h5py
def read_process_h5(filename):
    """Read the digit annotations from an SVHN ``digitStruct`` mat file.

    The file is opened read-only with h5py (MATLAB v7.3 files are HDF5)
    and closed again before returning.  The ``bbox`` and ``name`` members
    of the top-level ``digitStruct`` group are walked in lockstep, one
    entry per image.

    Input:
        filename: path of the mat file to read.

    Output:
        A list with one dictionary per image, holding the keys
        ``'filename'`` (str), ``'labels'`` (list of int), ``'length'``
        (number of labels) and ``'height'``, ``'left'``, ``'top'``,
        ``'width'`` (lists of float, one value per digit).
    """
    data_list = []

    # The context manager guarantees the HDF5 handle is released even if a
    # malformed record raises part-way through.
    with h5py.File(filename, 'r') as f:
        digit_struct = f['digitStruct']

        # Look members up by name instead of by position in .items():
        # positional indexing depends on member ordering and is not
        # possible on Python 3, where items() is a non-subscriptable view.
        # ravel() (rather than squeeze()) keeps a one-image file iterable.
        bbox_refs = np.array(digit_struct['bbox']).ravel()
        name_refs = np.array(digit_struct['name']).ravel()
        num_files = len(bbox_refs)

        pairs = zip(bbox_refs, name_refs)
        for count, (bbox_ref, name_ref) in enumerate(pairs, 1):
            data_dict = {}

            # The image name is stored as an array of character codes.
            char_codes = np.array(f[name_ref]).ravel()
            data_dict['filename'] = ''.join(chr(int(c)) for c in char_codes)

            bbox = f[bbox_ref]

            labels = _read_bbox_field(f, bbox['label'], int)
            data_dict['labels'] = labels
            data_dict['length'] = len(labels)

            for key in ('height', 'left', 'top', 'width'):
                data_dict[key] = _read_bbox_field(f, bbox[key], float)

            data_list.append(data_dict)
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3.
            print('Processed: %d/%d' % (count, num_files))

    return data_list


def _read_bbox_field(f, dataset, cast):
    """Return the per-digit values of one bbox field as a flat list.

    A field is stored in one of two ways: as an array of HDF5 object
    references (each pointing at a dataset holding a single value), or as
    the numeric value(s) directly.  Both forms are normalised to a plain
    list whose elements have been passed through ``cast``.
    """
    values = np.array(dataset).ravel()
    if values.dtype.kind == 'O':
        # h5py exposes reference datasets with the object dtype; follow
        # each reference and take the single stored element.
        return [cast(f[ref][0, 0]) for ref in values]
    return [cast(v) for v in values]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment