Skip to content

Instantly share code, notes, and snippets.

@akesling
Created April 10, 2013 22:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akesling/5358936 to your computer and use it in GitHub Desktop.
Save akesling/5358936 to your computer and use it in GitHub Desktop.
import os, struct
from array import array
from cvxopt.base import matrix
def read(digits, dataset="training", path="."):
    """
    Python function for importing the MNIST data set.

    Parameters:
        digits: container of label values to keep; every label in the
            label file is tested with ``in digits``.
        dataset: either "training" or "testing"; selects which pair of
            IDX files (images + labels) is read.
        path: directory that contains the IDX files.

    Returns:
        A tuple ``(images, labels)`` of cvxopt matrices. ``images`` has one
        row per selected sample and ``rows*cols`` columns holding the
        sample's pixel bytes in file order; ``labels`` has one row per
        selected sample and a single column holding its label.

    Raises:
        ValueError: if ``dataset`` is neither "training" nor "testing".
    """
    # Compare by value: `is` tests object identity and only appears to work
    # for string literals the interpreter happens to intern.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Label file: 8-byte big-endian header (magic, count), then one signed
    # byte per label. The header values are read only to skip past them.
    with open(fname_lbl, 'rb') as flbl:
        _magic_nr, _lbl_count = struct.unpack(">II", flbl.read(8))
        lbl = array("b", flbl.read())

    # Image file: 16-byte big-endian header (magic, count, rows, cols),
    # then the pixel data as unsigned bytes.
    with open(fname_img, 'rb') as fimg:
        _magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = array("B", fimg.read())

    # Positions of the samples whose label is one of the requested digits.
    ind = [k for k in range(size) if lbl[k] in digits]

    pixels = rows * cols  # number of bytes occupied by one image
    images = matrix(0, (len(ind), pixels))
    labels = matrix(0, (len(ind), 1))
    for i, k in enumerate(ind):
        images[i, :] = img[k * pixels:(k + 1) * pixels]
        labels[i] = lbl[k]
    return images, labels
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment