Skip to content

Instantly share code, notes, and snippets.

@johnbaums
Last active August 29, 2015 14:08
Show Gist options
  • Save johnbaums/882ad1e458e13b96a3d1 to your computer and use it in GitHub Desktop.
Save johnbaums/882ad1e458e13b96a3d1 to your computer and use it in GitHub Desktop.
Download and import MNIST handwritten numeral datasets
#' Download (optionally) and import the MNIST handwritten numeral datasets
#'
#' @param dir Path of the directory containing the extracted files
#'   `train-images-idx3-ubyte`, `train-labels-idx1-ubyte`,
#'   `t10k-images-idx3-ubyte` and `t10k-labels-idx1-ubyte`. If `NULL`, the
#'   gzipped files are downloaded into `tempdir()` and extracted there with
#'   `R.utils::gunzip()`.
#' @return A list with elements `train` and `test`. Each is a data frame with
#'   one row per image: the first column `y` is the label as a factor, the
#'   remaining columns hold the pixel intensities (0-255) in file order.
get.mnist <- function(dir=NULL) {
  if (is.null(dir)) {
    # Fail before downloading anything if the extraction tool is unavailable.
    if (!requireNamespace("R.utils", quietly = TRUE)) {
      stop("Package 'R.utils' is required to extract the downloaded files.",
           call. = FALSE)
    }
    dir <- tempdir()
    urls <- c(
      'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
      'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
      'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
      'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'
    )
    for (u in urls) {
      gz <- file.path(dir, basename(u))
      # Binary mode keeps the archive intact on Windows.
      download.file(u, gz, mode = "wb")
      # tempdir() is reused within a session, so allow re-extraction.
      R.utils::gunzip(gz, overwrite = TRUE)
    }
  }

  # Read an IDX3 image file into an integer matrix, one row per image.
  read_images <- function(path) {
    con <- file(path, "rb")
    on.exit(close(con), add = TRUE)
    # Header: magic number, image count, rows, columns (big-endian int32).
    hdr <- readBin(con, "integer", n = 4L, size = 4L, endian = "big")
    if (length(hdr) != 4L || hdr[1L] != 2051L) {
      stop("Not an IDX3 image file: ", path, call. = FALSE)
    }
    n_img <- hdr[2L]
    n_px <- hdr[3L] * hdr[4L]
    n_total <- as.numeric(n_img) * n_px
    # Pixels are unsigned bytes; a signed read would turn 128-255 negative.
    px <- readBin(con, "integer", n = n_total, size = 1L, signed = FALSE)
    if (length(px) != n_total) {
      stop("Truncated image file: ", path, call. = FALSE)
    }
    matrix(px, nrow = n_img, ncol = n_px, byrow = TRUE)
  }

  # Read an IDX1 label file into an integer vector.
  read_labels <- function(path) {
    con <- file(path, "rb")
    on.exit(close(con), add = TRUE)
    # Header: magic number, label count (big-endian int32).
    hdr <- readBin(con, "integer", n = 2L, size = 4L, endian = "big")
    if (length(hdr) != 2L || hdr[1L] != 2049L) {
      stop("Not an IDX1 label file: ", path, call. = FALSE)
    }
    lab <- readBin(con, "integer", n = hdr[2L], size = 1L, signed = FALSE)
    if (length(lab) != hdr[2L]) {
      stop("Truncated label file: ", path, call. = FALSE)
    }
    lab
  }

  # Combine the images and labels of one split ('train' or 't10k').
  read <- function(type) {
    X <- read_images(file.path(dir, sprintf('%s-images-idx3-ubyte', type)))
    y <- read_labels(file.path(dir, sprintf('%s-labels-idx1-ubyte', type)))
    # Guard against silent recycling of labels inside cbind.data.frame().
    if (length(y) != nrow(X)) {
      stop("Image and label counts differ for '", type, "'.", call. = FALSE)
    }
    cbind.data.frame(y=factor(y), X)
  }

  list(train = read('train'), test = read('t10k'))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment