Skip to content

Instantly share code, notes, and snippets.

@S0ngyuLi
Created February 16, 2018 05:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save S0ngyuLi/1a443d94f207053e93fea4fb72d3096b to your computer and use it in GitHub Desktop.
Save S0ngyuLi/1a443d94f207053e93fea4fb72d3096b to your computer and use it in GitHub Desktop.
# Class-name table; read.table() puts the names in its first column, which
# is how the rest of this script indexes it (labels[[1]]).
labels <- read.table("batches.meta.txt")
num.images <- 10000 # Set to 10000 to retrieve all images per file to memory
# Preallocate one slot per image across the 5 batch files so the result
# lists are not regrown on every assignment.
images.rgb <- vector("list", 5 * num.images)
images.lab <- vector("list", 5 * num.images)
# Cycle through all 5 binary files
for (f in 1:5) {
  to.read <- file(paste0("data_batch_", f, ".bin"), "rb")
  # `finally` guarantees the connection is closed even when a read fails
  # part-way through a batch.
  tryCatch(
    for (i in seq_len(num.images)) {
      # One record as consumed here: 1 label byte followed by three runs of
      # 1024 bytes, stored as the r, g and b columns respectively.
      l <- readBin(to.read, integer(), size = 1, n = 1, endian = "big")
      r <- as.integer(readBin(to.read, raw(), size = 1, n = 1024, endian = "big"))
      g <- as.integer(readBin(to.read, raw(), size = 1, n = 1024, endian = "big"))
      b <- as.integer(readBin(to.read, raw(), size = 1, n = 1024, endian = "big"))
      # readBin() returns a short vector at end-of-file instead of failing;
      # stop here rather than store an empty or partial image.
      if (length(l) != 1L || length(r) != 1024L ||
          length(g) != 1024L || length(b) != 1024L) {
        stop("data_batch_", f, ".bin ended before image ", i,
             " was complete", call. = FALSE)
      }
      # Global position of this image across all batch files.
      index <- num.images * (f - 1) + i
      images.rgb[[index]] <- data.frame(r, g, b)
      # Stored as byte value + 1 so it can be used directly as a 1-based
      # index into `labels` (presumably the file holds 0-based class ids).
      images.lab[[index]] <- l + 1
    },
    finally = close(to.read)
  )
}
# Drop the loop temporaries so only labels, num.images and the two image
# lists remain in the workspace.
remove(l, r, g, b, f, i, index, to.read)
# function to run sanity check on photos & labels import
#
# Draws image number `index` from the global `images.rgb` list on the current
# graphics device and returns the matching entry from the first column of the
# global `labels` table, looked up through the global `images.lab` list.
#
# index: a single number selecting one element of `images.rgb`
#        (1 <= index < length(images.rgb) + 1).
# Returns: labels[[1]][images.lab[[index]]].
drawImage <- function(index) {
  # Reject vectors, NA and out-of-range values up front; `[[` would otherwise
  # do recursive indexing or fail with an unrelated message.
  if (!is.numeric(index) || length(index) != 1L || is.na(index) ||
      index < 1 || index >= length(images.rgb) + 1) {
    stop("`index` must be a single number between 1 and ",
         length(images.rgb), call. = FALSE)
  }

  # Testing the parsing: Convert each color layer into a matrix,
  # combine into an rgb object, and display as a plot
  img <- images.rgb[[index]]
  # Each channel is a flat vector; fill it row by row into 32 columns.
  to_plane <- function(channel) matrix(channel, ncol = 32, byrow = TRUE)
  img.r.mat <- to_plane(img$r)
  img.g.mat <- to_plane(img$g)
  img.b.mat <- to_plane(img$b)

  # rgb() returns a flat character vector of colours; restore the matrix
  # shape so grid.raster() draws it as a 2-D image.
  img.col.mat <- rgb(img.r.mat, img.g.mat, img.b.mat, maxColorValue = 255)
  dim(img.col.mat) <- dim(img.r.mat)

  # Plot and output label. Namespaced call: no package is attached to the
  # caller's search path as a side effect of drawing.
  grid::grid.raster(img.col.mat, interpolate = FALSE)
  labels[[1]][images.lab[[index]]]
}
# Draw one 32-pixel-wide image from a flat collection of 3072 channel values
# on the current graphics device.
#
# rgb:         3072 values on a 0-255 scale (maxColorValue = 255). A plain
#              vector, or anything unlist() flattens to one (e.g. a one-row
#              data frame). By default the layout is planar: 1024 red values,
#              then 1024 green, then 1024 blue.
# interleaved: set to TRUE when the values are ordered r1, g1, b1, r2, g2,
#              b2, ... (the layout as.vector(t(data.frame(r, g, b)))
#              produces). Defaults to FALSE.
# Returns: NULL, invisibly; the function is called for its drawing side
#          effect.
draw_image_by_rgb <- function(rgb, interleaved = FALSE) {
  values <- unlist(rgb, use.names = FALSE)
  # matrix() would silently recycle or truncate a wrong-length input and
  # draw a meaningless picture; fail loudly instead.
  if (length(values) != 3072L) {
    stop("`rgb` must contain exactly 3072 values (1024 pixels x 3 channels), got ",
         length(values), call. = FALSE)
  }

  # One row per pixel, one column per channel.
  img <- matrix(values, nrow = 1024, ncol = 3, byrow = interleaved)
  img.r.mat <- matrix(img[, 1], ncol = 32, byrow = TRUE)
  img.g.mat <- matrix(img[, 2], ncol = 32, byrow = TRUE)
  img.b.mat <- matrix(img[, 3], ncol = 32, byrow = TRUE)

  # The `rgb` argument shadows the colour constructor's name, so call the
  # function through its namespace to keep the lookup unambiguous.
  img.col.mat <- grDevices::rgb(img.r.mat, img.g.mat, img.b.mat,
                                maxColorValue = 255)
  dim(img.col.mat) <- dim(img.r.mat)

  # Namespaced call: no package is attached as a side effect of drawing.
  grid::grid.raster(img.col.mat, interpolate = FALSE)
  invisible(NULL)
}
# drawImage(sample(1:(num.images*5), size=1))
# Positions of every image whose stored label equals 3 (presumably the
# "bird" class, going by the variable names -- confirm against `labels`).
bird_indices <- which(images.lab == 3)
# One row per selected image, 3072 columns. as.vector(t(<r,g,b data frame>))
# flattens pixel by pixel, so each row reads r1, g1, b1, r2, g2, b2, ...
birds <- matrix(NA_integer_, nrow = length(bird_indices), ncol = 3072)
for (i in seq_along(bird_indices)) {
  # Index through bird_indices so that only the selected images are copied,
  # not simply the first rows of the data set.
  birds[i, ] <- as.vector(t(images.rgb[[bird_indices[i]]]))
}
birds_df <- data.frame(birds)
# PCA over the 3072 pixel-channel columns.
pca_result <- princomp(birds_df)
# Keep the loadings of the first 20 components and transpose them so that
# each row of `p` is one component expressed over the 3072 input columns.
pca_20 <- pca_result$loadings[, 1:20]
pca_20 <- t(pca_20)
p <- data.frame(pca_20)
names(p) <- seq_len(3072)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment