Created
January 24, 2016 11:30
-
-
Save tonyromarock/5dd16b4076b44cb39072 to your computer and use it in GitHub Desktop.
Converting MNIST data set into grayscale images.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python
# Creating gray scale BMP images
# 28 by 28 images (Total pixels: 784)
import array
import os
import struct

from numpy import genfromtxt
def main():
    """Convert every row of 'train.csv' into an 8-bit grayscale BMP file.

    The rows are loaded through readCSV(), which returns [labels, data].
    For row number j with label L, the image is written to
    'Images/picNo<j>_<L>.bmp' (the 'Images' folder is created if needed).

    Each file consists of a 14-byte file header, a 40-byte info header,
    a 256-entry colour table and 784 pixel bytes (28 x 28, one palette
    index per pixel).

    Raises:
        struct.error: if a row has fewer than 784 pixel values or a
            value does not fit into one unsigned byte.
    """
    # all sizes given in bytes
    SIZE_HEADER = 14
    SIZE_INFOHEADER = 40
    SIZE_COLOR_TABLE = 1024  # 256 entries * 4 bytes (B, G, R, reserved)
    SIZE_PIXEL_TABLE = 784  # 28 * 28 pixels, one byte each
    # The pixel array starts after both headers AND the colour table.
    OFFSET_PIXELS = SIZE_HEADER + SIZE_INFOHEADER + SIZE_COLOR_TABLE
    SIZE_FILE = OFFSET_PIXELS + SIZE_PIXEL_TABLE
    # -----Total: 1862----------#
    WIDTH = 28  # in pixels; 28 bytes per row needs no 4-byte row padding
    HEIGHT = SIZE_PIXEL_TABLE // WIDTH

    # struct layouts (little-endian, no alignment padding)
    fileheader_fmt = "<HIHHI"  # type, file size, reserved1, reserved2, offset
    infoheader_fmt = "<IiiHHIIiiII"
    BGRA_fmt = "<BBBB"
    row_fmt = "<%dB" % SIZE_PIXEL_TABLE

    # fill file header: 19778 == 0x4D42, i.e. the bytes "BM".
    # The size field must hold the size of the whole file and the offset
    # field must point at the first pixel byte (past the colour table).
    fileheader = struct.pack(
        fileheader_fmt, 19778, SIZE_FILE, 0, 0, OFFSET_PIXELS
    )

    # fill info header
    BI_RGB = 0  # file uncompressed
    bitBitCount = 8  # 8 bpp
    biClrUsed = SIZE_COLOR_TABLE // 4  # 256 different colors in the table
    infoheader = struct.pack(
        infoheader_fmt,
        SIZE_INFOHEADER,
        WIDTH,
        -HEIGHT,  # negative height: rows are stored top-down
        1,  # number of planes
        bitBitCount,
        BI_RGB,
        SIZE_PIXEL_TABLE,
        0,
        0,
        biClrUsed,
        0,
    )

    # Fill the color table only with gray values, by setting all BGRA
    # values with equal Blue, Green and Red. The ramp is inverted:
    # palette index 0 is white and index 255 is black.
    color_table = [
        struct.pack(BGRA_fmt, 255 - i, 255 - i, 255 - i, 0)
        for i in range(256)
    ]

    # read the image info from 'train.csv'
    dataset = readCSV()
    labels = dataset[0]
    data = dataset[1]

    mkdir("Images")

    # Take the pixel info and fill the pixel table
    for j, (label, row) in enumerate(zip(labels, data)):
        pixels = [int(value) for value in row[:SIZE_PIXEL_TABLE]]
        # One packed chunk per image; save() accepts any iterable of
        # byte strings for the pixel table.
        pixel_table = [struct.pack(row_fmt, *pixels)]
        # save pic in the Images folder
        name = "Images/picNo" + str(j) + "_" + str(int(label)) + ".bmp"
        save(name, fileheader, infoheader, color_table, pixel_table)
def readCSV():
    """Load 'train.csv' and split each row into its label and pixel values.

    The file is parsed with numpy's genfromtxt (comma separated, column
    types inferred via dtype=None). The first parsed row is dropped;
    presumably it is the CSV header line.

    Returns:
        A two-element list [labels, data]: labels holds the first field
        of every remaining row, data holds the rest of each row.
    """
    rows = genfromtxt('train.csv', delimiter=",", dtype=None)[1:]
    labels = []
    data = []
    for row in rows:
        labels.append(row[0])
        data.append(row[1:])
    return [labels, data]
# to save the bmp files
def save(filename, header, infoHeader, colorTable, pixelTable):
    """Write the parts of one BMP image to *filename* in binary mode.

    The parts are written back to back in this order: header,
    infoHeader, every entry of colorTable, every entry of pixelTable.
    An existing file is overwritten.

    Args:
        filename: path of the file to create.
        header: packed bytes of the file header.
        infoHeader: packed bytes of the info header.
        colorTable: iterable of byte strings (the palette entries).
        pixelTable: iterable of byte strings (the pixel data).
    """
    # The context manager closes the file even if a write fails.
    with open(filename, "wb") as datafile:
        datafile.write(header)
        datafile.write(infoHeader)
        # writelines() just writes each chunk in turn; no separators
        # are added, so this matches a loop of write() calls.
        datafile.writelines(colorTable)
        datafile.writelines(pixelTable)
# create a dir for the images
def mkdir(foldername):
    """Create the directory *foldername* (and missing parents) if needed.

    Calling it for a directory that already exists is a no-op.

    Raises:
        OSError: if the directory cannot be created, including when
            *foldername* exists but is not a directory.
    """
    # Try first and inspect the failure afterwards: checking for
    # existence before creating leaves a window in which another
    # process can create the folder.
    try:
        os.makedirs(foldername)
    except OSError:
        if not os.path.isdir(foldername):
            raise
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment