Skip to content

Instantly share code, notes, and snippets.

@tonyromarock
Created January 24, 2016 11:30
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save tonyromarock/5dd16b4076b44cb39072 to your computer and use it in GitHub Desktop.
Converting MNIST data set into grayscale images.
#! /usr/bin/python
# Creating gray scale BMP images
# 28 by 28 images (Total pixels: 784)
import struct, array, os
from numpy import genfromtxt
def main():
    """Convert every image row of 'train.csv' into an 8-bit grayscale BMP.

    Each row returned by readCSV() is written to
    'Images/picNo<row index>_<label>.bmp' as an uncompressed, palettised
    28x28 bitmap.  The palette is an inverted gray ramp, so a stored pixel
    value of 0 is displayed white and 255 is displayed black.

    Raises:
        IndexError: if a row holds fewer than 784 pixel values.
        struct.error: if a pixel value does not fit in one unsigned byte.
    """
    # All sizes are given in bytes.
    SIZE_HEADER = 14
    SIZE_INFOHEADER = 40
    SIZE_COLOR_TABLE = 1024
    SIZE_PIXEL_TABLE = 784
    SIZE_FILE = (SIZE_HEADER + SIZE_INFOHEADER
                 + SIZE_COLOR_TABLE + SIZE_PIXEL_TABLE)  # total: 1862
    # The pixel array starts after both headers AND the colour table.
    OFFSET_PIXELS = SIZE_HEADER + SIZE_INFOHEADER + SIZE_COLOR_TABLE  # 1078

    WIDTH = 28  # in pixels; a multiple of 4, so rows need no padding bytes
    HEIGHT = SIZE_PIXEL_TABLE // WIDTH

    # struct layouts (little-endian, no alignment padding)
    fileheader_fmt = "<HIII"  # type, file size, reserved, pixel offset
    infoheader_fmt = "<IiiHHIIiiII"
    BGRA_fmt = "<BBBB"
    Pixel_cell_fmt = "<B"

    # File header.  19778 == 0x4D42, i.e. the bytes "BM".  The size field
    # must hold the whole file size and the offset field must point at the
    # first pixel byte; the previous values (14 and 54) described a file
    # without a colour table and made readers misplace the pixel data.
    fileheader = struct.pack(fileheader_fmt, 19778, SIZE_FILE, 0,
                             OFFSET_PIXELS)

    # Info header.
    BI_RGB = 0  # file uncompressed
    bitBitCount = 8  # 8 bpp
    biClrUsed = SIZE_COLOR_TABLE // 4  # 256 entries of 4 bytes each
    infoheader = struct.pack(
        infoheader_fmt,
        SIZE_INFOHEADER,
        WIDTH,
        -HEIGHT,  # negative height: rows are stored top to bottom
        1,  # colour planes
        bitBitCount,
        BI_RGB,
        SIZE_PIXEL_TABLE,
        0,  # horizontal resolution (unspecified)
        0,  # vertical resolution (unspecified)
        biClrUsed,
        0,  # all colours are important
    )

    # Fill the colour table only with gray values by giving every BGRA
    # entry equal Blue, Green and Red components (inverted ramp).
    color_table = [
        struct.pack(BGRA_fmt, 255 - i, 255 - i, 255 - i, 0)
        for i in range(256)
    ]

    # Read the image info from 'train.csv'.
    dataset = readCSV()
    labels = dataset[0]
    data = dataset[1]

    mkdir("Images")

    # Take the pixel info of each row and fill its pixel table.
    pack_pixel = struct.Struct(Pixel_cell_fmt).pack
    for j, row in enumerate(data):
        pixel_table = [
            pack_pixel(int(row[i])) for i in range(SIZE_PIXEL_TABLE)
        ]
        # Save the picture in the Images folder.
        name = "Images/picNo" + str(j) + "_" + str(int(labels[j])) + ".bmp"
        save(name, fileheader, infoheader, color_table, pixel_table)
def readCSV(filename='train.csv'):
    """Load a label/pixel CSV file and split it into labels and pixel rows.

    The file is parsed with numpy's genfromtxt using dtype=None and the
    first row is dropped (presumably the column-header line).  Values are
    returned exactly as genfromtxt parsed them; callers that need numbers
    should convert with int().

    Args:
        filename: path of the CSV file to read.  Defaults to 'train.csv'
            in the current working directory.

    Returns:
        A two-element list [labels, data]: labels holds the first column
        of every remaining row, data holds the rest of each row.
    """
    rows = genfromtxt(filename, delimiter=",", dtype=None)[1:]
    labels = [row[0] for row in rows]
    data = [row[1:] for row in rows]
    return [labels, data]
# to save the bmp files
def save(filename, header, infoHeader, colorTable, pixelTable):
    """Write one BMP file from its already-packed parts.

    Args:
        filename: path of the file to create (overwritten if it exists).
        header: packed file header bytes.
        infoHeader: packed info header bytes.
        colorTable: iterable of packed colour-table entries (bytes).
        pixelTable: iterable of packed pixel cells (bytes).

    The parts are written in exactly that order.
    """
    # Assemble the payload first so the file receives a single write
    # instead of one tiny write per colour entry and per pixel.
    payload = b"".join([
        header,
        infoHeader,
        b"".join(colorTable),
        b"".join(pixelTable),
    ])
    # The context manager closes the file even if the write fails.
    with open(filename, "wb") as datafile:
        datafile.write(payload)
# create a dir for the images
def mkdir(foldername):
    """Create the directory foldername (and any parents) if it is missing.

    Calling this for a directory that already exists is a no-op.

    Raises:
        OSError: if the directory cannot be created, including when
            foldername already exists but is not a directory.
    """
    # Try first and inspect the failure afterwards: checking for existence
    # before creating leaves a window in which another process can create
    # the directory and make makedirs() fail.
    try:
        os.makedirs(foldername)
    except OSError:
        if not os.path.isdir(foldername):
            raise
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment