TensorFlow cat vs dog classifier
#!/usr/bin/env python3
import numpy as np
import pickle
import os
import sys
from PIL import Image
def create_var(name, shape, stddev, wd):
    # Weight variable initialized from a truncated normal; wd (weight decay) is currently unused.
    dtype = tf.float32
    var = tf.get_variable(name, shape, initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=dtype), dtype=dtype)
    return var
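# A sketch (not part of the original graph) of how the unused wd argument could apply L2 weight
# decay, collecting per-variable losses to be added to the cost later; kept as a comment so the
# script's behavior is unchanged:
#
#   if wd > 0.0:
#       weight_decay = tf.nn.l2_loss(var) * wd
#       tf.add_to_collection('losses', weight_decay)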
IMAGE_PATH = "cats_vs_dogs/train/"
DATA_PATH = "cats_vs_dogs/data/"
BATCH_SIZE = 1
def getDim():
    """Return (maxheight, maxwidth) over all training images, cached in a text file."""
    global IMAGE_PATH
    dimensionFileName = "dimension"
    if not os.path.isfile(dimensionFileName):
        print("creating new file")
        maxwidth = 0
        maxheight = 0
        i = 0
        for file in os.listdir(IMAGE_PATH):
            if i % 100 == 0:
                print("processing ", i)
            img = Image.open(IMAGE_PATH + file)
            width, height = img.size
            maxwidth = max(maxwidth, width)
            maxheight = max(maxheight, height)
            img.close()
            i = i + 1
        with open(dimensionFileName, "w") as file:
            file.write(str(maxheight) + "\n" + str(maxwidth) + "\n")
    else:
        print("reading dimension file " + dimensionFileName)
        with open(dimensionFileName, "r") as file:
            maxheight = int(file.readline())
            maxwidth = int(file.readline())
    return (maxheight, maxwidth)
# late import for faster feedback if something above fails
import tensorflow as tf
def build_Y_Vec():
    """Build one-hot labels from the file names: cat -> [0, 1], dog -> [1, 0]."""
    Y = []
    for file in sorted(os.listdir(IMAGE_PATH)):
        if file.startswith("cat") and file.endswith("jpg"):
            Y.append([0, 1])
        elif file.startswith("dog") and file.endswith("jpg"):
            Y.append([1, 0])
    return np.array(Y)
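# build_Y_Vec is not called in __main__ below; a minimal usage sketch (assuming Kaggle-style
# names such as cat.0.jpg), kept as a comment so the script's behavior is unchanged:
#
#   labels = build_Y_Vec()   # labels[i] is [0, 1] for a cat and [1, 0] for a dog,
#                            # in the same sorted(os.listdir(IMAGE_PATH)) order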
if __name__ == '__main__':
    maxheight, maxwidth = getDim()  # getDim() returns (maxheight, maxwidth)
    graph = tf.Graph()
    with graph.as_default():
        # build graph
        X = tf.placeholder("float", [BATCH_SIZE, maxheight, maxwidth, 3])
        Y = tf.placeholder("float", [BATCH_SIZE, 2])  # cat or dog
        # conv-layer 1 ###############################################################
        # convolution stage 1
        with tf.variable_scope('conv-stage-1') as scope:
            kernel = create_var('weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=0.0)  # use 64 kernels with 5*5*3 dimension
            conv = tf.nn.conv2d(X, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            pre_activation = tf.nn.bias_add(conv, biases)
            conv1 = tf.nn.relu(pre_activation, name=scope.name)
        # pooling stage 1
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-1')
        # normalizing stage 1
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-1')
        # conv-layer 2 ###############################################################
        # convolution stage 2
        with tf.variable_scope('conv-stage-2') as scope:
            kernel = create_var('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
            conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            pre_activation = tf.nn.bias_add(conv, biases)
            conv2 = tf.nn.relu(pre_activation, name=scope.name)
        # normalizing stage 2 (note: normalization before pooling here, the reverse of stage 1)
        norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-2')
        # pooling stage 2
        # ksize = [batch, height, width, channels]; strides = [batch, height, width, channels]
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-2')
        # conv-layer 3 ###############################################################
        with tf.variable_scope('conv-stage-3') as scope:
            kernel = create_var('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
            conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            pre_activation = tf.nn.bias_add(conv, biases)
            conv3 = tf.nn.relu(pre_activation, name=scope.name)
        # normalizing stage 3 (again before pooling)
        norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-3')
        # pooling stage 3
        pool3 = tf.nn.max_pool(norm3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-3')
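        # Shape bookkeeping (an aside based on the layer parameters above): each of the three
        # max-pool stages halves height and width (stride 2, SAME padding, ceil division), so
        # pool3 holds roughly ceil(maxheight / 8) * ceil(maxwidth / 8) * 64 values per image;
        # that product is the vectorDim that local1 flattens below.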
        # local layers
        with tf.variable_scope('local1') as scope:
            # Move everything into depth so we can perform a single matrix multiply.
            reshapedToVector = tf.reshape(pool3, [BATCH_SIZE, -1])
            vectorDim = reshapedToVector.get_shape()[1].value
            perceptronNum1 = 100
            weights = create_var('weights', shape=[vectorDim, perceptronNum1], stddev=0.04, wd=0.004)
            biases = tf.get_variable(name='biases', shape=[perceptronNum1], initializer=tf.constant_initializer(0.1), dtype=tf.float32)
            local1 = tf.nn.relu(tf.matmul(reshapedToVector, weights) + biases, name=scope.name)
        # local2
        with tf.variable_scope('local2') as scope:
            perceptronNum2 = 50
            weights = create_var('weights', shape=[perceptronNum1, perceptronNum2], stddev=0.04, wd=0.004)
            biases = tf.get_variable(name='biases', shape=[perceptronNum2], initializer=tf.constant_initializer(0.1), dtype=tf.float32)
            local2 = tf.nn.relu(tf.matmul(local1, weights) + biases, name=scope.name)
        # softmax
        with tf.variable_scope('softmax_linear') as scope:
            weights = create_var('weights', [perceptronNum2, 2], stddev=1 / perceptronNum2, wd=0.0)
            biases = tf.get_variable(name='biases', shape=[2], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            softmax_linear = tf.add(tf.matmul(local2, weights), biases, name=scope.name)
        with tf.name_scope('cost-function'):
            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=softmax_linear, labels=Y))
        ################################################################################
        # NUM_IMG_PER_EPOCH = 10
        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(0.1, global_step, 100000, 0.96, staircase=True)
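        # With staircase=True the schedule above evaluates to
        #   lr = 0.1 * 0.96 ** (global_step // 100000)
        # i.e. the learning rate drops by 4% after every 100000 optimizer steps.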
        # tf.contrib.deprecated.scalar_summary('learning_rate', lr)
        with tf.name_scope('optimizer'):
            # pass global_step so the exponential decay schedule above actually advances
            opt = tf.train.GradientDescentOptimizer(lr).minimize(cost, global_step=global_step)
            # gradients = opt.compute_gradients(loss)
print("running")
#run
image = tf.image.decode_jpeg(IMAGE_PATH + "cat.1.jpg", channels=3)
image = tf.image.convert_image_dtype(image, dtype=tf.float32, name="normalize_as_float")#convert to float range 0-1
print(image)
resized_image = tf.image.resize_image_with_crop_or_pad(image, maxwidth, maxheight)
# reshaped_image = tf.reshape(resized_image, [maxwidth*maxheight*3, -1])
print(resized_image)
#inNumpy.append(resized_image)
        with tf.Session(graph=graph) as sess:
            writer = tf.train.SummaryWriter('cats_vs_dogs', graph=sess.graph)
            init = tf.global_variables_initializer()
            sess.run(init)
            # resized_image is a graph tensor, so evaluate it to a numpy array before feeding it
            # (np.array(resized_image) would wrap the Tensor object instead of its pixel values)
            img_value = sess.run(resized_image)
            sess.run(opt, feed_dict={
                X: img_value[np.newaxis, ...],  # add the batch dimension: (1, maxheight, maxwidth, 3)
                Y: np.array([[0, 1]])           # "cat" in the encoding used by build_Y_Vec
            })
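            # A possible extension (sketch only, assuming Kaggle-style RGB jpgs): loop over the
            # whole training set, loading each image with PIL, zero-padding it to the placeholder
            # size (top-left rather than centered), with labels taken from build_Y_Vec() in the
            # same sorted file order. Kept as a comment so the script's behavior is unchanged:
            #
            #   labels = build_Y_Vec()
            #   files = sorted(f for f in os.listdir(IMAGE_PATH) if f.endswith("jpg"))
            #   for i, name in enumerate(files):
            #       pixels = np.asarray(Image.open(IMAGE_PATH + name), dtype=np.float32) / 255.0
            #       img = np.zeros((maxheight, maxwidth, 3), dtype=np.float32)
            #       img[:pixels.shape[0], :pixels.shape[1], :] = pixels
            #       sess.run(opt, feed_dict={X: img[np.newaxis, ...], Y: labels[i:i + 1]})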