BSVogler/cats_and_dogs.py

## cats_and_dogs.py
#!/usr/bin/env python3

import numpy as np
import pickle
import os
import sys
from PIL import Image

def create_var(name, shape, stddev, wd):
  """Create (or fetch) a float32 variable initialised from a truncated normal.

  Args:
    name: variable name, resolved inside the current ``tf.variable_scope``.
    shape: list of ints giving the variable shape.
    stddev: standard deviation of the truncated normal initializer.
    wd: accepted for call-site compatibility but currently unused by this
      function (no regularisation term is added here).

  Returns:
    The variable returned by ``tf.get_variable``.
  """
  float_type = tf.float32
  normal_init = tf.truncated_normal_initializer(stddev=stddev, dtype=float_type)
  return tf.get_variable(name, shape, initializer=normal_init, dtype=tf.float32)

# Directory that is scanned for the training images (read by getDim,
# build_Y_Vec and the main script).
IMAGE_PATH = "cats_vs_dogs/train/"
# Not referenced anywhere in the visible part of this file.
DATA_PATH = "cats_vs_dogs/data/"
# Leading (batch) dimension of the X and Y placeholders built in the main script.
BATCH_SIZE = 1

def getDim():
  """Return the largest image height and width found in IMAGE_PATH.

  The result is cached in a text file named "dimension" in the current
  working directory (first line: max height, second line: max width). If the
  cache file exists it is read back; otherwise every regular file in
  IMAGE_PATH is opened with PIL, the maxima are computed and the cache file
  is written.

  Returns:
    A tuple ``(maxheight, maxwidth)`` -- note the order: height first.
  """
  dimensionFileName = "dimension"

  # Fast path: reuse the cached result from an earlier run.
  if os.path.isfile(dimensionFileName):
    print("reading dimension file" + dimensionFileName)
    with open(dimensionFileName, "r") as dimFile:
      maxheight = int(dimFile.readline())
      maxwidth = int(dimFile.readline())
    return (maxheight, maxwidth)

  print("creating new file")
  maxwidth = 0
  maxheight = 0
  processed = 0
  for entry in os.listdir(IMAGE_PATH):
    imagePath = os.path.join(IMAGE_PATH, entry)
    if not os.path.isfile(imagePath):
      # Sub-directories cannot be opened as images; skip them.
      continue
    if processed % 100 == 0:
      print("processing ", processed)
    # The context manager releases the file handle even if reading fails.
    with Image.open(imagePath) as img:
      width, height = img.size
    maxwidth = max(maxwidth, width)
    maxheight = max(maxheight, height)
    processed += 1

  with open(dimensionFileName, "w") as dimFile:
    dimFile.write(str(maxheight) + "\n" + str(maxwidth) + "\n")
  return (maxheight, maxwidth)


#late import for faster feedback if something fails
import tensorflow as tf

def build_Y_Vec():
  """Build the one-hot label matrix for the images in IMAGE_PATH.

  The directory listing is sorted by file name. Every name ending in "jpg"
  contributes one row: ``[0, 1]`` if it starts with "cat", ``[1, 0]`` if it
  starts with "dog". All other entries are ignored.

  Returns:
    A numpy array with one label row per recognised image.
  """
  labels = []
  for entry in sorted(os.listdir(IMAGE_PATH)):
    if not entry.endswith("jpg"):
      continue
    if entry.startswith("cat"):
      labels.append([0, 1])
    elif entry.startswith("dog"):
      labels.append([1, 0])
  return np.array(labels)

if __name__ == '__main__':
    # Script entry point: builds a three-stage CNN with two fully connected
    # layers and a 2-way linear output, then runs ONE gradient-descent step on
    # a single image (cat.1.jpg).

    # getDim() returns (maxheight, maxwidth); unpack in that order so the
    # names match the values they hold.
    maxheight, maxwidth = getDim()

    graph = tf.Graph()
    with graph.as_default():

        # build graph
        X = tf.placeholder("float", [BATCH_SIZE, maxheight, maxwidth, 3])
        Y = tf.placeholder("float", [BATCH_SIZE, 2])  # one-hot: [0,1] = cat, [1,0] = dog

        # conv-layer 1 ###########################################################
        with tf.variable_scope('conv-stage-1') as scope:
            # convolution stage 1: 64 kernels of dimension 5*5*3
            kernel = create_var('weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=0.0)
            conv = tf.nn.conv2d(X, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            pre_activation = tf.nn.bias_add(conv, biases)
            conv1 = tf.nn.relu(pre_activation, name=scope.name)
            # pooling stage 1
            pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-1')
            # normalizing stage 1
            norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-1')

        # conv-layer 2 ###########################################################
        with tf.variable_scope('conv-stage-2') as scope:
            # convolution stage 2
            kernel = create_var('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
            conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            pre_activation = tf.nn.bias_add(conv, biases)
            conv2 = tf.nn.relu(pre_activation, name=scope.name)
            # normalizing stage 2 (unlike stage 1, normalization comes before pooling here)
            norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-2')
            # pooling stage 2
            # ksize and strides are given per input dimension: [batch, height, width, channels]
            pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-2')

        # conv-layer 3 ###########################################################
        with tf.variable_scope('conv-stage-3') as scope:
            # convolution stage 3
            kernel = create_var('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
            conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            pre_activation = tf.nn.bias_add(conv, biases)
            conv3 = tf.nn.relu(pre_activation, name=scope.name)
            # normalizing stage 3 (normalization first, then pooling, as in stage 2)
            norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-3')
            # pooling stage 3
            pool3 = tf.nn.max_pool(norm3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-3')

        # local layers
        with tf.variable_scope('local1') as scope:
            # Flatten each example into one vector so a single matrix multiply suffices.
            reshapedToVector = tf.reshape(pool3, [BATCH_SIZE, -1])
            vectorDim = reshapedToVector.get_shape()[1].value
            perceptronNum1 = 100
            weights = create_var('weights', shape=[vectorDim, perceptronNum1], stddev=0.04, wd=0.004)
            biases = tf.get_variable(name='biases', shape=[perceptronNum1], initializer=tf.constant_initializer(0.1), dtype=tf.float32)
            local1 = tf.nn.relu(tf.matmul(reshapedToVector, weights) + biases, name=scope.name)

        # local2
        with tf.variable_scope('local2') as scope:
            perceptronNum2 = 50
            weights = create_var('weights', shape=[perceptronNum1, perceptronNum2], stddev=0.04, wd=0.004)
            biases = tf.get_variable(name='biases', shape=[perceptronNum2], initializer=tf.constant_initializer(0.1), dtype=tf.float32)
            local2 = tf.nn.relu(tf.matmul(local1, weights) + biases, name=scope.name)

        # softmax (unnormalised logits; the softmax itself is applied inside the cost op)
        with tf.variable_scope('softmax_linear') as scope:
            weights = create_var('weights', [perceptronNum2, 2], stddev=1/perceptronNum2, wd=0.0)
            biases = tf.get_variable(name='biases', shape=[2], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
            softmax_linear = tf.add(tf.matmul(local2, weights), biases, name=scope.name)

        with tf.name_scope('cost-function'):
            # Keyword arguments: the positional form is rejected by TensorFlow >= 1.0
            # and keywords make the logits/labels order unambiguous.
            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=softmax_linear, labels=Y))
        ############################################################################

        # NUM_IMG_PER_EPOCH = 10

        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(0.1, global_step, 100000, 0.96, staircase=True)
        # tf.contrib.deprecated.scalar_summary('learning_rate', lr)

        with tf.name_scope('optimizer'):
            # Passing global_step makes every training step increment it; without
            # it the exponential decay above would never advance.
            opt = tf.train.GradientDescentOptimizer(lr).minimize(cost, global_step=global_step)

    print("running")
    # run
    # The image pipeline must live in `graph`, because the session below only
    # executes ops belonging to that graph.
    with graph.as_default():
        # decode_jpeg expects the encoded file contents, not a path, so read the file first.
        raw_jpeg = tf.read_file(IMAGE_PATH + "cat.1.jpg")
        image = tf.image.decode_jpeg(raw_jpeg, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32, name="normalize_as_float")  # convert to float range 0-1
        print(image)
        # Argument order is (image, target_height, target_width).
        resized_image = tf.image.resize_image_with_crop_or_pad(image, maxheight, maxwidth)
        print(resized_image)

    with tf.Session(graph=graph) as sess:
        writer = tf.train.SummaryWriter('cats_vs_dogs', graph=sess.graph)
        try:
            init = tf.global_variables_initializer()
            sess.run(init)
            # A symbolic tensor cannot be fed directly: evaluate it to a numpy
            # array first, then add the leading batch axis expected by X.
            # NOTE: this feeds exactly one example, matching BATCH_SIZE = 1.
            image_value = sess.run(resized_image)
            sess.run(opt, feed_dict={
                X: image_value[np.newaxis, ...],
                Y: np.array([[0, 1]])
            })
        finally:
            # Flush and release the event file.
            writer.close()
	#!/usr/bin/env python3

	import numpy as np
	import pickle
	import os
	import sys
	from PIL import Image

	def create_var(name, shape, stddev, wd):
		"""Create (or fetch) a float32 variable initialised from a truncated normal.

		Args:
			name: variable name, resolved inside the current ``tf.variable_scope``.
			shape: list of ints giving the variable shape.
			stddev: standard deviation of the truncated normal initializer.
			wd: accepted for call-site compatibility but currently unused by this
				function (no regularisation term is added here).

		Returns:
			The variable returned by ``tf.get_variable``.
		"""
		dtype = tf.float32
		var = tf.get_variable(name, shape, initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=dtype), dtype=tf.float32)
		return var

	# Directory that is scanned for the training images.
	IMAGE_PATH = "cats_vs_dogs/train/"
	# Not referenced anywhere in the visible part of this file.
	DATA_PATH = "cats_vs_dogs/data/"
	# Leading (batch) dimension of the X and Y placeholders.
	BATCH_SIZE = 1

	def getDim():
		"""Return the largest image height and width found in IMAGE_PATH.

		The result is cached in a text file named "dimension" in the current
		working directory (first line: max height, second line: max width). If
		the cache file exists it is read back; otherwise every regular file in
		IMAGE_PATH is opened with PIL, the maxima are computed and the cache
		file is written.

		Returns:
			A tuple ``(maxheight, maxwidth)`` -- note the order: height first.
		"""
		dimensionFileName = "dimension"

		# Fast path: reuse the cached result from an earlier run.
		if os.path.isfile(dimensionFileName):
			print("reading dimension file" + dimensionFileName)
			with open(dimensionFileName, "r") as dimFile:
				maxheight = int(dimFile.readline())
				maxwidth = int(dimFile.readline())
			return (maxheight, maxwidth)

		print("creating new file")
		maxwidth = 0
		maxheight = 0
		processed = 0
		for entry in os.listdir(IMAGE_PATH):
			imagePath = os.path.join(IMAGE_PATH, entry)
			if not os.path.isfile(imagePath):
				# Sub-directories cannot be opened as images; skip them.
				continue
			if processed % 100 == 0:
				print("processing ", processed)
			# The context manager releases the file handle even if reading fails.
			with Image.open(imagePath) as img:
				width, height = img.size
			maxwidth = max(maxwidth, width)
			maxheight = max(maxheight, height)
			processed += 1

		with open(dimensionFileName, "w") as dimFile:
			dimFile.write(str(maxheight) + "\n" + str(maxwidth) + "\n")
		return (maxheight, maxwidth)


	#late import for faster feedback if something fails
	import tensorflow as tf

	def build_Y_Vec():
		"""Build the one-hot label matrix for the images in IMAGE_PATH.

		The directory listing is sorted by file name. Every name ending in "jpg"
		contributes one row: ``[0, 1]`` if it starts with "cat", ``[1, 0]`` if it
		starts with "dog". All other entries are ignored.

		Returns:
			A numpy array with one label row per recognised image.
		"""
		Y = []
		for file in sorted(os.listdir(IMAGE_PATH)):
			if file.startswith("cat") and file.endswith("jpg"):
				Y.append([0,1])
			elif file.startswith("dog") and file.endswith("jpg"):
				Y.append([1,0])
		return np.array(Y)

	if __name__ == '__main__':
		# Script entry point: builds a three-stage CNN with two fully connected
		# layers and a 2-way linear output, then runs ONE gradient-descent step
		# on a single image (cat.1.jpg).

		# getDim() returns (maxheight, maxwidth); unpack in that order so the
		# names match the values they hold.
		maxheight, maxwidth = getDim()

		graph = tf.Graph()
		with graph.as_default():

			# build graph
			X = tf.placeholder("float", [BATCH_SIZE, maxheight, maxwidth, 3])
			Y = tf.placeholder("float", [BATCH_SIZE, 2])  # one-hot: [0,1] = cat, [1,0] = dog

			# conv-layer 1 #######################################################
			with tf.variable_scope('conv-stage-1') as scope:
				# convolution stage 1: 64 kernels of dimension 5*5*3
				kernel = create_var('weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=0.0)
				conv = tf.nn.conv2d(X, kernel, [1, 1, 1, 1], padding='SAME')
				biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
				pre_activation = tf.nn.bias_add(conv, biases)
				conv1 = tf.nn.relu(pre_activation, name=scope.name)
				# pooling stage 1
				pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-1')
				# normalizing stage 1
				norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-1')

			# conv-layer 2 #######################################################
			with tf.variable_scope('conv-stage-2') as scope:
				# convolution stage 2
				kernel = create_var('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
				conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
				biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
				pre_activation = tf.nn.bias_add(conv, biases)
				conv2 = tf.nn.relu(pre_activation, name=scope.name)
				# normalizing stage 2 (unlike stage 1, normalization comes before pooling here)
				norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-2')
				# pooling stage 2
				# ksize and strides are given per input dimension: [batch, height, width, channels]
				pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-2')

			# conv-layer 3 #######################################################
			with tf.variable_scope('conv-stage-3') as scope:
				# convolution stage 3
				kernel = create_var('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
				conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
				biases = tf.get_variable(name='biases', shape=[64], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
				pre_activation = tf.nn.bias_add(conv, biases)
				conv3 = tf.nn.relu(pre_activation, name=scope.name)
				# normalizing stage 3 (normalization first, then pooling, as in stage 2)
				norm3 = tf.nn.lrn(conv3, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm-stage-3')
				# pooling stage 3
				pool3 = tf.nn.max_pool(norm3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool-stage-3')

			# local layers
			with tf.variable_scope('local1') as scope:
				# Flatten each example into one vector so a single matrix multiply suffices.
				reshapedToVector = tf.reshape(pool3, [BATCH_SIZE, -1])
				vectorDim = reshapedToVector.get_shape()[1].value
				perceptronNum1 = 100
				weights = create_var('weights', shape=[vectorDim, perceptronNum1], stddev=0.04, wd=0.004)
				biases = tf.get_variable(name='biases', shape=[perceptronNum1], initializer=tf.constant_initializer(0.1), dtype=tf.float32)
				local1 = tf.nn.relu(tf.matmul(reshapedToVector, weights) + biases, name=scope.name)

			# local2
			with tf.variable_scope('local2') as scope:
				perceptronNum2 = 50
				weights = create_var('weights', shape=[perceptronNum1, perceptronNum2], stddev=0.04, wd=0.004)
				biases = tf.get_variable(name='biases', shape=[perceptronNum2], initializer=tf.constant_initializer(0.1), dtype=tf.float32)
				local2 = tf.nn.relu(tf.matmul(local1, weights) + biases, name=scope.name)

			# softmax (unnormalised logits; the softmax itself is applied inside the cost op)
			with tf.variable_scope('softmax_linear') as scope:
				weights = create_var('weights', [perceptronNum2, 2], stddev=1/perceptronNum2, wd=0.0)
				biases = tf.get_variable(name='biases', shape=[2], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
				softmax_linear = tf.add(tf.matmul(local2, weights), biases, name=scope.name)

			with tf.name_scope('cost-function'):
				# Keyword arguments: the positional form is rejected by TensorFlow >= 1.0
				# and keywords make the logits/labels order unambiguous.
				cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=softmax_linear, labels=Y))
			########################################################################

			# NUM_IMG_PER_EPOCH = 10

			global_step = tf.Variable(0, trainable=False)
			lr = tf.train.exponential_decay(0.1, global_step, 100000, 0.96, staircase=True)
			# tf.contrib.deprecated.scalar_summary('learning_rate', lr)

			with tf.name_scope('optimizer'):
				# Passing global_step makes every training step increment it; without
				# it the exponential decay above would never advance.
				opt = tf.train.GradientDescentOptimizer(lr).minimize(cost, global_step=global_step)

		print("running")
		# run
		# The image pipeline must live in `graph`, because the session below only
		# executes ops belonging to that graph.
		with graph.as_default():
			# decode_jpeg expects the encoded file contents, not a path, so read the file first.
			raw_jpeg = tf.read_file(IMAGE_PATH + "cat.1.jpg")
			image = tf.image.decode_jpeg(raw_jpeg, channels=3)
			image = tf.image.convert_image_dtype(image, dtype=tf.float32, name="normalize_as_float")  # convert to float range 0-1
			print(image)
			# Argument order is (image, target_height, target_width).
			resized_image = tf.image.resize_image_with_crop_or_pad(image, maxheight, maxwidth)
			print(resized_image)

		with tf.Session(graph=graph) as sess:
			writer = tf.train.SummaryWriter('cats_vs_dogs', graph=sess.graph)
			try:
				init = tf.global_variables_initializer()
				sess.run(init)
				# A symbolic tensor cannot be fed directly: evaluate it to a numpy
				# array first, then add the leading batch axis expected by X.
				# NOTE: this feeds exactly one example, matching BATCH_SIZE = 1.
				image_value = sess.run(resized_image)
				sess.run(opt, feed_dict={
					X: image_value[np.newaxis, ...],
					Y: np.array([[0, 1]])
				})
			finally:
				# Flush and release the event file.
				writer.close()