# Gist by @Syzygy2048, created January 7, 2018.
# based on https://medium.com/initialized-capital/we-need-to-go-deeper-a-practical-guide-to-tensorflow-and-inception-50e66281804f
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('data/mnist', one_hot=True)
from tensorflow.contrib.slim.nets import inception as nn_architecture
from tensorflow.contrib import slim
from tensorflow.contrib.layers.python.layers import layers as layers_lib
import numpy as np
from scipy.ndimage.interpolation import zoom
NUM_CLASSES = 10
CHECKPOINT_PATH = "checkpoints/inception_v3.ckpt"
BATCH_SIZE = 20
MEAN = np.mean(mnist.train.images)
STD = np.std(mnist.train.images)
NUM_TRAIN = mnist.train.labels.shape[0]
NUM_TEST = mnist.test.labels.shape[0]
print("train data %d, test data %d" % (NUM_TRAIN, NUM_TEST))
def create_network(inputs):
    # The arg_scope is a fix for an issue where the model doesn't fit the checkpoint:
    # https://github.com/tensorflow/models/issues/2977
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'updates_collections': None}):
        logits, endpoints = nn_architecture.inception_v3(
            inputs,
            num_classes=1001,  # matches the checkpoint; maybe set to NUM_CLASSES (or 0/None to omit the logit layer and get its input instead) for retraining
            is_training=True,  # dropout is disabled when False, for eval
            dropout_keep_prob=0.8,
            min_depth=16,
            depth_multiplier=1.0,
            prediction_fn=layers_lib.softmax,
            spatial_squeeze=True,
            reuse=tf.AUTO_REUSE,  # reuse existing variables in the scope instead of creating new ones
            scope='InceptionV3')
    return logits, endpoints
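# A minimal sketch, not in the original gist: the same network built in eval
# mode. With is_training=False dropout is disabled and batch norm uses its
# moving averages, so repeated forward passes on the same input are
# deterministic. This is relevant to the ISSUE demonstrated at the bottom.
def create_inference_network(inputs):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'updates_collections': None}):
        logits, endpoints = nn_architecture.inception_v3(inputs,
                                                         num_classes=1001,
                                                         is_training=False,
                                                         reuse=tf.AUTO_REUSE,
                                                         scope='InceptionV3')
    return logits, endpoints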
def load_checkpoint(path):
    saver = tf.train.Saver()
    # An earlier attempt built the Saver from an explicit variable map, one entry
    # per layer name taken from
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/nets/inception_v3.py
    # (Conv2d_1a_3x3 ... Mixed_7c), but tf.get_variable on those bare names
    # doesn't resolve the slim variable scopes.
    #
    # print(endpoints) lists the same layers and matches the model, but also
    # includes all the logits and prediction stuff:
    #   Conv2d_1a_3x3, Conv2d_2a_3x3, Conv2d_2b_3x3, MaxPool_3a_3x3,
    #   Conv2d_3b_1x1, Conv2d_4a_3x3, MaxPool_5a_3x3,
    #   Mixed_5b, Mixed_5c, Mixed_5d, Mixed_6a, Mixed_6b, Mixed_6c, Mixed_6d,
    #   Mixed_6e, Mixed_7a, Mixed_7b, Mixed_7c,
    #   PreLogits, Logits, AuxLogits, Predictions.
    # For retraining we need to exclude one or more of those logits/prediction
    # layers (the 1001-element output layer) so that we load everything except
    # that layer.
    saver.restore(sess, path)  # restore() runs the restore op itself; don't wrap it in sess.run()
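# A hedged sketch of the retraining idea from the comments above (the helper
# name is mine): restore everything except the 1001-way Logits/AuxLogits
# layers, so a fresh NUM_CLASSES-wide head can be trained on top. slim's
# get_variables_to_restore(exclude=...) filters variables by scope prefix.
def load_checkpoint_for_retraining(session, path):
    variables_to_restore = slim.get_variables_to_restore(
        exclude=['InceptionV3/Logits', 'InceptionV3/AuxLogits'])
    restorer = tf.train.Saver(variables_to_restore)
    restorer.restore(session, path)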
# A convenience function for resizing the flattened 784-pixel monochrome MNIST
# images into the 299x299x3 RGB images that the Inception model accepts as input.
RESIZE_FACTOR = (299 / 28)  # 28 * (299/28) = 299
def resize_images(images, mean=MEAN, std=STD):
    # Standardize first: subtracting the training-set mean and dividing by its
    # std gives inputs with mean 0 and variance 1, which tends to improve
    # recognition results. Not technically resizing, but note that mean and std
    # are calculated once from the whole training set and applied to everything
    # put into the network, in training and in production.
    # Additional info at
    # https://www.tensorflow.org/tutorials/image_recognition#usage_with_the_c_api:
    # pixel values are scaled from 0-255 integers to the floats the graph
    # operates on by first subtracting input_mean from each pixel, then
    # dividing by input_std. These values probably look somewhat magical, but
    # they are just defined by the original model author; for a graph you
    # trained yourself, use whatever values you used during training.
    standardized = (images - mean) / std
    reshaped = np.reshape(standardized, [-1, 28, 28, 1])  # reshape 784 to 28x28x1
    # Scale up to 299x299, then duplicate the single monochrome channel
    # across 3 RGB layers.
    resized = zoom(reshaped, [1.0, RESIZE_FACTOR, RESIZE_FACTOR, 1.0])
    resized = np.repeat(resized, 3, axis=3)  # add color channels
    return resized
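# Quick sanity check for resize_images (an illustrative helper, not part of
# the original flow): a (BATCH_SIZE, 784) MNIST batch should come out as
# (BATCH_SIZE, 299, 299, 3), since zoom maps 28 * (299/28) to 299 pixels.
def _check_resize_shapes():
    batch_xs, _ = mnist.train.next_batch(BATCH_SIZE)
    resized = resize_images(batch_xs)
    assert resized.shape == (BATCH_SIZE, 299, 299, 3), resized.shape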
sess = tf.InteractiveSession()
images = tf.placeholder(tf.float32, shape=(None, 299, 299, 3))
labels = tf.placeholder(tf.float32, shape=(None, NUM_CLASSES))  # one_hot labels are NUM_CLASSES wide (unused below)
logits, endpoints = create_network(images)
# print(logits)
# print("_______________")
# print(endpoints)
# Attempt to run with default weights and biases.
# Won't run: it raises an uninitialized-value exception, because we never
# actually initialized (or restored) the network's variables.
# ## import an image and apply it to the untrained network, just for debugging
# from skimage import io
# car = io.imread("data/car.jpg")
#
# car_scaled = zoom(car, [299 / car.shape[0], 299 / car.shape[1], 1])
#
# # add a batch dimension: (299, 299, 3) -> (1, 299, 299, 3)
# car_cnnable = np.array([car_scaled])
# print(car_cnnable.shape)
# # io.imshow(car_cnnable[0])
# # io.show()
#
# predictions = sess.run(logits, feed_dict={images: car_cnnable})
# print(predictions)
# predictions = np.squeeze(predictions)
# print(predictions)
# load_checkpoint(CHECKPOINT_PATH)
saver = tf.train.Saver()
saver.restore(sess, CHECKPOINT_PATH)
## import an image and run it through the now-restored network, just for debugging
from skimage import io
car = io.imread("data/car.jpg")
car_scaled = zoom(car, [299 / car.shape[0], 299 / car.shape[1], 1])
# add a batch dimension: (299, 299, 3) -> (1, 299, 299, 3)
car_cnnable = np.array([car_scaled])
print(car_cnnable.shape)
# io.imshow(car_cnnable[0])
# io.show()
predictions = sess.run(logits, feed_dict={images: car_cnnable})
predictions = np.squeeze(predictions) #shape (1, 1001) to shape (1001)
print(predictions.shape)
print(np.argmax(predictions))
print(predictions[np.argmax(predictions)])
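# Note (my assumption, based on the standard slim ImageNet checkpoints): with
# 1001 output classes, index 0 is a "background" class and indices 1..1000 map
# to the ImageNet synset labels, so the argmax here is offset by one from the
# usual 1000-class label files.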
## ISSUE - if I try to classify the image again, I get different results for some reason:
for _ in range(3):
    predictions = sess.run(logits, feed_dict={images: car_cnnable})
    predictions = np.squeeze(predictions)  # shape (1, 1001) to shape (1001,)
    print(predictions.shape)
    print(np.argmax(predictions))
    print(predictions[np.argmax(predictions)])
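# Likely diagnosis (my reading, not stated in the original): the graph above
# was built with is_training=True, so dropout randomly zeroes activations on
# every forward pass and the logits differ between runs. A sketch of
# deterministic inference, reusing the restored weights through tf.AUTO_REUSE
# via the eval-mode builder defined near the top:
eval_logits, _ = create_inference_network(images)
p1 = np.squeeze(sess.run(eval_logits, feed_dict={images: car_cnnable}))
p2 = np.squeeze(sess.run(eval_logits, feed_dict={images: car_cnnable}))
print(np.argmax(p1), np.argmax(p2))  # should now agree across runs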