Created
January 7, 2018 15:11
-
-
Save Syzygy2048/ddb8602652b547a71316ee0febfddbef to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# based on https://medium.com/initialized-capital/we-need-to-go-deeper-a-practical-guide-to-tensorflow-and-inception-50e66281804f | |
import tensorflow as tf | |
from tensorflow.examples.tutorials.mnist import input_data | |
mnist = input_data.read_data_sets('data/mnist', one_hot=True) | |
from tensorflow.contrib.slim.nets import inception as nn_architecture | |
from tensorflow.contrib import slim | |
from tensorflow.contrib.layers.python.layers import layers as layers_lib | |
import numpy as np | |
from scipy.ndimage.interpolation import zoom | |
# --- Paths and hyper-parameters ---------------------------------------------
CHECKPOINT_PATH = "checkpoints/inception_v3.ckpt"
NUM_CLASSES = 10
BATCH_SIZE = 20

# --- Dataset statistics -----------------------------------------------------
# Scalar mean / standard deviation taken over every pixel of the MNIST
# training images; resize_images() uses them as its default normalisation.
MEAN = mnist.train.images.mean()
STD = mnist.train.images.std()

# Number of examples in each split (one label row per example).
NUM_TRAIN = len(mnist.train.labels)
NUM_TEST = len(mnist.test.labels)
print("train data %d, test data %d" % (NUM_TRAIN, NUM_TEST))
def create_network(input, num_classes=1001, is_training=True):
    """Build an Inception V3 graph on top of ``input``.

    Args:
        input: Image batch tensor fed to the network (this script passes a
            ``(None, 299, 299, 3)`` float32 placeholder).
        num_classes: Size of the logits layer. Defaults to 1001, the size of
            the output layer mentioned in the checkpoint notes in this file.
        is_training: Forwarded to ``inception_v3``. Keep ``True`` while
            training; pass ``False`` for evaluation so that dropout is
            disabled. NOTE(review): batch norm presumably also switches to
            its stored statistics when this is ``False`` -- confirm against
            the slim documentation.

    Returns:
        The ``(logits, endpoints)`` pair returned by ``inception_v3``.
    """
    # 'updates_collections': None is a fix for an issue where the model
    # doesn't fit the checkpoint:
    # https://github.com/tensorflow/models/issues/2977
    batch_norm_scope = slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        normalizer_fn=slim.batch_norm,
        normalizer_params={'updates_collections': None},
    )
    with batch_norm_scope:
        # Keyword arguments instead of a ten-item positional list, so a
        # reordering cannot silently shift values into the wrong parameter.
        logits, endpoints = nn_architecture.inception_v3(
            input,
            num_classes=num_classes,
            is_training=is_training,
            dropout_keep_prob=0.8,
            min_depth=16,
            depth_multiplier=1.0,
            prediction_fn=layers_lib.softmax,
            spatial_squeeze=True,
            reuse=tf.AUTO_REUSE,
            scope='InceptionV3',
        )
    return logits, endpoints
def load_checkpoint(path):
    """Restore the variables of the current graph from a checkpoint.

    Uses the module-level ``sess``, so it may only be called after that
    session has been created and after the network graph has been built
    (the saver collects the variables that exist at construction time).

    Args:
        path: Path of the checkpoint file to restore from.
    """
    saver = tf.train.Saver()

    # NOTE: for retraining, the 1001-element output layer(s) have to be left
    # out of the restore so that everything except them is loaded.
    # Endpoint names reported by print(endpoints):
    #   Conv2d_1a_3x3, Conv2d_2a_3x3, Conv2d_2b_3x3, MaxPool_3a_3x3,
    #   Conv2d_3b_1x1, Conv2d_4a_3x3, MaxPool_5a_3x3,
    #   Mixed_5b, Mixed_5c, Mixed_5d,
    #   Mixed_6a, Mixed_6b, Mixed_6c, Mixed_6d, Mixed_6e,
    #   Mixed_7a, Mixed_7b, Mixed_7c,
    #   PreLogits, Logits, AuxLogits, Predictions
    # One or more of the last four are the candidates for exclusion.
    # Layer list source:
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/nets/inception_v3.py

    # Saver.restore() performs the restore itself and returns None; wrapping
    # it in sess.run() (as before) tried to fetch None and raised a TypeError
    # after the restore had already happened.
    saver.restore(sess, path)
# Zoom factor taking a 28-pixel MNIST edge to the 299-pixel edge that the
# Inception model accepts as input.
RESIZE_FACTOR = (299 / 28)


def resize_images(images, mean=MEAN, std=STD):
    """Convert flat 784-value monochrome images into 299x299x3 inputs.

    The images are first standardised as ``(images - mean) / std``. With the
    mean and standard deviation of the whole training set this gives the data
    zero mean and unit variance, which reportedly improves recognition
    results (informal advice, no formal source). The same statistics must be
    applied to everything fed to the network, in training and in production.
    Background on input mean/std scaling:
    https://www.tensorflow.org/tutorials/image_recognition#usage_with_the_c_api

    Args:
        images: Array of flattened 28x28 images (784 values per image).
        mean: Value subtracted from every pixel; defaults to ``MEAN``.
        std: Value every pixel is divided by; defaults to ``STD``.

    Returns:
        Array shaped ``[-1, 28, 28, 1]`` zoomed by ``RESIZE_FACTOR`` along
        both image axes, with the single channel repeated three times.
    """
    normalised = (images - mean) / std

    # 784 flat values -> 28x28 grid with one explicit channel axis.
    as_grid = np.reshape(normalised, [-1, 28, 28, 1])

    # Leave the batch and channel axes alone; enlarge height and width only.
    zoom_per_axis = [1.0, RESIZE_FACTOR, RESIZE_FACTOR, 1.0]
    upscaled = zoom(as_grid, zoom_per_axis)

    # Duplicate the monochrome channel across the 3 RGB layers.
    return np.repeat(upscaled, 3, axis=3)
# ---------------------------------------------------------------------------
# Debug script: build the graph, restore the pretrained checkpoint and
# classify one image several times.
# ---------------------------------------------------------------------------
sess = tf.InteractiveSession()
images = tf.placeholder(tf.float32, shape=(None, 299, 299, 3))
# Currently unused. NOTE(review): the MNIST labels are loaded one-hot, so a
# training placeholder would presumably need shape (None, NUM_CLASSES)
# rather than (None, 1) -- confirm before using it.
labels = tf.placeholder(tf.float32, shape=(None, 1))
logits, endpoints = create_network(images)

# The variables must be restored (or initialised) before the first
# sess.run(); running the freshly built graph fails with an
# uninitialized-value exception.
saver = tf.train.Saver()
saver.restore(sess, CHECKPOINT_PATH)

# Import a test image and push it through the network for debugging.
from skimage import io

car = io.imread("data/car.jpg")
# Work in float so the interpolation below is not computed in the image's
# integer dtype.
car = car.astype(np.float32)
# 299.0 keeps the zoom factors fractional regardless of the Python version.
car_scaled = zoom(car, [299.0 / car.shape[0], 299.0 / car.shape[1], 1])
# The pretrained slim Inception checkpoints expect pixel values scaled from
# 0..255 to [-1, 1]; raw 0..255 values give meaningless predictions.
car_scaled = (car_scaled / 255.0 - 0.5) * 2.0

# Add the leading batch axis: (299, 299, 3) -> (1, 299, 299, 3).
car_cnnable = np.array([car_scaled])
print(car_cnnable.shape)

# ISSUE: classifying the same image repeatedly gives different results.
# NOTE(review): most likely because the graph is built in training mode
# (is_training=True in create_network), which keeps dropout active on every
# run; building the network in inference mode should make the runs agree.
for _ in range(4):
    predictions = sess.run(logits, feed_dict={images: car_cnnable})
    predictions = np.squeeze(predictions)  # shape (1, 1001) to shape (1001)
    print(predictions.shape)
    print(np.argmax(predictions))
    print(predictions[np.argmax(predictions)])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment