# coding: utf-8
# # 5-Layer Convnet for classifying the MNIST dataset
# A five (5) layer Convolutional Neural Network for classifying MNIST handwritten digits.
# In[1]:
# importing the dependencies
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
try:  # notebook-only '%matplotlib inline' magic; harmless to skip as a plain script
    get_ipython().magic('matplotlib inline')
except NameError:
    pass
# In[2]:
# Loading the dataset
from tensorflow.examples.tutorials.mnist import input_data
dataset = input_data.read_data_sets('MNIST_data/', one_hot=True)
# In[3]:
print('Length of training data:{:>19,}'.format(dataset.train.num_examples))
print('Length of testing data:{:>20,}'.format(dataset.test.num_examples))
print('Length of validation data:{:>17,}'.format(dataset.validation.num_examples))
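# With this loader the standard split should be 55,000 training, 10,000 test,
# and 5,000 validation examples.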
# In[4]:
# Convert the one-hot labels into integer class labels
dataset.train.true = np.argmax(dataset.train.labels, axis=1)
dataset.test.true = np.argmax(dataset.test.labels, axis=1)
dataset.validation.true = np.argmax(dataset.validation.labels, axis=1)
# In[5]:
dataset.train.true[:4]
# In[6]:
dataset.test.true[:4]
# In[7]:
dataset.validation.true[:4]
# ### Define hyperparameters
# In[8]:
# Image dimensions
image_size = 28
image_shape = (image_size, image_size)
image_shape_flat = image_size * image_size
num_channel = 1
filter_size = 4
# Hidden Layer Channels
hidden1_channels = 8
hidden2_channels = 16
hidden3_channels = 32
hidden4_channels = 64
hidden5_channels = 128
fully_connected_1 = 512
fully_connected_2 = 256
num_classes = 10
learning_rate = 1e-3
dropout = 0.8     # dropout *keep* probability (fraction of units retained)
iterations = 0    # global counter of training steps run so far
batch_size = 24
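# Shape flow through the network (derived from the layers defined below):
# 28x28x1 input -> conv+pool -> 14x14x8 -> conv+pool -> 7x7x16 -> conv -> 7x7x32
# -> conv -> 7x7x64 -> conv -> 7x7x128 -> flatten (6,272) -> fc 512 -> fc 256 -> 10 classes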
# ### Helper functions for `weights`, `biases`, `conv2d`, & `max_pool`
# In[9]:
# Weight initialization
def weight(shape):
    initial = tf.truncated_normal(shape=shape, stddev=0.05)
    return tf.Variable(initial)

# Bias initialization
def bias(length):
    initial = tf.constant(value=0.05, shape=[length])
    return tf.Variable(initial)

# Convolutional operation (stride 1; SAME padding preserves spatial size)
def conv2d(X, W):
    return tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')

# Max pooling operation (2x2 window, stride 2 halves the spatial size)
def max_pool(X):
    return tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
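# Quick shape check (hypothetical usage of the helpers above): a 28x28x1 input
# through conv2d with a 4x4 filter stays 28x28 under SAME padding, and one
# max_pool then halves it to 14x14.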
# In[10]:
# Placeholder Variables
X = tf.placeholder(tf.float32, [None, image_shape_flat])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)
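# keep_prob is the dropout keep probability: feed `dropout` (0.8) during
# training and 1.0 during evaluation so that no units are dropped at test time.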
y_true = tf.argmax(y, axis=1)
# ## Building the Network
# ### (1st Convolutional Layer + Max pooling) Input Layer `>` Hidden Layer 1
# In[11]:
X_image = tf.reshape(X, shape=[-1, image_size, image_size, num_channel])
# In[12]:
W_hidden1 = weight(shape=[filter_size, filter_size, num_channel, hidden1_channels])
b_hidden1 = bias(length=hidden1_channels)
h_conv1 = tf.nn.relu(conv2d(X_image, W_hidden1) + b_hidden1)
h_pool1 = max_pool(h_conv1)
# In[13]:
h_pool1
# ### (2nd Convolutional Layer + Max pooling) Hidden Layer 1 `>` Hidden Layer 2
# In[14]:
W_hidden2 = weight(shape=[filter_size, filter_size, hidden1_channels, hidden2_channels])
b_hidden2 = bias(length=hidden2_channels)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_hidden2) + b_hidden2)
h_pool2 = max_pool(h_conv2)
# In[15]:
h_pool2
# ### (3rd Convolutional Layer) Hidden Layer 2 `>` Hidden Layer 3
# In[16]:
W_hidden3 = weight(shape=[filter_size, filter_size, hidden2_channels, hidden3_channels])
b_hidden3 = bias(length=hidden3_channels)
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_hidden3) + b_hidden3)
# In[17]:
h_conv3
# ### (4th Convolutional Layer) Hidden Layer 3 `>` Hidden Layer 4
# In[18]:
W_hidden4 = weight(shape=[filter_size, filter_size, hidden3_channels, hidden4_channels])
b_hidden4 = bias(length=hidden4_channels)
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_hidden4) + b_hidden4)
# In[19]:
h_conv4
# ### (5th Convolutional Layer) Hidden Layer 4 `>` Hidden Layer 5
# In[20]:
W_hidden5 = weight(shape=[filter_size, filter_size, hidden4_channels, hidden5_channels])
b_hidden5 = bias(length=hidden5_channels)
h_conv5 = tf.nn.relu(conv2d(h_conv4, W_hidden5) + b_hidden5)
# In[21]:
h_conv5
# ### (1st Fully Connected Layer) Hidden Layer 5 `>` Hidden Layer 6
# In[22]:
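# Two 2x2 max-pools reduced the 28x28 input to 7x7 (28 -> 14 -> 7), so each
# image is now a 7x7x128 volume, i.e. 7*7*128 = 6,272 features once flattened.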
h_conv5_flat = tf.reshape(h_conv5, shape=[-1, 7*7*hidden5_channels])
W_fc1 = weight(shape=[7*7*hidden5_channels, fully_connected_1])
b_fc1 = bias(length=fully_connected_1)
h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1)
# In[23]:
h_fc1
# ### (2nd Fully Connected Layer + Dropout) Hidden Layer 6 `>` Hidden Layer 7
# In[24]:
W_fc2 = weight(shape=[fully_connected_1, fully_connected_2])
b_fc2 = bias(length=fully_connected_2)
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
h_drop = tf.nn.dropout(h_fc2, keep_prob=keep_prob)
# In[25]:
h_drop
# ### Readout/Output Layer
# In[26]:
W_out = weight(shape=[fully_connected_2, num_classes])
b_out = bias(length=num_classes)
y_pred = tf.matmul(h_drop, W_out) + b_out
y_pred_true = tf.argmax(y_pred, axis=1)
# In[27]:
y_pred
# In[28]:
y_pred_true
# ### Cost function, and optimizer
# In[29]:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y, name='xentropy')
cost = tf.reduce_mean(cross_entropy, name='xentropy_mean')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
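# Note: softmax_cross_entropy_with_logits applies the softmax itself, which is
# why y_pred is left as raw logits (no activation on the readout layer).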
# ### Evaluating accuracy
# In[30]:
correct = tf.equal(y_true, y_pred_true)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# ### Running tensorflow's `Session()`
# In[31]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# ### `optimize()` and `print_accuracy()` function helper
# In[32]:
# Optimize helper: runs `num_iter` training steps, one mini-batch each
def optimize(num_iter=1):
    global iterations
    for i in tqdm(range(num_iter)):
        X_batch, y_batch = dataset.train.next_batch(batch_size)
        feed_dict_train = {X: X_batch,
                           y: y_batch,
                           keep_prob: dropout}
        sess.run(optimizer, feed_dict=feed_dict_train)
        iterations += 1
    print('Total number of iterations so far: {:,}'.format(iterations))

# Accuracy helper (keep_prob=1.0: dropout is disabled during evaluation)
def print_accuracy():
    X_batch, y_batch = dataset.test.next_batch(batch_size)
    feed_dict_test = {X: X_batch,
                      y: y_batch,
                      keep_prob: 1.0}
    acc = sess.run(accuracy, feed_dict=feed_dict_test)
    print('Accuracy after {:,} iterations = {:.2%}'.format(iterations, acc))
# In[46]:
def print_validation_accuracy():
    X_batch, y_batch = dataset.validation.next_batch(batch_size)
    feed_dict_val = {X: X_batch,
                     y: y_batch,
                     keep_prob: 1.0}
    acc = sess.run(accuracy, feed_dict=feed_dict_val)
    print('Accuracy on validation set: {:.2%}'.format(acc))
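# A minimal sketch, not in the original notebook: the helpers above score a
# single 24-example batch, which is a noisy estimate, so this sweeps the whole
# test set in chunks and averages the per-chunk accuracies by chunk size.
def print_full_test_accuracy():
    correct_total = 0.0
    num_test = dataset.test.num_examples
    for start in range(0, num_test, 256):
        end = min(start + 256, num_test)
        feed = {X: dataset.test.images[start:end],
                y: dataset.test.labels[start:end],
                keep_prob: 1.0}
        acc = sess.run(accuracy, feed_dict=feed)
        correct_total += acc * (end - start)
    print('Accuracy on full test set: {:.2%}'.format(correct_total / num_test))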
if __name__ == '__main__':
    # ### Training the network
    # In[33]:
    print_accuracy()
    # In[34]:
    optimize()
    # In[35]:
    print_accuracy()
    # In[36]:
    optimize(num_iter=9)
    # In[37]:
    optimize(num_iter=90)
    # In[38]:
    print_accuracy()
    # In[39]:
    optimize(num_iter=900)
    # In[40]:
    print_accuracy()
    # In[41]:
    optimize(num_iter=9000)
    # In[42]:
    print_accuracy()
    # In[43]:
    optimize(num_iter=100)
    # In[44]:
    optimize(num_iter=900)
    # In[45]:
    print_accuracy()
    # In[47]:
    print_validation_accuracy()