# coding: utf-8
# # 5-Layer Convnet for classifying the MNIST dataset
# A five (5) layer Convolutional Neural Network for classifying MNIST handwritten digits.
# In[1]:
# importing the dependencies
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
try:  # notebook-only '%matplotlib inline' magic; harmless to skip as a plain script
    get_ipython().magic('matplotlib inline')
except NameError:
    pass
# In[2]:
# Loading the dataset
from tensorflow.examples.tutorials.mnist import input_data
dataset = input_data.read_data_sets('MNIST_data/', one_hot=True)
# In[3]:
print('Length of training data:{:>19,}'.format(dataset.train.num_examples))
print('Length of testing data:{:>20,}'.format(dataset.test.num_examples))
print('Length of validation data:{:>17,}'.format(dataset.validation.num_examples))
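# With this loader the standard split should be 55,000 training, 10,000 test,
# and 5,000 validation examples.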
# In[4]:
# Convert the one-hot labels into integer class labels
dataset.train.true = np.argmax(dataset.train.labels, axis=1)
dataset.test.true = np.argmax(dataset.test.labels, axis=1)
dataset.validation.true = np.argmax(dataset.validation.labels, axis=1)
# In[5]:
dataset.train.true[:4]
# In[6]:
dataset.test.true[:4]
# In[7]:
dataset.validation.true[:4]
# ### Define hyperparameters
# In[8]:
# Image dimensions
image_size = 28
image_shape = (image_size, image_size)
image_shape_flat = image_size * image_size
num_channel = 1
filter_size = 4
# Hidden Layer Channels
hidden1_channels = 8
hidden2_channels = 16
hidden3_channels = 32
hidden4_channels = 64
hidden5_channels = 128
fully_connected_1 = 512
fully_connected_2 = 256
num_classes = 10
learning_rate = 1e-3
dropout = 0.8     # dropout *keep* probability (fraction of units retained)
iterations = 0    # global counter of training steps run so far
batch_size = 24
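# Shape flow through the network (derived from the layers defined below):
# 28x28x1 input -> conv+pool -> 14x14x8 -> conv+pool -> 7x7x16 -> conv -> 7x7x32
# -> conv -> 7x7x64 -> conv -> 7x7x128 -> flatten (6,272) -> fc 512 -> fc 256 -> 10 classes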
# ### Helper functions for `weights`, `biases`, `conv2d`, & `max_pool`
# In[9]:
# Weight initialization
def weight(shape):
    initial = tf.truncated_normal(shape=shape, stddev=0.05)
    return tf.Variable(initial)

# Bias initialization
def bias(length):
    initial = tf.constant(value=0.05, shape=[length])
    return tf.Variable(initial)

# Convolutional operation (stride 1; SAME padding preserves spatial size)
def conv2d(X, W):
    return tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')

# Max pooling operation (2x2 window, stride 2 halves the spatial size)
def max_pool(X):
    return tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
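# Quick shape check (hypothetical usage of the helpers above): a 28x28x1 input
# through conv2d with a 4x4 filter stays 28x28 under SAME padding, and one
# max_pool then halves it to 14x14.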
# In[10]:
# Placeholder Variables
X = tf.placeholder(tf.float32, [None, image_shape_flat])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)
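# keep_prob is the dropout keep probability: feed `dropout` (0.8) during
# training and 1.0 during evaluation so that no units are dropped at test time.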
y_true = tf.argmax(y, axis=1)
# ## Building the Network
# ### (1st Convolutional Layer + Max pooling) Input Layer `>` Hidden Layer 1
# In[11]:
X_image = tf.reshape(X, shape=[-1, image_size, image_size, num_channel])
# In[12]:
W_hidden1 = weight(shape=[filter_size, filter_size, num_channel, hidden1_channels])
b_hidden1 = bias(length=hidden1_channels)
h_conv1 = tf.nn.relu(conv2d(X_image, W_hidden1) + b_hidden1)
h_pool1 = max_pool(h_conv1)
# In[13]:
h_pool1
# ### (2nd Convolutional Layer + Max pooling) Hidden Layer 1 `>` Hidden Layer 2
# In[14]:
W_hidden2 = weight(shape=[filter_size, filter_size, hidden1_channels, hidden2_channels])
b_hidden2 = bias(length=hidden2_channels)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_hidden2) + b_hidden2)
h_pool2 = max_pool(h_conv2)
# In[15]:
h_pool2
# ### (3rd Convolutional Layer) Hidden Layer 2 `>` Hidden Layer 3
# In[16]:
W_hidden3 = weight(shape=[filter_size, filter_size, hidden2_channels, hidden3_channels])
b_hidden3 = bias(length=hidden3_channels)
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_hidden3) + b_hidden3)
# In[17]:
h_conv3
# ### (4th Convolutional Layer) Hidden Layer 3 `>` Hidden Layer 4
# In[18]:
W_hidden4 = weight(shape=[filter_size, filter_size, hidden3_channels, hidden4_channels])
b_hidden4 = bias(length=hidden4_channels)
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_hidden4) + b_hidden4)
# In[19]:
h_conv4
# ### (5th Convolutional Layer) Hidden Layer 4 `>` Hidden Layer 5
# In[20]:
W_hidden5 = weight(shape=[filter_size, filter_size, hidden4_channels, hidden5_channels])
b_hidden5 = bias(length=hidden5_channels)
h_conv5 = tf.nn.relu(conv2d(h_conv4, W_hidden5) + b_hidden5)
# In[21]:
h_conv5
# ### (1st Fully Connected Layer) Hidden Layer 5 `>` Hidden Layer 6
# In[22]:
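# Two 2x2 max-pools reduced the 28x28 input to 7x7 (28 -> 14 -> 7), so each
# image is now a 7x7x128 volume, i.e. 7*7*128 = 6,272 features once flattened.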
h_conv5_flat = tf.reshape(h_conv5, shape=[-1, 7*7*hidden5_channels])
W_fc1 = weight(shape=[7*7*hidden5_channels, fully_connected_1])
b_fc1 = bias(length=fully_connected_1)
h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1)
# In[23]:
h_fc1
# ### (2nd Fully Connected Layer + Dropout) Hidden Layer 6 `>` Hidden Layer 7
# In[24]:
W_fc2 = weight(shape=[fully_connected_1, fully_connected_2])
b_fc2 = bias(length=fully_connected_2)
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
h_drop = tf.nn.dropout(h_fc2, keep_prob=keep_prob)
# In[25]:
h_drop
# ### Readout/Output Layer
# In[26]:
W_out = weight(shape=[fully_connected_2, num_classes])
b_out = bias(length=num_classes)
y_pred = tf.matmul(h_drop, W_out) + b_out
y_pred_true = tf.argmax(y_pred, axis=1)
# In[27]:
y_pred
# In[28]:
y_pred_true
# ### Cost function, and optimizer
# In[29]:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y, name='xentropy')
cost = tf.reduce_mean(cross_entropy, name='xentropy_mean')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
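# Note: softmax_cross_entropy_with_logits applies the softmax itself, which is
# why y_pred is left as raw logits (no activation on the readout layer).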
# ### Evaluating accuracy
# In[30]:
correct = tf.equal(y_true, y_pred_true)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# ### Running tensorflow's `Session()`
# In[31]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# ### `optimize()` and `print_accuracy()` function helper
# In[32]:
# Optimize helper: runs `num_iter` training steps, one mini-batch each
def optimize(num_iter=1):
    global iterations
    for i in tqdm(range(num_iter)):
        X_batch, y_batch = dataset.train.next_batch(batch_size)
        feed_dict_train = {X: X_batch,
                           y: y_batch,
                           keep_prob: dropout}
        sess.run(optimizer, feed_dict=feed_dict_train)
        iterations += 1
    print('Total number of iterations so far: {:,}'.format(iterations))

# Accuracy helper (keep_prob=1.0: dropout is disabled during evaluation)
def print_accuracy():
    X_batch, y_batch = dataset.test.next_batch(batch_size)
    feed_dict_test = {X: X_batch,
                      y: y_batch,
                      keep_prob: 1.0}
    acc = sess.run(accuracy, feed_dict=feed_dict_test)
    print('Accuracy after {:,} iterations = {:.2%}'.format(iterations, acc))
# In[46]:
def print_validation_accuracy():
    X_batch, y_batch = dataset.validation.next_batch(batch_size)
    feed_dict_val = {X: X_batch,
                     y: y_batch,
                     keep_prob: 1.0}
    acc = sess.run(accuracy, feed_dict=feed_dict_val)
    print('Accuracy on validation set: {:.2%}'.format(acc))
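# A minimal sketch, not in the original notebook: the helpers above score a
# single 24-example batch, which is a noisy estimate, so this sweeps the whole
# test set in chunks and averages the per-chunk accuracies by chunk size.
def print_full_test_accuracy():
    correct_total = 0.0
    num_test = dataset.test.num_examples
    for start in range(0, num_test, 256):
        end = min(start + 256, num_test)
        feed = {X: dataset.test.images[start:end],
                y: dataset.test.labels[start:end],
                keep_prob: 1.0}
        acc = sess.run(accuracy, feed_dict=feed)
        correct_total += acc * (end - start)
    print('Accuracy on full test set: {:.2%}'.format(correct_total / num_test))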
if __name__ == '__main__':
    # ### Training the network
    # In[33]:
    print_accuracy()
    # In[34]:
    optimize()
    # In[35]:
    print_accuracy()
    # In[36]:
    optimize(num_iter=9)
    # In[37]:
    optimize(num_iter=90)
    # In[38]:
    print_accuracy()
    # In[39]:
    optimize(num_iter=900)
    # In[40]:
    print_accuracy()
    # In[41]:
    optimize(num_iter=9000)
    # In[42]:
    print_accuracy()
    # In[43]:
    optimize(num_iter=100)
    # In[44]:
    optimize(num_iter=900)
    # In[45]:
    print_accuracy()
    # In[47]:
    print_validation_accuracy()