Last active
September 6, 2017 18:58
-
-
Save bigsnarfdude/a0b18c2c346e197397ec7402146de5d1 to your computer and use it in GitHub Desktop.
[mnist] visualizing convnet tensorflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %% Imports | |
get_ipython().magic(u'matplotlib inline') | |
import tensorflow as tf | |
import tensorflow.examples.tutorials.mnist.input_data as input_data | |
from libs.utils import * | |
import matplotlib.pyplot as plt | |
# In[2]:
# %% Load MNIST (labels one-hot encoded) and declare the graph inputs.
# `x` and `y` are placeholders only: they hold no data until values are
# supplied through `feed_dict` when the graph is run.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])  # flattened images
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])  # one-hot labels
# In[3]:
# %% Convolutions operate on 4-D tensors, so unflatten every 784-vector
# into a 28 x 28 image with a single channel. The leading -1 tells
# `tf.reshape` to infer that dimension (the batch size) so that the
# total number of elements is preserved.
x_tensor = tf.reshape(x, shape=[-1, 28, 28, 1])
# In[4]:
# %% First convolutional layer.
# The kernel shape is [height, width, input_channels, output_channels];
# the input has one channel and the layer produces `n_filters_1` maps.
filter_size = 5
n_filters_1 = 16
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])
# In[5]:
# %% One bias value per output channel.
b_conv1 = bias_variable([n_filters_1])
# In[6]:
# %% Convolve, add the bias, then apply the ReLU non-linearity.
# Strides are listed as [batch, height, width, channels]; stepping by 2
# along height and width is used here instead of a pooling layer.
conv1 = tf.nn.conv2d(input=x_tensor,
                     filter=W_conv1,
                     strides=[1, 2, 2, 1],
                     padding='SAME')
h_conv1 = tf.nn.relu(conv1 + b_conv1)
# In[7]:
# %% Second convolutional layer, built the same way as the first: its
# kernels read the `n_filters_1` channels of `h_conv1` and emit
# `n_filters_2` channels, again with a stride of 2 in height and width.
n_filters_2 = 16
W_conv2 = weight_variable([filter_size, filter_size, n_filters_1, n_filters_2])
b_conv2 = bias_variable([n_filters_2])
conv2 = tf.nn.conv2d(input=h_conv1,
                     filter=W_conv2,
                     strides=[1, 2, 2, 1],
                     padding='SAME')
h_conv2 = tf.nn.relu(conv2 + b_conv2)
# In[8]:
# %% Flatten the convolutional output so it can feed a fully-connected
# layer. The 7 x 7 spatial size is what two stride-2 'SAME' convolutions
# leave of a 28 x 28 input (28 -> 14 -> 7).
n_flat = 7 * 7 * n_filters_2
h_conv2_flat = tf.reshape(h_conv2, [-1, n_flat])
# In[9]:
# %% Fully-connected hidden layer with `n_fc` units and ReLU activation.
n_fc = 1024
W_fc1 = weight_variable([n_flat, n_fc])
b_fc1 = bias_variable([n_fc])
fc1_pre_activation = tf.matmul(h_conv2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)
# In[10]:
# %% Dropout on the hidden layer for regularisation. `keep_prob` is a
# placeholder so that a different value can be fed for training and for
# evaluation.
keep_prob = tf.placeholder(dtype=tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# In[11]:
# %% Output layer: a linear map to 10 class scores followed by softmax,
# which turns the scores into a probability distribution per example.
W_fc2 = weight_variable([n_fc, 10])
b_fc2 = bias_variable([10])
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_pred = tf.nn.softmax(logits)
# In[12]:
# %% Define the loss and the training op.
# The loss is the cross-entropy between the one-hot labels `y` and the
# predicted distribution `y_pred`, summed over the batch.
# `y_pred` is clipped away from zero before taking the log: a softmax
# output that underflows to exactly 0 would give log(0) = -inf, and
# 0 * -inf = NaN would then poison the loss and every later update.
cross_entropy = -tf.reduce_sum(
    y * tf.log(tf.clip_by_value(y_pred, 1e-10, 1.0)))
# Adam with its default hyper-parameters minimises that loss.
optimizer = tf.train.AdamOptimizer().minimize(cross_entropy)
# In[13]:
# %% Accuracy: the fraction of examples whose highest-scoring predicted
# class equals the class marked in the one-hot label.
predicted_class = tf.argmax(y_pred, 1)
true_class = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_class, true_class)
# Cast the booleans to floats so that their mean is the hit rate.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# In[21]:
# %% Create a session and run the op that initialises every variable in
# the graph. `tf.global_variables_initializer()` replaces the deprecated
# `tf.initialize_all_variables()`.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# In[22]:
# %% Train in minibatches and report validation accuracy.
# NOTE(review): the source had lost its indentation; the accuracy report
# is placed once per epoch (outside the batch loop) - confirm.
batch_size = 100
n_epochs = 5
for epoch_i in range(n_epochs):
    # One pass over the training set, `batch_size` examples at a time.
    for batch_i in range(mnist.train.num_examples // batch_size):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Training step with dropout active (keep half of the units).
        sess.run(optimizer, feed_dict={
            x: batch_xs, y: batch_ys, keep_prob: 0.5})
    # Evaluate on the validation split with dropout disabled.
    print(sess.run(accuracy,
                   feed_dict={
                       x: mnist.validation.images,
                       y: mnist.validation.labels,
                       keep_prob: 1.0
                   }))
# In[23]:
# %% Visualise the kernels learned by the first convolutional layer:
# fetch the current weights from the session, scale them by their
# largest value and tile them into a single image with `montage`.
W = sess.run(W_conv1)
W_scaled = W / W.max()
plt.imshow(montage(W_scaled), cmap='coolwarm')
# In[25]:
# %% Visualise the kernels learned by the second convolutional layer:
# fetch the current weights from the session, scale them by their
# largest value and tile them into a single image with `montage`.
W = sess.run(W_conv2)
W_scaled = W / W.max()
plt.imshow(montage(W_scaled), cmap='coolwarm')
# In[ ]:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
where is your code for montage?