MNIST challenge of TFKR
# Lab 10 MNIST and Deep learning CNN
'''
Based on lab-11-2 of "모두의딥러닝" (Deep Learning for Everyone).
The concept was as follows:
1. Train until training accuracy exceeds 70%, then stop.
2. Add +10 to the labels of the misclassified training samples, i.e. try to
   classify them separately as their own classes.
3. Reinitialize all weights and biases, then train again.
4. Evaluate the retrained network on the test set; of course, any predicted
   class of 10 or above is mapped back with -10.
Bottom line: a complete failure.
Curiously, though, the final train-set accuracy was 0.87,
while the test set came out noticeably higher at 0.92.
'''
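# A minimal toy sketch of the relabeling scheme above (illustration only; the
# array here is hypothetical, and the real relabeling happens after the 1st
# training run further down):
#   toy = np.zeros(20); toy[3] = 1   # a sample labeled "3" (one-hot, 20-wide)
#   toy[13], toy[3] = 1, 0           # misclassified -> moved to class 13
#   np.argmax(toy) - 10              # at test time: 13 maps back to 3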
import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt

np.set_printoptions(threshold=np.inf)  # np.nan was tolerated by old NumPy; np.inf is the portable choice
from tensorflow.examples.tutorials.mnist import input_data

tf.set_random_seed(777)  # reproducibility
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Check out https://www.tensorflow.org/get_started/mnist/beginners for
# more information about the mnist dataset
# widen label shape from (55000, 10) to (55000, 20) for train
# and from (10000, 10) to (10000, 20) for test
print(mnist.train.images.shape)
train_images = mnist.train.images
test_images = mnist.test.images
train_labels = np.zeros((55000, 20))
train_labels[:, 0:10] = mnist.train.labels
test_labels = np.zeros((10000, 20))
test_labels[:, 0:10] = mnist.test.labels
print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)
print(test_labels.shape)
# hyper parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# dropout (keep_prob) rate 0.7~0.5 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

# input placeholders
X = tf.placeholder(tf.float32, [None, 784])
X_img = tf.reshape(X, [-1, 28, 28, 1])  # img 28x28x1 (black/white)
Y = tf.placeholder(tf.float32, [None, 20])
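# Y is 20-wide rather than the usual 10 so that, after the 1st training pass,
# misclassified training samples can be moved into "shadow" classes 10-19.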
# L1 ImgIn shape=(?, 28, 28, 1)
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
#    Conv -> (?, 28, 28, 32)
#    Pool -> (?, 14, 14, 32)
L1 = tf.nn.conv2d(X_img, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)
'''
Tensor("Conv2D:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("Relu:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("MaxPool:0", shape=(?, 14, 14, 32), dtype=float32)
Tensor("dropout/mul:0", shape=(?, 14, 14, 32), dtype=float32)
'''

# L2 ImgIn shape=(?, 14, 14, 32)
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
#    Conv -> (?, 14, 14, 64)
#    Pool -> (?, 7, 7, 64)
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)
'''
Tensor("Conv2D_1:0", shape=(?, 14, 14, 64), dtype=float32)
Tensor("Relu_1:0", shape=(?, 14, 14, 64), dtype=float32)
Tensor("MaxPool_1:0", shape=(?, 7, 7, 64), dtype=float32)
Tensor("dropout_1/mul:0", shape=(?, 7, 7, 64), dtype=float32)
'''

# L3 ImgIn shape=(?, 7, 7, 64)
W3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
#    Conv    -> (?, 7, 7, 128)
#    Pool    -> (?, 4, 4, 128)
#    Reshape -> (?, 4 * 4 * 128)  # Flatten them for FC
L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME')
L3 = tf.nn.relu(L3)
L3 = tf.nn.max_pool(L3, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)
L3 = tf.reshape(L3, [-1, 128 * 4 * 4])
'''
Tensor("Conv2D_2:0", shape=(?, 7, 7, 128), dtype=float32)
Tensor("Relu_2:0", shape=(?, 7, 7, 128), dtype=float32)
Tensor("MaxPool_2:0", shape=(?, 4, 4, 128), dtype=float32)
Tensor("dropout_2/mul:0", shape=(?, 4, 4, 128), dtype=float32)
Tensor("Reshape_1:0", shape=(?, 2048), dtype=float32)
'''
# L4 FC 4x4x128 inputs -> 625 outputs
W4 = tf.get_variable("W4", shape=[128 * 4 * 4, 625],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([625]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)
'''
Tensor("Relu_3:0", shape=(?, 625), dtype=float32)
Tensor("dropout_3/mul:0", shape=(?, 625), dtype=float32)
'''
# L5 Final FC 625 inputs -> 20 outputs (10 original classes + 10 relabeled ones)
W5 = tf.get_variable("W5", shape=[625, 20],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([20]))
hypothesis = tf.matmul(L4, W5) + b5
'''
Tensor("add_1:0", shape=(?, 20), dtype=float32)
'''
# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

y_pred_temp = tf.argmax(hypothesis, 1)
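# y_pred_temp yields the predicted class index (0-19) for each row of logits;
# evaluate()/evaluate_test() below run it batch-by-batch to collect predictions.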
# evaluate function
def evaluate(X_sample, y_sample, batch_size=100):
    """Run a minibatch accuracy op"""
    N = X_sample.shape[0]
    correct_sample = 0
    y_pred = np.array([])
    for i in range(0, N, batch_size):
        X_batch = X_sample[i: i + batch_size]
        y_batch = y_sample[i: i + batch_size]
        N_batch = X_batch.shape[0]
        feed = {
            X: X_batch,
            Y: y_batch,
            keep_prob: 1
        }
        correct_sample += sess.run(accuracy, feed_dict=feed) * N_batch
        y_temp = sess.run(y_pred_temp, feed_dict=feed)
        y_pred = np.append(y_pred, y_temp)
        #print("y_pred.shape:", y_pred.shape)
    return correct_sample / N, y_pred
def evaluate_test(X_sample, y_sample, batch_size=100):
    """Run a minibatch accuracy op; maps predictions >= 10 back to 0-9 in y_pred"""
    N = X_sample.shape[0]
    correct_sample = 0
    y_pred = np.array([])
    for i in range(0, N, batch_size):
        X_batch = X_sample[i: i + batch_size]
        y_batch = y_sample[i: i + batch_size]
        N_batch = X_batch.shape[0]
        feed = {
            X: X_batch,
            Y: y_batch,
            keep_prob: 1
        }
        correct_sample += sess.run(accuracy, feed_dict=feed) * N_batch
        y_temp = sess.run(y_pred_temp, feed_dict=feed)
        y_temp[y_temp >= 10] -= 10  # map a "shadow" class back to its digit
        y_pred = np.append(y_pred, y_temp)
        #print("y_pred.shape:", y_pred.shape)
    return correct_sample / N, y_pred
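# Note: the -10 remapping above only affects the returned y_pred array; the
# accuracy figure still comes from the raw 20-class accuracy op below, so a
# prediction in 10-19 counts as wrong on the test set, whose labels only
# occupy columns 0-9.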
# Test model and check accuracy
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# train my model
def train_model(accu_break=0.7):
    print('Learning started. It takes some time.')
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            # slice by whole batches, not single steps, so every sample is used
            batch_xs = train_images[i * batch_size:(i + 1) * batch_size]
            batch_ys = train_labels[i * batch_size:(i + 1) * batch_size]
            feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
            c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
            avg_cost += c / total_batch
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
        train_acc, temp = evaluate(train_images, train_labels)
        print("train_accuracy: ", train_acc)
        if train_acc > accu_break:
            return
# 1st train
train_model()

# re-labeling
_, pred_train_final = evaluate(train_images, train_labels)
train_y_final = np.argmax(train_labels, 1)
mid_train_result = (pred_train_final == train_y_final)
wrong = ~mid_train_result  # boolean mask of misclassified training samples
#print("False num: ", np.sum(wrong))
#print("mid_train_result: ", mid_train_result)
train_labels[wrong, 10:20] = train_labels[wrong, 0:10]
train_labels[wrong, 0:10] = 0
print("train_labels[wrong, :].shape", train_labels[wrong, :].shape)
# 2nd train - without accu_break
print("W1", sess.run(W1[1, :]))
sess.run(tf.global_variables_initializer())  # reinitialize all weights and biases
print("W1", sess.run(W1[1, :]))              # confirm W1 actually changed
train_model(accu_break=1.0)
print('\nLearning Finished!')

train_a, train_p = evaluate(train_images, train_labels)
test_a, test_p = evaluate_test(test_images, test_labels)
# train_p/test_p are NumPy arrays, so count with np.sum rather than a TF op
# (printing tf.reduce_sum(...) would only print the Tensor object, not a value)
print("pred num over 10 in train", np.sum(train_p >= 10))
print("pred num over 10 in test", np.sum(test_p >= 10))  # always 0: evaluate_test already remapped
print("final train_accuracy: ", train_a)
print("final test_accuracy: ", test_a)
#print("final test_pred: ", test_p)

# r = random.randint(0, mnist.test.num_examples - 1)
# plt.imshow(mnist.test.images[r:r + 1].
#            reshape(28, 28), cmap='Greys', interpolation='nearest')
# plt.show()
'''
Learning started. It takes some time.
Epoch: 0001 cost = 0.385748474
Epoch: 0002 cost = 0.092017397
Epoch: 0003 cost = 0.065854684
Epoch: 0004 cost = 0.055604566
Epoch: 0005 cost = 0.045996377
Epoch: 0006 cost = 0.040913645
Epoch: 0007 cost = 0.036924479
Epoch: 0008 cost = 0.032808939
Epoch: 0009 cost = 0.031791007
Epoch: 0010 cost = 0.030224456
Epoch: 0011 cost = 0.026849916
Epoch: 0012 cost = 0.026826763
Epoch: 0013 cost = 0.027188021
Epoch: 0014 cost = 0.023604777
Epoch: 0015 cost = 0.024607201
Learning Finished!
Accuracy: 0.9938
'''