MNIST challenge of TFKR
# Lab 10 MNIST and Deep learning CNN
'''
Based on lab-11-2 of "모두의딥러닝" (Deep Learning for Everyone).
The concept was as follows:
1. Train until training accuracy exceeds 70%, then stop.
2. Add +10 to the labels of the misclassified training samples, i.e. try to
   classify them separately as their own classes.
3. Reinitialize all weights and biases, then train again.
4. Evaluate the retrained network on the test set; of course, any predicted
   class of 10 or above is mapped back with -10.
Bottom line: a complete failure.
Curiously, though, the final train-set accuracy was 0.87,
while the test set came out noticeably higher at 0.92.
'''
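# A minimal toy sketch of the relabeling scheme above (illustration only; the
# array here is hypothetical, and the real relabeling happens after the 1st
# training run further down):
#   toy = np.zeros(20); toy[3] = 1   # a sample labeled "3" (one-hot, 20-wide)
#   toy[13], toy[3] = 1, 0           # misclassified -> moved to class 13
#   np.argmax(toy) - 10              # at test time: 13 maps back to 3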
import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt

np.set_printoptions(threshold=np.inf)  # np.nan was tolerated by old NumPy; np.inf is the portable choice
from tensorflow.examples.tutorials.mnist import input_data

tf.set_random_seed(777)  # reproducibility
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Check out https://www.tensorflow.org/get_started/mnist/beginners for
# more information about the mnist dataset
# widen label shape from (55000, 10) to (55000, 20) for train
# and from (10000, 10) to (10000, 20) for test
print(mnist.train.images.shape)
train_images = mnist.train.images
test_images = mnist.test.images
train_labels = np.zeros((55000, 20))
train_labels[:, 0:10] = mnist.train.labels
test_labels = np.zeros((10000, 20))
test_labels[:, 0:10] = mnist.test.labels
print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)
print(test_labels.shape)
# hyper parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# dropout (keep_prob) rate 0.7~0.5 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

# input placeholders
X = tf.placeholder(tf.float32, [None, 784])
X_img = tf.reshape(X, [-1, 28, 28, 1])  # img 28x28x1 (black/white)
Y = tf.placeholder(tf.float32, [None, 20])
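# Y is 20-wide rather than the usual 10 so that, after the 1st training pass,
# misclassified training samples can be moved into "shadow" classes 10-19.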
# L1 ImgIn shape=(?, 28, 28, 1)
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
#    Conv -> (?, 28, 28, 32)
#    Pool -> (?, 14, 14, 32)
L1 = tf.nn.conv2d(X_img, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)
'''
Tensor("Conv2D:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("Relu:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("MaxPool:0", shape=(?, 14, 14, 32), dtype=float32)
Tensor("dropout/mul:0", shape=(?, 14, 14, 32), dtype=float32)
'''

# L2 ImgIn shape=(?, 14, 14, 32)
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
#    Conv -> (?, 14, 14, 64)
#    Pool -> (?, 7, 7, 64)
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)
'''
Tensor("Conv2D_1:0", shape=(?, 14, 14, 64), dtype=float32)
Tensor("Relu_1:0", shape=(?, 14, 14, 64), dtype=float32)
Tensor("MaxPool_1:0", shape=(?, 7, 7, 64), dtype=float32)
Tensor("dropout_1/mul:0", shape=(?, 7, 7, 64), dtype=float32)
'''

# L3 ImgIn shape=(?, 7, 7, 64)
W3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
#    Conv    -> (?, 7, 7, 128)
#    Pool    -> (?, 4, 4, 128)
#    Reshape -> (?, 4 * 4 * 128)  # Flatten them for FC
L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME')
L3 = tf.nn.relu(L3)
L3 = tf.nn.max_pool(L3, ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1], padding='SAME')
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)
L3 = tf.reshape(L3, [-1, 128 * 4 * 4])
'''
Tensor("Conv2D_2:0", shape=(?, 7, 7, 128), dtype=float32)
Tensor("Relu_2:0", shape=(?, 7, 7, 128), dtype=float32)
Tensor("MaxPool_2:0", shape=(?, 4, 4, 128), dtype=float32)
Tensor("dropout_2/mul:0", shape=(?, 4, 4, 128), dtype=float32)
Tensor("Reshape_1:0", shape=(?, 2048), dtype=float32)
'''
# L4 FC 4x4x128 inputs -> 625 outputs
W4 = tf.get_variable("W4", shape=[128 * 4 * 4, 625],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([625]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)
'''
Tensor("Relu_3:0", shape=(?, 625), dtype=float32)
Tensor("dropout_3/mul:0", shape=(?, 625), dtype=float32)
'''
# L5 Final FC 625 inputs -> 20 outputs (10 original classes + 10 relabeled ones)
W5 = tf.get_variable("W5", shape=[625, 20],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([20]))
hypothesis = tf.matmul(L4, W5) + b5
'''
Tensor("add_1:0", shape=(?, 20), dtype=float32)
'''
# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

y_pred_temp = tf.argmax(hypothesis, 1)
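# y_pred_temp yields the predicted class index (0-19) for each row of logits;
# evaluate()/evaluate_test() below run it batch-by-batch to collect predictions.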
# evaluate function
def evaluate(X_sample, y_sample, batch_size=100):
    """Run a minibatch accuracy op"""
    N = X_sample.shape[0]
    correct_sample = 0
    y_pred = np.array([])
    for i in range(0, N, batch_size):
        X_batch = X_sample[i: i + batch_size]
        y_batch = y_sample[i: i + batch_size]
        N_batch = X_batch.shape[0]
        feed = {
            X: X_batch,
            Y: y_batch,
            keep_prob: 1
        }
        correct_sample += sess.run(accuracy, feed_dict=feed) * N_batch
        y_temp = sess.run(y_pred_temp, feed_dict=feed)
        y_pred = np.append(y_pred, y_temp)
        #print("y_pred.shape:", y_pred.shape)
    return correct_sample / N, y_pred
def evaluate_test(X_sample, y_sample, batch_size=100):
    """Run a minibatch accuracy op; maps predictions >= 10 back to 0-9 in y_pred"""
    N = X_sample.shape[0]
    correct_sample = 0
    y_pred = np.array([])
    for i in range(0, N, batch_size):
        X_batch = X_sample[i: i + batch_size]
        y_batch = y_sample[i: i + batch_size]
        N_batch = X_batch.shape[0]
        feed = {
            X: X_batch,
            Y: y_batch,
            keep_prob: 1
        }
        correct_sample += sess.run(accuracy, feed_dict=feed) * N_batch
        y_temp = sess.run(y_pred_temp, feed_dict=feed)
        y_temp[y_temp >= 10] -= 10  # map a "shadow" class back to its digit
        y_pred = np.append(y_pred, y_temp)
        #print("y_pred.shape:", y_pred.shape)
    return correct_sample / N, y_pred
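# Note: the -10 remapping above only affects the returned y_pred array; the
# accuracy figure still comes from the raw 20-class accuracy op below, so a
# prediction in 10-19 counts as wrong on the test set, whose labels only
# occupy columns 0-9.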
# Test model and check accuracy
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# train my model
def train_model(accu_break=0.7):
    print('Learning started. It takes some time.')
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            # slice by whole batches, not single steps, so every sample is used
            batch_xs = train_images[i * batch_size:(i + 1) * batch_size]
            batch_ys = train_labels[i * batch_size:(i + 1) * batch_size]
            feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
            c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
            avg_cost += c / total_batch
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
        train_acc, temp = evaluate(train_images, train_labels)
        print("train_accuracy: ", train_acc)
        if train_acc > accu_break:
            return
# 1st train
train_model()

# re-labeling
_, pred_train_final = evaluate(train_images, train_labels)
train_y_final = np.argmax(train_labels, 1)
mid_train_result = (pred_train_final == train_y_final)
wrong = ~mid_train_result  # boolean mask of misclassified training samples
#print("False num: ", np.sum(wrong))
#print("mid_train_result: ", mid_train_result)
train_labels[wrong, 10:20] = train_labels[wrong, 0:10]
train_labels[wrong, 0:10] = 0
print("train_labels[wrong, :].shape", train_labels[wrong, :].shape)
# 2nd train - without accu_break
print("W1", sess.run(W1[1, :]))
sess.run(tf.global_variables_initializer())  # reinitialize all weights and biases
print("W1", sess.run(W1[1, :]))              # confirm W1 actually changed
train_model(accu_break=1.0)
print('\nLearning Finished!')

train_a, train_p = evaluate(train_images, train_labels)
test_a, test_p = evaluate_test(test_images, test_labels)
# train_p/test_p are NumPy arrays, so count with np.sum rather than a TF op
# (printing tf.reduce_sum(...) would only print the Tensor object, not a value)
print("pred num over 10 in train", np.sum(train_p >= 10))
print("pred num over 10 in test", np.sum(test_p >= 10))  # always 0: evaluate_test already remapped
print("final train_accuracy: ", train_a)
print("final test_accuracy: ", test_a)
#print("final test_pred: ", test_p)

# r = random.randint(0, mnist.test.num_examples - 1)
# plt.imshow(mnist.test.images[r:r + 1].
#            reshape(28, 28), cmap='Greys', interpolation='nearest')
# plt.show()
'''
Learning started. It takes some time.
Epoch: 0001 cost = 0.385748474
Epoch: 0002 cost = 0.092017397
Epoch: 0003 cost = 0.065854684
Epoch: 0004 cost = 0.055604566
Epoch: 0005 cost = 0.045996377
Epoch: 0006 cost = 0.040913645
Epoch: 0007 cost = 0.036924479
Epoch: 0008 cost = 0.032808939
Epoch: 0009 cost = 0.031791007
Epoch: 0010 cost = 0.030224456
Epoch: 0011 cost = 0.026849916
Epoch: 0012 cost = 0.026826763
Epoch: 0013 cost = 0.027188021
Epoch: 0014 cost = 0.023604777
Epoch: 0015 cost = 0.024607201
Learning Finished!
Accuracy: 0.9938
'''