# Last active April 29, 2018 04:39
# coding: utf-8
# ### Homework 4
# #### Problem 3 -- Autoencoder
# ###### John Evans
# ###### 4/26/18
# ##### Imports
# In[ ]:
from tqdm import tqdm
import numpy as np
import cv2
import sys
import os
# Suppress TensorFlow's C++ log output. An optional second command-line
# argument selects the level; without it the level defaults to '3'.
# (Previously the default unconditionally overwrote the command-line value.)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = sys.argv[2] if len(sys.argv) > 2 else '3'
# Leave the next line commented out to use the GPU
#os.environ['CUDA_VISIBLE_DEVICES'] = 'all'
# Add the NVIDIA/CUDA library paths to the environment (uncomment if needed)
#os.environ['LD_LIBRARY_PATH'] = '/usr/local/nvidia/lib64:'
#os.environ['USE_CUDA_PATH'] = '/usr/local/nvidia/lib64:'
#os.environ['LIBRARY_PATH'] = '/usr/local/nvidia/lib64:'
#os.environ['PATH'] = '/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/nvidia/lib64:$PATH'
from sklearn.model_selection import train_test_split
import tensorflow as tf
# ##### Data preparation functions
# In[ ]:
# Import images from files
def import_images(wd):
    """Load every decodable image found directly inside directory ``wd``.

    Images are read as single-channel grayscale when the module-level
    ``num_channels`` equals 1, and with OpenCV's default colour mode
    otherwise. Directory entries that ``cv2.imread`` cannot decode (it
    returns ``None`` for them) are skipped.

    Args:
        wd: Path of the directory to scan.

    Returns:
        A list of the arrays returned by ``cv2.imread``, in ``os.listdir``
        order.
    """
    # The read mode depends only on the global channel count, so pick it once.
    # IMREAD_GRAYSCALE is the literal 0 the script used; IMREAD_COLOR is
    # imread's default flag.
    read_flag = cv2.IMREAD_GRAYSCALE if num_channels == 1 else cv2.IMREAD_COLOR
    images = []
    for entry in tqdm(os.listdir(wd)):
        image = cv2.imread(os.path.join(wd, entry), read_flag)
        if image is not None:
            images.append(image)
    return images
# Process images (make square, resize)
def process_images(images, img_size, num_channels):
    """Scale each image to fit an ``img_size`` square and pad it with black.

    Every image is resized with its aspect ratio preserved so that its longer
    side is at most ``img_size`` pixels, then centred on an
    ``img_size`` x ``img_size`` canvas using a constant black border.

    Args:
        images: Iterable of image arrays whose first two axes are
            (rows, columns).
        img_size: Side length, in pixels, of the square output images.
        num_channels: Not used by this function; kept so existing calls
            continue to work.

    Returns:
        A list with one padded image per input image.
    """
    black = [0, 0, 0]
    new_images = []
    for im in tqdm(images):
        # Only the spatial axes matter; a trailing channel axis must not take
        # part in the scale computation.
        height, width = im.shape[:2]
        ratio = float(img_size) / max(height, width)
        # Clamp to 1 so extreme aspect ratios never yield a zero-sized resize.
        new_h = max(1, int(height * ratio))
        new_w = max(1, int(width * ratio))
        im = cv2.resize(im, (new_w, new_h))  # cv2 takes (width, height)
        delta_h = img_size - new_h
        delta_w = img_size - new_w
        top, left = delta_h // 2, delta_w // 2
        # Any odd leftover pixel goes to the bottom/right border.
        im = cv2.copyMakeBorder(im, top, delta_h - top, left, delta_w - left,
                                cv2.BORDER_CONSTANT, value=black)
        new_images.append(im)
    return new_images
# ##### Layer and model functions
# In[ ]:
# Convolution Layer
def convolution_layer(name, data, kernel_shape, strides=[1, 1, 1, 1]):
    """Apply a SAME-padded 2-D convolution, add a bias, then ReLU.

    Args:
        name: Name scope for the ops; also the suffix of the variable names
            (``w_<name>`` and ``b_<name>``).
        data: Input tensor handed to ``tf.nn.conv2d``.
        kernel_shape: Shape of the filter variable; its fourth entry is used
            as the length of the bias vector.
        strides: Strides forwarded to ``tf.nn.conv2d``.

    Returns:
        The ReLU-activated convolution output.
    """
    xavier = tf.contrib.layers.xavier_initializer
    with tf.name_scope(name):
        weights = tf.get_variable(name='w_' + name,
                                  shape=kernel_shape,
                                  initializer=xavier(uniform=False))
        bias = tf.get_variable(name='b_' + name,
                               shape=[kernel_shape[3]],
                               initializer=xavier(uniform=False))
        convolved = tf.nn.conv2d(data, weights, strides=strides, padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(convolved, bias))
# Pooling Layer
def pooling_layer(name, data, kernel_shape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
    """Max-pool ``data`` with SAME padding inside name scope ``name``.

    Args:
        name: Name scope for the pooling op.
        data: Input tensor handed to ``tf.nn.max_pool``.
        kernel_shape: Pooling window, forwarded as ``ksize``.
        strides: Window strides.

    Returns:
        The pooled tensor.
    """
    with tf.name_scope(name):
        pooled = tf.nn.max_pool(data,
                                ksize=kernel_shape,
                                strides=strides,
                                padding='SAME')
        return pooled
# Dropout Layer
def dropout_layer(name, data, keep_rate):
    """Apply ``tf.nn.dropout`` to ``data`` inside name scope ``name``.

    Args:
        name: Name scope for the dropout op.
        data: Input tensor.
        keep_rate: Forwarded as the second positional argument of
            ``tf.nn.dropout``.

    Returns:
        The tensor produced by ``tf.nn.dropout``.
    """
    with tf.name_scope(name):
        dropped = tf.nn.dropout(data, keep_rate)
        return dropped
# Fully Connected Layer
def fully_connected_layer(name, data, nodes, has_color = False):
    """Flatten ``data`` per sample and apply a dense layer with ReLU.

    Args:
        name: Name scope for the ops; also the suffix of the variable names
            (``w_<name>`` and ``b_<name>``).
        data: Input tensor whose first axis is the batch axis. All remaining
            axes must have statically known sizes.
        nodes: Number of output units.
        has_color: When true, the input is reshaped to
            ``[-1, input_size, 3]`` instead of ``[-1, input_size]``.

    Returns:
        ``relu(data @ W + b)``.

    Raises:
        ValueError: If any non-batch dimension of ``data`` is unknown.
    """
    with tf.name_scope(name):
        # Features per sample = product of every non-batch dimension.
        input_size = data.shape[1:].num_elements()
        if input_size is None:
            raise ValueError(
                'fully_connected_layer %r needs statically known non-batch '
                'dimensions, got shape %s' % (name, data.shape))
        input_size = int(input_size)
        xavier = tf.contrib.layers.xavier_initializer
        W = tf.get_variable(name='w_' + name, shape=[input_size, nodes],
                            initializer=xavier(uniform=False))
        b = tf.get_variable(name='b_' + name, shape=[nodes],
                            initializer=xavier(uniform=False))
        if has_color:
            # NOTE(review): this yields a rank-3 tensor, which tf.matmul
            # cannot multiply with the rank-2 weights below. The intended
            # colour handling is unclear, so the reshape is left unchanged --
            # confirm intent before relying on has_color=True.
            data = tf.reshape(data, [-1, input_size, 3])
        else:
            data = tf.reshape(data, [-1, input_size])
        return tf.nn.relu(tf.add(tf.matmul(data, W), b))
# Deconvolution Layer
def deconvolution_layer(name, data, num_outputs, kernel_shape, strides=[1, 1]):
    """Apply a SAME-padded transposed convolution followed by ReLU.

    Args:
        name: Name scope for the ops.
        data: Input tensor handed to ``conv2d_transpose``.
        num_outputs: Number of output filters.
        kernel_shape: Forwarded as ``kernel_size``.
        strides: Forwarded as ``stride``.

    Returns:
        The output of ``tf.contrib.layers.conv2d_transpose``.
    """
    with tf.name_scope(name):
        bias_init = tf.contrib.layers.xavier_initializer(uniform=False)
        return tf.contrib.layers.conv2d_transpose(
            data,
            num_outputs=num_outputs,
            kernel_size=kernel_shape,
            stride=strides,
            padding='SAME',
            biases_initializer=bias_init,
            activation_fn=tf.nn.relu,
        )
# Upsample Layer
def upsample_layer(name, data, scale_factor=[2,2]):
    """Bilinearly resize ``data`` by ``scale_factor`` along axes 1 and 2.

    Args:
        name: Name scope for the resize op.
        data: Input tensor; the sizes of axes 1 and 2 must be convertible
            to ``int``.
        scale_factor: Two multipliers, applied to axis 1 and axis 2
            respectively.

    Returns:
        The tensor produced by ``tf.image.resize_bilinear``.
    """
    with tf.name_scope(name):
        new_height = int(data.shape[1] * scale_factor[0])
        new_width = int(data.shape[2] * scale_factor[1])
        return tf.image.resize_bilinear(data,
                                        size=[new_height, new_width],
                                        align_corners=None,
                                        name=None)
# Network model from layers
def model(name, data, img_size=128, keep_rate=0.8):
    """Build the convolutional autoencoder graph and its reconstruction cost.

    ``img_size`` must be even. With the default ``img_size`` of 128 the goal
    is a dimensionality reduction from 16384 to 4096:

        reshape1 --> 128, 128 (16384)
        conv     --> kernel size: (5,5), n_filters: 25
        pool     --> 64, 64, 25
        dropout1 --> keep_rate
        reshape2 --> 64*64*25
        fc1      --> 64*64*25, 64*64*5
        dropout2 --> keep_rate
        fc2      --> 64*64*5, 64*64   (output is the encoding)
        fc3      --> 64*64, 64*64*5
        dropout3 --> keep_rate
        fc4      --> 64*64*5, 64*64*25
        dropout4 --> keep_rate
        reshape3 --> 64, 64, 25
        deconv   --> kernel size: (5,5), n_filters: 25
        upsample --> 128, 128, 25
        fc5      --> 128*128*25, 128*128

    NOTE(review): at the default size, fc1 alone holds
    (25*64*64) x (5*64*64), roughly 2.1e9 weights; shrink ``img_size`` if
    that does not fit in memory.

    Args:
        name: Name scope wrapping the whole network.
        data: Input tensor holding ``img_size**2`` values per sample
            (single channel).
        img_size: Side length of the square input images; must be even.
        keep_rate: Keep probability passed to every dropout layer
            (defaults to the previously hard-coded 0.8).

    Returns:
        A ``(reconstruction, cost)`` tuple: the fc5 output with
        ``img_size**2`` values per sample, and the mean squared error
        between it and the flattened input.

    Raises:
        AssertionError: If ``img_size`` is odd.
    """
    # Explicit raise (not `assert`) so the check survives `python -O`.
    if img_size % 2 != 0:
        raise AssertionError('img_size must be even, got %r' % (img_size,))
    red_img_size = int(img_size // 2)
    pixels = img_size ** 2
    red_pixels = red_img_size ** 2
    with tf.name_scope(name):
        # Encode data
        reshape1 = tf.reshape(data, shape=[-1, img_size, img_size, 1])
        conv = convolution_layer('conv', reshape1, [5, 5, 1, 25])
        pool = pooling_layer('pool', conv)
        dropout1 = dropout_layer('dropout1', pool, keep_rate)
        reshape2 = tf.reshape(dropout1, shape=[-1, 25 * red_pixels])
        fc1 = fully_connected_layer('fc1', reshape2, 5 * red_pixels)
        dropout2 = dropout_layer('dropout2', fc1, keep_rate)
        fc2 = fully_connected_layer('fc2', dropout2, red_pixels)
        # Decode encoded data
        fc3 = fully_connected_layer('fc3', fc2, 5 * red_pixels)
        dropout3 = dropout_layer('dropout3', fc3, keep_rate)
        fc4 = fully_connected_layer('fc4', dropout3, 25 * red_pixels)
        dropout4 = dropout_layer('dropout4', fc4, keep_rate)
        reshape3 = tf.reshape(dropout4,
                              shape=[-1, red_img_size, red_img_size, 25])
        deconv = deconvolution_layer('deconv', reshape3, 25, [5, 5])
        upsample = upsample_layer('upsample', deconv)
        # fc5 flattens the whole upsampled volume, as the layer plan states.
        # has_color=True is not used here: it would reshape the input to
        # rank 3, which cannot be multiplied by the layer's 2-D weights.
        fc5 = fully_connected_layer('fc5', upsample, pixels)
        # Get difference after encoding/decoding process
        with tf.name_scope('cost'):
            # Flatten the input so it lines up element-for-element with fc5
            # even when `data` carries a trailing channel axis of size 1.
            target = tf.reshape(data, shape=[-1, pixels])
            cost = tf.reduce_mean(tf.square(tf.subtract(fc5, target)))
    return fc5, cost
# ##### Training functions
# In[ ]:
def next_batch(images, labels, start, batch_size):
    """Return the next ``batch_size`` slice, wrapping to the front when needed.

    Args:
        images: Sliceable sequence of samples.
        labels: Sliceable sequence aligned with ``images``.
        start: Index at which the batch should begin.
        batch_size: Number of samples per batch.

    Returns:
        A tuple ``(end, image_batch, label_batch)``. ``end`` is the index
        just past the returned batch and is meant to be passed back as
        ``start`` on the next call.

    Raises:
        AssertionError: If a wrap-around is required but ``batch_size``
            exceeds ``len(images)``.
    """
    end = start + batch_size
    if end > len(images):
        # The batch would run off the end: restart from the beginning.
        # Explicit raise (not `assert`) so the check survives `python -O`.
        if batch_size > len(images):
            raise AssertionError(
                'batch_size (%d) exceeds the number of samples (%d)'
                % (batch_size, len(images)))
        start = 0
        end = batch_size
    return end, images[start:end], labels[start:end]
def train_network(x, n_epochs=5, gpu_mem_limit=None):
    """Build the autoencoder on placeholder ``x`` and train it with Adam.

    Reads the module-level names ``train_data``, ``train_labels``,
    ``test_data``, ``batch_size``, ``y`` and ``logs_path``. The per-epoch
    average cost and the final cost on ``test_data`` are printed, and a
    ``cost`` scalar summary is written for every batch.

    NOTE(review): ``logs_path`` is not defined anywhere in the visible part
    of this file; it must exist at module level before this is called.

    Args:
        x: Input placeholder fed with image batches.
        n_epochs: Number of passes over ``train_data``.
        gpu_mem_limit: Optional value for
            ``per_process_gpu_memory_fraction``. ``None`` uses the default
            session configuration.
    """
    _, cost = model('ConvAutoEnc', x)
    with tf.name_scope('opt'):
        optimizer = tf.train.AdamOptimizer().minimize(cost)
    # Create a summary to monitor cost tensor
    tf.summary.scalar("cost", cost)
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()
    if gpu_mem_limit is None:
        kwargs = {}
    else:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_limit)
        kwargs = {'config': tf.ConfigProto(gpu_options=gpu_options)}
    # The batch count is the same for every epoch, so compute it once.
    n_batches = len(train_data) // batch_size
    with tf.Session(**kwargs) as sess:
        # Variables must be initialised before the first training step.
        sess.run(tf.global_variables_initializer())
        # create log writer object
        writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        try:
            train_start = 0
            for epoch in tqdm(range(n_epochs)):
                avg_cost = 0
                # Loop over all batches
                for i in tqdm(range(n_batches)):
                    train_start, batch_x, batch_y = next_batch(
                        train_data, train_labels, train_start, batch_size)
                    # Run optimization op (backprop) and cost op (to get
                    # loss value)
                    _, c, summary = sess.run(
                        [optimizer, cost, merged_summary_op],
                        feed_dict={x: batch_x, y: batch_y})
                    # Compute average loss
                    avg_cost += c / n_batches
                    # write log
                    writer.add_summary(summary, epoch * n_batches + i)
                # Display logs per epoch step
                print('Epoch', epoch+1, ' / ', n_epochs, 'cost:', avg_cost)
            print('Optimization Finished')
            # Evaluated inside the session block so cost.eval has a default
            # session available.
            print('Cost:', cost.eval({x: test_data}))
        finally:
            # Flush and release the event file even if training fails.
            writer.close()
# ##### Build and train network
# In[ ]:
validation_size = 0.15
img_size = 128
num_channels = 1
items = []
labels = []
types = ["daisy", "dandelion", "rose", "sunflower", "tulip"]
num_classes = len(types)
# One-hot label vector per flower type, in the order given by `types`.
# Named `label_map` so the builtin `map` is not shadowed.
label_map = {flower: [int(i == j) for j in range(num_classes)]
             for i, flower in enumerate(types)}

# The first command-line argument is the directory holding one sub-directory
# per flower type.
if len(sys.argv) < 2:
    sys.exit('usage: python %s <image_dir> [tf_log_level]' % sys.argv[0])
data_root = sys.argv[1]

for t in types:
    # os.path.join copes with a missing or present trailing slash, replacing
    # the earlier `[:-1] != '/'` check that compared the wrong slice.
    # The raw images differ in size, so they stay in a plain list rather than
    # being packed into a (ragged) NumPy array.
    raw_images = import_images(os.path.join(data_root, t))
    for i in process_images(raw_images, img_size, num_channels):
        items.append(i)
        labels.append(label_map[t])

images = np.array(items)
images = np.reshape(images, (len(images), img_size**2, num_channels))
labels = np.array(labels)
# Split training and testing
train_data, test_data, train_labels, test_labels = train_test_split(
    images, labels, test_size=validation_size)
n_classes = num_classes
batch_size = 100
x = tf.placeholder(tf.float32, [None, img_size**2, num_channels], name='InputData')
y = tf.placeholder(tf.float32, [None, n_classes], name='LabelData')
# NOTE(review): the visible source never calls train_network(x); confirm
# whether the invocation was lost or lives elsewhere.
