@tomokishii
Last active February 24, 2022 11:49
TensorFlow MNIST Autoencoders

README.md

These are TensorFlow autoencoder implementation examples. They are inspired by the very educational Keras blog article:

http://blog.keras.io/building-autoencoders-in-keras.html

Building Autoencoders in Keras

"Autoencoding" is a data compression algorithm where the compression and decompression functions are 1) data-specific, 2) lossy, and 3) learned automatically from examples rather than engineered by a human. Additionally, in almost all contexts where the term "autoencoder" is used, the compression and decompression functions are implemented with neural networks.

This time, I use TensorFlow to learn how to use tf.nn.conv2d_transpose(). This TensorFlow API is different from the UpSampling2D() layer that Keras provides (a short sketch of the difference follows the file list below).

  1. mnist_ae1.py - a very simple autoencoder model
  2. mnist_ae2.py - a convolutional autoencoder
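As a rough illustration of that API difference, here is a minimal sketch (shapes and variable names are chosen for illustration only, matching the 7x7 -> 14x14 step used later in mnist_ae2.py; the Keras counterpart is shown as comments). tf.nn.conv2d_transpose() takes a learned filter of shape [height, width, out_ch, in_ch] and an explicit output_shape, whereas Keras' UpSampling2D() is a fixed, parameter-free up-sampling.

# Sketch: learned up-sampling with tf.nn.conv2d_transpose (TF 1.x era API).
# Shapes are illustrative: a 7x7x8 feature map is up-sampled to 14x14x8.
import tensorflow as tf

feat = tf.placeholder(tf.float32, [None, 7, 7, 8])
w = tf.Variable(tf.truncated_normal([3, 3, 8, 8], stddev=0.1))  # [h, w, out_ch, in_ch]
batch_size = tf.shape(feat)[0]
upsampled = tf.nn.conv2d_transpose(
    feat, w,
    output_shape=[batch_size, 14, 14, 8],
    strides=[1, 2, 2, 1], padding='SAME')    # 7x7 -> 14x14, filter weights are learned

# The Keras counterpart is a fixed (non-learned) up-sampling layer:
#   from keras.layers import UpSampling2D
#   upsampled = UpSampling2D((2, 2))(feat)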
#
# mnist_ae1.py   date. 7/4/2016
#
# Autoencoder tutorial code
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import tensorflow as tf

# Import data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

# Variables
x = tf.placeholder("float", [None, 784])
y_ = tf.placeholder("float", [None, 10])

w_enc = tf.Variable(tf.random_normal([784, 625], mean=0.0, stddev=0.05))
w_dec = tf.Variable(tf.random_normal([625, 784], mean=0.0, stddev=0.05))
# w_dec = tf.transpose(w_enc) # if you use tied weights
b_enc = tf.Variable(tf.zeros([625]))
b_dec = tf.Variable(tf.zeros([784]))

# Create the model
def model(X, w_e, b_e, w_d, b_d):
    encoded = tf.sigmoid(tf.matmul(X, w_e) + b_e)
    decoded = tf.sigmoid(tf.matmul(encoded, w_d) + b_d)
    return encoded, decoded

encoded, decoded = model(x, w_enc, b_enc, w_dec, b_dec)

# Cost Function basic term
cross_entropy = -1. * x * tf.log(decoded) - (1. - x) * tf.log(1. - decoded)
loss = tf.reduce_mean(cross_entropy)
train_step = tf.train.AdagradOptimizer(0.1).minimize(loss)

# Train
init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    print('Training...')
    for i in range(10001):
        batch_xs, batch_ys = mnist.train.next_batch(128)
        train_step.run({x: batch_xs, y_: batch_ys})
        if i % 1000 == 0:
            train_loss = loss.eval({x: batch_xs, y_: batch_ys})
            print(' step, loss = %6d: %6.3f' % (i, train_loss))

    # generate decoded image with test data
    test_fd = {x: mnist.test.images, y_: mnist.test.labels}
    decoded_imgs = decoded.eval(test_fd)
    print('loss (test) = ', loss.eval(test_fd))

x_test = mnist.test.images
n = 10  # how many digits we will display
plt.figure(figsize=(20, 4))
for i in range(n):
    # display original
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # display reconstruction
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

plt.savefig('mnist_ae1.png')
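A note on the tied-weights line commented out in mnist_ae1.py: a minimal sketch of that variant (my assumption of how it would be wired, not part of the original script) reuses the transposed encoder weights in the decoder, so only w_enc and the two biases are trained.

# Tied-weights variant (sketch): the decoder reuses the transposed encoder
# weights instead of learning a separate w_dec.
w_enc = tf.Variable(tf.random_normal([784, 625], mean=0.0, stddev=0.05))
b_enc = tf.Variable(tf.zeros([625]))
w_dec = tf.transpose(w_enc)              # tied weights, no extra decoder matrix
b_dec = tf.Variable(tf.zeros([784]))

encoded = tf.sigmoid(tf.matmul(x, w_enc) + b_enc)
decoded = tf.sigmoid(tf.matmul(encoded, w_dec) + b_dec)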
#
# mnist_ae2.py   date. 7/4/2016
#
# Autoencoder tutorial code - trial of convolutional AE
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
from my_nn_lib import Convolution2D, MaxPooling2D
from my_nn_lib import FullConnected, ReadOutLayer

# Up-sampling 2-D Layer (deconvolutional Layer)
class Conv2Dtranspose(object):
    '''
      constructor's args:
          input      : input image (2D matrix)
          output_siz : output image size
          in_ch      : number of incoming image channel
          out_ch     : number of outgoing image channel
          patch_siz  : filter(patch) size
    '''
    def __init__(self, input, output_siz, in_ch, out_ch, patch_siz, activation='relu'):
        self.input = input
        self.rows = output_siz[0]
        self.cols = output_siz[1]
        self.out_ch = out_ch
        self.activation = activation

        wshape = [patch_siz[0], patch_siz[1], out_ch, in_ch]    # note the arguments order
        w_cvt = tf.Variable(tf.truncated_normal(wshape, stddev=0.1),
                            trainable=True)
        b_cvt = tf.Variable(tf.constant(0.1, shape=[out_ch]),
                            trainable=True)
        self.batsiz = tf.shape(input)[0]
        self.w = w_cvt
        self.b = b_cvt
        self.params = [self.w, self.b]

    def output(self):
        shape4D = [self.batsiz, self.rows, self.cols, self.out_ch]
        linout = tf.nn.conv2d_transpose(self.input, self.w, output_shape=shape4D,
                                        strides=[1, 2, 2, 1], padding='SAME') + self.b
        if self.activation == 'relu':
            self.output = tf.nn.relu(linout)
        elif self.activation == 'sigmoid':
            self.output = tf.sigmoid(linout)
        else:
            self.output = linout

        return self.output

# Create the model
# (this fully-connected model() is carried over from mnist_ae1.py and is not used below)
def model(X, w_e, b_e, w_d, b_d):
    encoded = tf.sigmoid(tf.matmul(X, w_e) + b_e)
    decoded = tf.sigmoid(tf.matmul(encoded, w_d) + b_d)
    return encoded, decoded

def mk_nn_model(x, y_):
    # Encoding phase
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    conv1 = Convolution2D(x_image, (28, 28), 1, 16,
                          (3, 3), activation='relu')
    conv1_out = conv1.output()
    pool1 = MaxPooling2D(conv1_out)
    pool1_out = pool1.output()

    conv2 = Convolution2D(pool1_out, (14, 14), 16, 8,
                          (3, 3), activation='relu')
    conv2_out = conv2.output()
    pool2 = MaxPooling2D(conv2_out)
    pool2_out = pool2.output()

    conv3 = Convolution2D(pool2_out, (7, 7), 8, 8, (3, 3), activation='relu')
    conv3_out = conv3.output()
    pool3 = MaxPooling2D(conv3_out)
    pool3_out = pool3.output()
    # at this point the representation is (8, 4, 4) i.e. 128-dimensional

    # Decoding phase
    conv_t1 = Conv2Dtranspose(pool3_out, (7, 7), 8, 8,
                              (3, 3), activation='relu')
    conv_t1_out = conv_t1.output()

    conv_t2 = Conv2Dtranspose(conv_t1_out, (14, 14), 8, 8,
                              (3, 3), activation='relu')
    conv_t2_out = conv_t2.output()

    conv_t3 = Conv2Dtranspose(conv_t2_out, (28, 28), 8, 16,
                              (3, 3), activation='relu')
    conv_t3_out = conv_t3.output()

    conv_last = Convolution2D(conv_t3_out, (28, 28), 16, 1, (3, 3),
                              activation='sigmoid')
    decoded = conv_last.output()

    decoded = tf.reshape(decoded, [-1, 784])
    cross_entropy = -1. * x * tf.log(decoded) - (1. - x) * tf.log(1. - decoded)
    loss = tf.reduce_mean(cross_entropy)

    return loss, decoded

if __name__ == '__main__':
    mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

    # Variables
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])

    loss, decoded = mk_nn_model(x, y_)
    train_step = tf.train.AdagradOptimizer(0.1).minimize(loss)

    init = tf.initialize_all_variables()

    # Train
    with tf.Session() as sess:
        sess.run(init)
        print('Training...')
        for i in range(10001):
            batch_xs, batch_ys = mnist.train.next_batch(128)
            train_step.run({x: batch_xs, y_: batch_ys})
            if i % 1000 == 0:
                train_loss = loss.eval({x: batch_xs, y_: batch_ys})
                print(' step, loss = %6d: %6.3f' % (i, train_loss))

        # generate decoded image with test data
        test_fd = {x: mnist.test.images, y_: mnist.test.labels}
        decoded_imgs = decoded.eval(test_fd)
        print('loss (test) = ', loss.eval(test_fd))

    x_test = mnist.test.images
    n = 10  # how many digits we will display
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # display original
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(x_test[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # display reconstruction
        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(decoded_imgs[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    # plt.show()
    plt.savefig('mnist_ae2.png')
#
# my_nn_lib.py
#   date. 5/19/2016
#
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import os
import sys
import numpy as np
# import cv2
import tensorflow as tf

# Convolution 2-D Layer
class Convolution2D(object):
    '''
      constructor's args:
          input     : input image (2D matrix)
          input_siz : input image size
          in_ch     : number of incoming image channel
          out_ch    : number of outgoing image channel
          patch_siz : filter(patch) size
          weights   : (if input) (weights, bias)
    '''
    def __init__(self, input, input_siz, in_ch, out_ch, patch_siz, activation='relu'):
        self.input = input
        self.rows = input_siz[0]
        self.cols = input_siz[1]
        self.in_ch = in_ch
        self.activation = activation

        wshape = [patch_siz[0], patch_siz[1], in_ch, out_ch]
        w_cv = tf.Variable(tf.truncated_normal(wshape, stddev=0.1),
                           trainable=True)
        b_cv = tf.Variable(tf.constant(0.1, shape=[out_ch]),
                           trainable=True)
        self.w = w_cv
        self.b = b_cv
        self.params = [self.w, self.b]

    def output(self):
        shape4D = [-1, self.rows, self.cols, self.in_ch]
        x_image = tf.reshape(self.input, shape4D)    # reshape to 4D tensor
        linout = tf.nn.conv2d(x_image, self.w,
                              strides=[1, 1, 1, 1], padding='SAME') + self.b
        if self.activation == 'relu':
            self.output = tf.nn.relu(linout)
        elif self.activation == 'sigmoid':
            self.output = tf.sigmoid(linout)
        else:
            self.output = linout

        return self.output

# Max Pooling Layer
class MaxPooling2D(object):
    '''
      constructor's args:
          input : input image (2D matrix)
          ksize : pooling patch size
    '''
    def __init__(self, input, ksize=None):
        self.input = input
        if ksize is None:
            ksize = [1, 2, 2, 1]
        self.ksize = ksize

    def output(self):
        self.output = tf.nn.max_pool(self.input, ksize=self.ksize,
                                     strides=[1, 2, 2, 1], padding='SAME')
        return self.output

# Fully-connected Layer
class FullConnected(object):
    def __init__(self, input, n_in, n_out):
        self.input = input

        w_h = tf.Variable(tf.truncated_normal([n_in, n_out],
                          mean=0.0, stddev=0.05), trainable=True)
        b_h = tf.Variable(tf.zeros([n_out]), trainable=True)
        self.w = w_h
        self.b = b_h
        self.params = [self.w, self.b]

    def output(self):
        linarg = tf.matmul(self.input, self.w) + self.b
        self.output = tf.nn.relu(linarg)
        return self.output

# Read-out Layer
class ReadOutLayer(object):
    def __init__(self, input, n_in, n_out):
        self.input = input

        w_o = tf.Variable(tf.random_normal([n_in, n_out],
                          mean=0.0, stddev=0.05), trainable=True)
        b_o = tf.Variable(tf.zeros([n_out]), trainable=True)
        self.w = w_o
        self.b = b_o
        self.params = [self.w, self.b]

    def output(self):
        linarg = tf.matmul(self.input, self.w) + self.b
        self.output = tf.nn.softmax(linarg)
        return self.output
#
@Kajiyu commented Jan 13, 2017

Your program is so cool, but I have a question.
The following paper says that unpooling layers are necessary for deconvolving image tensors:
http://cvlab.postech.ac.kr/research/deconvnet/
I would appreciate it if you could tell me the reason for not using unpooling phases.

Sincerely, Kajiyu

@tomokishii (Author)

Hi Kajiyu,

I think I implemented the unpooling (up-sampling) process with tf.nn.conv2d_transpose(), following the information in this Stack Overflow Q&A:
http://stackoverflow.com/questions/37926562/tensorflow-conv2d-transpose-deconv-number-of-rows-of-out-backprop-doesnt-matc/

In that Q&A, you can find a link to a very instructive slide deck at the bottom of the page.

Tomo
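For reference, a quick shape check of that point (an illustrative sketch, not part of the original exchange): a stride-2 max-pool halves the spatial size, and a stride-2 conv2d_transpose with 'SAME' padding brings it back, which is why mnist_ae2.py needs no separate unpooling layer.

# Sketch: stride-2 pooling followed by stride-2 conv2d_transpose
# restores the original spatial resolution (28x28 -> 14x14 -> 28x28).
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])
pooled = tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')     # 28x28 -> 14x14
w = tf.Variable(tf.truncated_normal([3, 3, 1, 1], stddev=0.1))    # [h, w, out_ch, in_ch]
unpooled = tf.nn.conv2d_transpose(
    pooled, w,
    output_shape=[tf.shape(x)[0], 28, 28, 1],
    strides=[1, 2, 2, 1], padding='SAME')                         # 14x14 -> 28x28

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    out = sess.run(unpooled, {x: np.zeros((2, 28, 28, 1), np.float32)})
    print(out.shape)   # (2, 28, 28, 1)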

@jhwjhw0123

Thanks for this excellent post! However, I think there is a problem with the cross-entropy implementation: since we are using a vector notation of the original image, the cross-entropy loss should not look like it does in the code... If we consider mutually exclusive labels, the loss function in the code would penalize the probability of the correct classification (you can work it out with a simple two-class example), and I think the same applies to this program. Meanwhile, the probabilities in one row (an image) need not sum to 1, so I can't see the rationale for using cross_entropy = -1. * x * tf.log(decoded) - (1. - x) * tf.log(1. - decoded).
Derek Murray once answered an issue regarding the direct cross-entropy (https://github.com/tensorflow/tensorflow/issues/2462), and his suggested code is cross_entropy = -tf.reduce_mean(tf.reduce_sum(x*tf.log(decoded), reduction_indices=[1])). I think for vector-like cross-entropy this should be the right formula, although there are still numerical issues with this function.
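For what it's worth, the element-wise term in the gist is the per-pixel binary (Bernoulli) cross-entropy; the practical issue is numerical, since tf.log(decoded) blows up when decoded reaches exactly 0 or 1. A minimal sketch of a more stable formulation, assuming the decoder's pre-sigmoid output is available as decoded_logits (not a name used in the gist) and using the TF 1.x keyword API:

# Sketch: numerically stable per-pixel cross-entropy computed from logits.
# `decoded_logits` is assumed to be the decoder output *before* the final
# sigmoid; it is not a variable defined in the gist.
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
    labels=x, logits=decoded_logits)
loss = tf.reduce_mean(cross_entropy)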
