@tomokishii
Created October 2, 2017 05:11
fashion_mnist_theano.py - another MNIST (Fashion-MNIST) classification example using Theano. (Goodbye, Theano!)
#
# fashion_mnist_theano.py
# date. 10/2/2017
#
# REM: I read the article announcing that development of "THEANO" will stop.
# The deep learning framework stimulated me and made me write this code.
# I'd like to say thank you to the Theano support team.
#
import os
import numpy as np
import theano
import theano.tensor as T
# using tensorflow mnist loader to load Fashion mnist
# ref. https://github.com/zalandoresearch/fashion-mnist
#
from tensorflow.examples.tutorials.mnist import input_data


class HiddenLayer(object):
    """
    Fully connected hidden layer
    """
    def __init__(self, input, n_in, n_out, rng, W=None, b=None,
                 activation=T.nnet.relu):
        self.input = input
        if W is None:
            # Glorot/Xavier-style uniform initialization over
            # [-sqrt(6 / (n_in + n_out)), +sqrt(6 / (n_in + n_out))]
            W_values = np.asarray(
                rng.uniform(
                    low=-np.sqrt(6. / (n_in + n_out)),
                    high=np.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == T.nnet.sigmoid:
                # sigmoid units use a 4x larger initialization range
                W_values *= 4
            W = theano.shared(value=W_values, name='W', borrow=True)
        if b is None:
            b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)
        self.W = W
        self.b = b
        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # parameters of the model
        self.params = [self.W, self.b]
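
# A minimal usage sketch (not part of the original gist): building one hidden
# layer on a symbolic input, assuming x holds flattened 28x28 images as rows.
#   rng = np.random.RandomState(1234)
#   x = T.matrix('x')
#   h = HiddenLayer(x, n_in=784, n_out=512, rng=rng)
#   # h.output is the symbolic expression relu(T.dot(x, h.W) + h.b)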


class SoftmaxRegression(object):
    """
    Multi-class Logistic (Softmax) Regression layer
    """
    def __init__(self, input, n_in, n_out):
        self.W = theano.shared(
            value=np.zeros((n_in, n_out),
                           dtype=theano.config.floatX),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=np.zeros((n_out,),
                           dtype=theano.config.floatX),
            name='b',
            borrow=True
        )
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]
        self.input = input


def mlp_model(input, n_in, n_out, rng, n_hidden=[512, 256]):
    """
    Build a multi-layer perceptron model: n_in -> 512 -> 256 -> n_out
    """
    hidden1 = HiddenLayer(input, n_in, n_hidden[0], rng)
    hidden2 = HiddenLayer(hidden1.output,
                          n_hidden[0], n_hidden[1], rng)
    readout = SoftmaxRegression(hidden2.output, n_hidden[1], n_out)
    l2_loss = ((hidden1.W ** 2).sum()
               + (hidden2.W ** 2).sum()
               + (readout.W ** 2).sum())
    params = hidden1.params + hidden2.params + readout.params
    return readout.p_y_given_x, readout.y_pred, l2_loss, params


def loss(p_y_given_x, y_pred, y_label):
    # negative log likelihood
    nll = -T.mean(T.log(p_y_given_x)[T.arange(y_label.shape[0]), y_label])
    # errors
    if y_label.ndim != y_pred.ndim:
        raise TypeError(
            'y should have the same shape as y_pred',
            ('y', y_label.type, 'y_pred', y_pred.type))
    if y_label.dtype.startswith('int'):
        error_rate = T.mean(T.neq(y_pred, y_label))
    else:
        raise NotImplementedError()
    return nll, error_rate
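
# Note on the nll expression above: T.log(p_y_given_x)[T.arange(n), y_label]
# selects, for each sample i, the log-probability assigned to its true class
# y_label[i]; the negated mean over the minibatch is the cross-entropy
# (negative log-likelihood) loss.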


def theano_ready_dataset(dirn):
    """
    prepare theano-ready mnist dataset
    """
    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(np.asarray(data_y,
                                            dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')
    # Load Dataset
    fmnist = input_data.read_data_sets(dirn, one_hot=False)
    train_set_x, train_set_y = shared_dataset((fmnist.train.images,
                                               fmnist.train.labels))
    valid_set_x, valid_set_y = shared_dataset((fmnist.validation.images,
                                               fmnist.validation.labels))
    test_set_x, test_set_y = shared_dataset((fmnist.test.images,
                                             fmnist.test.labels))
    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
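
# Note: loading the arrays into theano.shared variables lets Theano keep the
# whole dataset in device (GPU) memory when available, so the minibatch
# slices passed via `givens` in test_mnist() index into that memory instead
# of copying from the host on every call.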


def test_mnist(dirn, batch_size=50,
               learning_rate=0.01,
               L2_reg=0.0001, n_epochs=10):
    """
    Main body of Fashion-MNIST classification
    """
    # Load Datasets
    fileA_path = os.path.join(dirn, 'train-images-idx3-ubyte.gz')
    if os.path.exists(fileA_path):
        datasets = theano_ready_dataset(dirn)
    else:
        raise OSError('Check path to data files.')
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    # Define Graph
    index = T.lscalar()
    x = T.matrix('x')   # the data is presented as rasterized images [-1, 784]
    y = T.ivector('y')  # the labels are presented as a 1D vector of ints
    rng = np.random.RandomState(1234)   # Random state
    # MLP model
    p_y_given_x, y_pred, l2_loss, params = mlp_model(
        input=x,
        n_in=28 * 28,
        n_out=10,
        rng=rng
    )
    # Loss
    loss_nll, errors = loss(p_y_given_x, y_pred, y)
    cost = loss_nll + L2_reg * l2_loss   # L2 regularization term
    # Theano Functions
    gparams = [T.grad(cost, param) for param in params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(params, gparams)]
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    # Train
    print('Training...')
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        # train step
        for mini_batch_index in range(n_train_batches):
            cost_j = train_model(mini_batch_index)
            if mini_batch_index % 40 == 0:
                print('epoch[{:>5d}] : cost ={:>10.4f}'.format(
                    epoch, float(cost_j)))
        # validation step
        cost_list = []
        err_list = []
        for mini_batch_index in range(n_valid_batches):
            cost_j, err_j = validate_model(mini_batch_index)
            cost_list.append(float(cost_j))
            err_list.append(float(err_j))
        cost_val_mean = np.mean(cost_list)
        accu_val_mean = 1.0 - np.mean(err_list)
        print('validation: cost ={:>10.4f}, accuracy={:>10.4f}'.format(
            cost_val_mean, accu_val_mean))
    return None


if __name__ == '__main__':
    test_mnist(dirn='../FMNISTdata', n_epochs=20)
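
# Usage sketch (assuming the four Fashion-MNIST .gz files, e.g.
# train-images-idx3-ubyte.gz, from
# https://github.com/zalandoresearch/fashion-mnist have been placed in
# ../FMNISTdata):
#   $ python fashion_mnist_theano.py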