ykshr/cucumber9.py

## cucumber9.py
#BSD 3-Clause License
#=======
#
#Copyright (c) 2017, Xilinx Inc.
#All rights reserved.
#
#Based Matthieu Courbariaux's CIFAR-10 example code
#Copyright (c) 2015-2016, Matthieu Courbariaux
#All rights reserved.
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#    * Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#    * Neither the name of the copyright holder nor the names of its
#      contributors may be used to endorse or promote products derived from
#      this software without specific prior written permission.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
#EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
#DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import print_function

import sys
import os
import time

import numpy as np
np.random.seed(1234) # for reproducibility?

import lasagne

# specifying the gpu to use
# import theano.sandbox.cuda
# theano.sandbox.cuda.use('gpu1')
import theano
import theano.tensor as T

import cPickle as pickle
import gzip

import binary_net
import cnv

from pylearn2.datasets.zca_dataset import ZCA_Dataset
from pylearn2.datasets.cifar10 import CIFAR10
from pylearn2.utils import serial

from collections import OrderedDict

from pylearn2.datasets import DenseDesignMatrix

class CUCUMBER9(DenseDesignMatrix):
    def __init__(self, which_set):

        self.path = '/home/ubuntu/CUCUMBER-9/prototype_1'

        self.img_shape = (3, 32, 32)
        self.img_size = np.prod(self.img_shape)

        if which_set in {'train'}:
            X, y = self.load_data()
        elif which_set in {'test', 'valid'}:
            X, y = self.load_data_test()

        X = X.astype('float32')

        super(CUCUMBER9, self).__init__(X=X, y=y)

    def unpickle(self, file_name):
        with open(file_name, 'rb') as f:
            if sys.version_info.major == 2:
                return pickle.load(f)
            elif sys.version_info.major == 3:
                return pickle.load(f, encoding='latin-1')

    def load_data(self):

        nb_train_samples = 2475

        X = np.zeros((nb_train_samples, self.img_size), dtype='uint8')
        y = np.zeros((nb_train_samples,1), dtype='uint8')
        for i in range(1, 6):
            fpath = os.path.join(self.path, 'data_batch_' + str(i))
            batch_dict = self.unpickle(fpath)
            data = batch_dict['data']
            labels = batch_dict['labels']

            X[(i-1)*495:i*495, :] = data.reshape(495, self.img_size)
            y[(i-1)*495:i*495, 0] = labels

        return X, y

    def load_data_test(self):

        fpath = os.path.join(self.path, 'test_batch')
        batch_dict = self.unpickle(fpath)
        data = batch_dict['data']
        labels = batch_dict['labels']

        X = np.zeros((495, self.img_size), dtype='uint8')
        y = np.zeros((495,1), dtype='uint8')

        X = data.reshape(495, self.img_size)
        y[:, 0] = labels

        return X, y


if __name__ == "__main__":

    learning_parameters = OrderedDict()
    # BN parameters
    batch_size = 50
    print("batch_size = "+str(batch_size))
    # alpha is the exponential moving average factor
    learning_parameters.alpha = .1
    print("alpha = "+str(learning_parameters.alpha))
    learning_parameters.epsilon = 1e-4
    print("epsilon = "+str(learning_parameters.epsilon))

    # W_LR_scale = 1.
    learning_parameters.W_LR_scale = "Glorot" # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = "+str(learning_parameters.W_LR_scale))

    # Training parameters
    num_epochs = 500
    print("num_epochs = "+str(num_epochs))

    # Decaying LR
    LR_start = 0.001
    print("LR_start = "+str(LR_start))
    LR_fin = 0.0000003
    print("LR_fin = "+str(LR_fin))
    LR_decay = (LR_fin/LR_start)**(1./num_epochs)
    print("LR_decay = "+str(LR_decay))
    # BTW, LR decay might good for the BN moving average...

    save_path = "cucumber9_parameters.npz"
    print("save_path = "+str(save_path))

    train_set_size = 2475
    print("train_set_size = "+str(train_set_size))
    shuffle_parts = 1
    print("shuffle_parts = "+str(shuffle_parts))

    print('Loading CUCUMBER9 dataset...')

    train_set = CUCUMBER9(which_set="train")
    valid_set = CUCUMBER9(which_set="valid")
    test_set = CUCUMBER9(which_set="test")


    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_set.X = np.reshape(np.subtract(np.multiply(2./255.,train_set.X),1.),(-1,3,32,32))
    valid_set.X = np.reshape(np.subtract(np.multiply(2./255.,valid_set.X),1.),(-1,3,32,32))
    test_set.X = np.reshape(np.subtract(np.multiply(2./255.,test_set.X),1.),(-1,3,32,32))

    # flatten targets
    train_set.y = np.hstack(train_set.y)
    valid_set.y = np.hstack(valid_set.y)
    test_set.y = np.hstack(test_set.y)

    # Onehot the targets
    train_set.y = np.float32(np.eye(9)[train_set.y])
    valid_set.y = np.float32(np.eye(9)[valid_set.y])
    test_set.y = np.float32(np.eye(9)[test_set.y])

    # for hinge loss
    train_set.y = 2* train_set.y - 1.
    valid_set.y = 2* valid_set.y - 1.
    test_set.y = 2* test_set.y - 1.

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = cnv.genCnv(input, 9, learning_parameters)

    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0.,1.-target*train_output)))

    # W updates
    W = lasagne.layers.get_all_params(cnn, binary=True)
    W_grads = binary_net.compute_grads(loss,cnn)
    updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
    updates = binary_net.clipping_scaling(updates,cnn)

    # other parameters updates
    params = lasagne.layers.get_all_params(cnn, trainable=True, binary=False)
    updates = OrderedDict(updates.items() + lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR).items())

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0.,1.-target*test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)),dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(
            train_fn,val_fn,
            cnn,
            batch_size,
            LR_start,LR_decay,
            num_epochs,
            train_set.X,train_set.y,
            valid_set.X,valid_set.y,
            test_set.X,test_set.y,
            save_path=save_path,
            shuffle_parts=shuffle_parts)
	#BSD 3-Clause License
	#=======
	#
	#Copyright (c) 2017, Xilinx Inc.
	#All rights reserved.
	#
	#Based Matthieu Courbariaux's CIFAR-10 example code
	#Copyright (c) 2015-2016, Matthieu Courbariaux
	#All rights reserved.
	#
	#Redistribution and use in source and binary forms, with or without
	#modification, are permitted provided that the following conditions are met:
	#
	# * Redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer.
	# * Redistributions in binary form must reproduce the above copyright
	# notice, this list of conditions and the following disclaimer in the
	# documentation and/or other materials provided with the distribution.
	# * Neither the name of the copyright holder nor the names of its
	# contributors may be used to endorse or promote products derived from
	# this software without specific prior written permission.
	#
	#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
	#EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
	#DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	from __future__ import print_function

	import sys
	import os
	import time

	import numpy as np
	np.random.seed(1234) # for reproducibility?

	import lasagne

	# specifying the gpu to use
	# import theano.sandbox.cuda
	# theano.sandbox.cuda.use('gpu1')
	import theano
	import theano.tensor as T

	import cPickle as pickle
	import gzip

	import binary_net
	import cnv

	from pylearn2.datasets.zca_dataset import ZCA_Dataset
	from pylearn2.datasets.cifar10 import CIFAR10
	from pylearn2.utils import serial

	from collections import OrderedDict

	from pylearn2.datasets import DenseDesignMatrix

	class CUCUMBER9(DenseDesignMatrix):
	def __init__(self, which_set):

	self.path = '/home/ubuntu/CUCUMBER-9/prototype_1'

	self.img_shape = (3, 32, 32)
	self.img_size = np.prod(self.img_shape)

	if which_set in {'train'}:
	X, y = self.load_data()
	elif which_set in {'test', 'valid'}:
	X, y = self.load_data_test()

	X = X.astype('float32')

	super(CUCUMBER9, self).__init__(X=X, y=y)

	def unpickle(self, file_name):
	with open(file_name, 'rb') as f:
	if sys.version_info.major == 2:
	return pickle.load(f)
	elif sys.version_info.major == 3:
	return pickle.load(f, encoding='latin-1')

	def load_data(self):

	nb_train_samples = 2475

	X = np.zeros((nb_train_samples, self.img_size), dtype='uint8')
	y = np.zeros((nb_train_samples,1), dtype='uint8')
	for i in range(1, 6):
	fpath = os.path.join(self.path, 'data_batch_' + str(i))
	batch_dict = self.unpickle(fpath)
	data = batch_dict['data']
	labels = batch_dict['labels']

	X[(i-1)495:i495, :] = data.reshape(495, self.img_size)
	y[(i-1)495:i495, 0] = labels

	return X, y

	def load_data_test(self):

	fpath = os.path.join(self.path, 'test_batch')
	batch_dict = self.unpickle(fpath)
	data = batch_dict['data']
	labels = batch_dict['labels']

	X = np.zeros((495, self.img_size), dtype='uint8')
	y = np.zeros((495,1), dtype='uint8')

	X = data.reshape(495, self.img_size)
	y[:, 0] = labels

	return X, y


	if __name__ == "__main__":

	learning_parameters = OrderedDict()
	# BN parameters
	batch_size = 50
	print("batch_size = "+str(batch_size))
	# alpha is the exponential moving average factor
	learning_parameters.alpha = .1
	print("alpha = "+str(learning_parameters.alpha))
	learning_parameters.epsilon = 1e-4
	print("epsilon = "+str(learning_parameters.epsilon))

	# W_LR_scale = 1.
	learning_parameters.W_LR_scale = "Glorot" # "Glorot" means we are using the coefficients from Glorot's paper
	print("W_LR_scale = "+str(learning_parameters.W_LR_scale))

	# Training parameters
	num_epochs = 500
	print("num_epochs = "+str(num_epochs))

	# Decaying LR
	LR_start = 0.001
	print("LR_start = "+str(LR_start))
	LR_fin = 0.0000003
	print("LR_fin = "+str(LR_fin))
	LR_decay = (LR_fin/LR_start)**(1./num_epochs)
	print("LR_decay = "+str(LR_decay))
	# BTW, LR decay might good for the BN moving average...

	save_path = "cucumber9_parameters.npz"
	print("save_path = "+str(save_path))

	train_set_size = 2475
	print("train_set_size = "+str(train_set_size))
	shuffle_parts = 1
	print("shuffle_parts = "+str(shuffle_parts))

	print('Loading CUCUMBER9 dataset...')

	train_set = CUCUMBER9(which_set="train")
	valid_set = CUCUMBER9(which_set="valid")
	test_set = CUCUMBER9(which_set="test")


	# bc01 format
	# Inputs in the range [-1,+1]
	# print("Inputs in the range [-1,+1]")
	train_set.X = np.reshape(np.subtract(np.multiply(2./255.,train_set.X),1.),(-1,3,32,32))
	valid_set.X = np.reshape(np.subtract(np.multiply(2./255.,valid_set.X),1.),(-1,3,32,32))
	test_set.X = np.reshape(np.subtract(np.multiply(2./255.,test_set.X),1.),(-1,3,32,32))

	# flatten targets
	train_set.y = np.hstack(train_set.y)
	valid_set.y = np.hstack(valid_set.y)
	test_set.y = np.hstack(test_set.y)

	# Onehot the targets
	train_set.y = np.float32(np.eye(9)[train_set.y])
	valid_set.y = np.float32(np.eye(9)[valid_set.y])
	test_set.y = np.float32(np.eye(9)[test_set.y])

	# for hinge loss
	train_set.y = 2* train_set.y - 1.
	valid_set.y = 2* valid_set.y - 1.
	test_set.y = 2* test_set.y - 1.

	print('Building the CNN...')

	# Prepare Theano variables for inputs and targets
	input = T.tensor4('inputs')
	target = T.matrix('targets')
	LR = T.scalar('LR', dtype=theano.config.floatX)

	cnn = cnv.genCnv(input, 9, learning_parameters)

	train_output = lasagne.layers.get_output(cnn, deterministic=False)

	# squared hinge loss
	loss = T.mean(T.sqr(T.maximum(0.,1.-target*train_output)))

	# W updates
	W = lasagne.layers.get_all_params(cnn, binary=True)
	W_grads = binary_net.compute_grads(loss,cnn)
	updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
	updates = binary_net.clipping_scaling(updates,cnn)

	# other parameters updates
	params = lasagne.layers.get_all_params(cnn, trainable=True, binary=False)
	updates = OrderedDict(updates.items() + lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR).items())

	test_output = lasagne.layers.get_output(cnn, deterministic=True)
	test_loss = T.mean(T.sqr(T.maximum(0.,1.-target*test_output)))
	test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)),dtype=theano.config.floatX)

	# Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
	# and returning the corresponding training loss:
	train_fn = theano.function([input, target, LR], loss, updates=updates)

	# Compile a second function computing the validation loss and accuracy:
	val_fn = theano.function([input, target], [test_loss, test_err])

	print('Training...')

	binary_net.train(
	train_fn,val_fn,
	cnn,
	batch_size,
	LR_start,LR_decay,
	num_epochs,
	train_set.X,train_set.y,
	valid_set.X,valid_set.y,
	test_set.X,test_set.y,
	save_path=save_path,
	shuffle_parts=shuffle_parts)