@botev · Last active January 25, 2016
Hinton autoencoder for Theano and TensorFlow benchmarking
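"""Benchmark of Hinton's deep autoencoder (layer widths 784, 1000, 500, 250,
30, 250, 500, 1000, 784, with the hidden widths scaled by a `factor`
argument): tanh hidden layers and a sigmoid reconstruction trained with plain
SGD on binary cross-entropy, timed under both Theano and TensorFlow across a
grid of batch sizes and width factors."""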
from __future__ import print_function
import numpy as np
import os
import gzip
import sys
import time
from collections import OrderedDict
floatX = "float32"
def download_data(folder):
    if not os.path.exists(folder):
        os.mkdir(folder)
    file_name = "mnist.pkl.gz"
    file_path = os.path.join(folder, file_name)
    if sys.version_info.major < 3:
        import urllib2 as urllib
    else:
        import urllib.request as urllib
    if not os.path.exists(file_path):
        url = "http://deeplearning.net/data/mnist/mnist.pkl.gz"
        # Respect an HTTP proxy if one is configured in the environment
        proxy = os.environ.get('HTTP_PROXY', None)
        if proxy is not None:
            proxy = urllib.ProxyHandler({'http': proxy})
            opener = urllib.build_opener(proxy)
            urllib.install_opener(opener)
        with open(file_path, 'wb') as f:
            f.write(urllib.urlopen(url).read())
def load_data(folder):
    file_name = "mnist.pkl.gz"
    file_path = os.path.join(folder, file_name)
    with gzip.open(file_path, 'rb') as f:
        if sys.version_info.major < 3:
            import cPickle as pickle
            data = pickle.load(f)
        else:
            import pickle
            # Python 3 needs latin1 encoding to read the Python 2 pickle
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            data = u.load()
    # Cast both images and labels to floatX
    images = np.concatenate((data[0][0].astype(floatX),
                             data[1][0].astype(floatX)),
                            axis=0)
    labels = np.concatenate((data[0][1].astype(floatX),
                             data[1][1].astype(floatX)),
                            axis=0)
    return images, labels
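
# Note: the standard mnist.pkl.gz layout is (train, valid, test) with 50000,
# 10000 and 10000 examples respectively; load_data concatenates the train and
# validation splits, so it returns images of shape (60000, 784) and labels of
# shape (60000,), while the test split is left unused.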
def main_theano(batch_size, factor, burnout, epochs, period=1):
    import theano
    import theano.tensor as T
    download_data("mnist_hinton")
    images, labels = load_data("mnist_hinton")
    # Layer widths of the Hinton autoencoder, hidden layers scaled by `factor`
    d = [784, factor * 1000, factor * 500, factor * 250, factor * 30,
         factor * 250, factor * 500, factor * 1000, 784]
    learning_rate = 0.01
    data_in = T.matrix(name="Input", dtype=floatX)
    params = list()
    for i in range(1, 9):
        params.append(theano.shared(np.random.randn(d[i - 1], d[i]).astype(floatX) / 100.0,
                                    name='W_' + str(i)))
        params.append(theano.shared(np.zeros(d[i]).astype(floatX), name='b_' + str(i)))
    # input layer
    h = T.tanh(T.dot(data_in, params[0]) + params[1])
    # hidden layers
    for i in range(1, 7):
        h = T.tanh(T.dot(h, params[2 * i]) + params[2 * i + 1])
    # output layer: sigmoid reconstruction of the input
    h = T.nnet.sigmoid(T.dot(h, params[14]) + params[15])
    error = T.nnet.binary_crossentropy(h, data_in)
    loss = error.sum() / np.asarray(batch_size, dtype=floatX)
    grads = T.grad(loss, params)
    # Plain SGD updates
    updates = OrderedDict()
    for p, g in zip(params, grads):
        updates[p] = p - learning_rate * g
    start_time = time.time()
    func = theano.function([data_in], loss, updates=updates)
    compile_time = float(1000 * (time.time() - start_time))
    vals = np.zeros(epochs // period)
    num_images = images.shape[0]
    for i in range(epochs + burnout):
        if i == burnout:
            # Start timing only after the burnout iterations
            start_time = time.time()
        # Integer division keeps the slice index an int on Python 3 as well
        ind = i // (num_images // batch_size)
        data = images[ind:ind + batch_size]
        if i >= burnout and (i + 1 - burnout) % period == 0:
            vals[(i - burnout) // period] = func(data)
        else:
            func(data)
    # Milliseconds per iteration, matching the measurement in main_tf
    overall = float(1000 * (time.time() - start_time)) / float(epochs)
    return overall, compile_time
def main_tf(batch_size, factor, burnout, epochs, period=1):
    # Written against the TensorFlow 0.x API of early 2016 (tf.placeholder,
    # tf.scalar_summary, tf.initialize_all_variables)
    import tensorflow as tf
    download_data("mnist_hinton")
    images, labels = load_data("mnist_hinton")
    d = [784, factor * 1000, factor * 500, factor * 250, factor * 30,
         factor * 250, factor * 500, factor * 1000, 784]
    learning_rate = 0.01
    data_in = tf.placeholder(tf.float32, [None, 784])
    params = list()
    for i in range(1, 9):
        params.append(tf.Variable(np.random.randn(d[i - 1], d[i]).astype(floatX) / 100.0,
                                  name='W_' + str(i)))
        params.append(tf.Variable(np.zeros(d[i]).astype(floatX), name='b_' + str(i)))
    # input layer
    h = tf.nn.tanh(tf.matmul(data_in, params[0]) + params[1])
    for i in range(1, 7):
        h = tf.nn.tanh(tf.matmul(h, params[2 * i]) + params[2 * i + 1])
    # output layer stays as logits; the sigmoid is folded into the loss
    h = tf.matmul(h, params[14]) + params[15]
    error = tf.nn.sigmoid_cross_entropy_with_logits(h, data_in)
    loss = tf.reduce_sum(error) / np.asarray(batch_size, dtype=floatX)
    tf.scalar_summary("loss", loss)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    # Session creation and variable initialization count as "compile" time
    start_time = time.time()
    init = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init)
    compile_time = float(1000 * (time.time() - start_time))
    vals = np.zeros(epochs // period)
    num_images = images.shape[0]
    for i in range(epochs + burnout):
        if i == burnout:
            start_time = time.time()
        # Same integer-safe indexing as in main_theano
        ind = i // (num_images // batch_size)
        data = images[ind:ind + batch_size]
        if i >= burnout and (i + 1 - burnout) % period == 0:
            _, vals[(i - burnout) // period] = sess.run([train_step, loss],
                                                        feed_dict={data_in: data})
        else:
            sess.run([train_step, loss], feed_dict={data_in: data})
    # Milliseconds per iteration
    overall = float(1000 * (time.time() - start_time)) / float(epochs)
    # Reset the default graph so repeated calls do not accumulate nodes
    from tensorflow.python.framework import ops
    ops.reset_default_graph()
    return overall, compile_time
if __name__ == "__main__":
    # Usage: backend [repeats [burnout [epochs]]]
    if len(sys.argv) > 5:
        print("Expecting no more than 4 arguments")
    backend = 'theano'
    repeats = 100
    burnout = 100
    epochs = 500
    if len(sys.argv) > 1:
        backend = sys.argv[1]
    if len(sys.argv) > 2:
        repeats = int(sys.argv[2])
    if len(sys.argv) > 3:
        burnout = int(sys.argv[3])
    if len(sys.argv) > 4:
        epochs = int(sys.argv[4])
    batch_size_grid = [1000, 5000, 10000]
    factor_grid = [1, 5, 10]
    run_times = np.zeros((3, 3, repeats))
    compile_times = np.zeros((3, 3, repeats))
    if backend == 'theano':
        run_func = main_theano
    else:
        run_func = main_tf
    for b, batch_size in enumerate(batch_size_grid):
        for f, factor in enumerate(factor_grid):
            print("Running for batch size", batch_size, "and factor", factor)
            for i in range(repeats):
                run_times[b, f, i], compile_times[b, f, i] = run_func(batch_size, factor, burnout, epochs)
                print("Run:", run_times[b, f, i], compile_times[b, f, i])
    np.savez(backend + "_times", run_times=run_times, compile_times=compile_times)
    run_mean = np.mean(run_times, axis=2)
    run_std = np.std(run_times, axis=2)
    compile_mean = np.mean(compile_times, axis=2)
    compile_std = np.std(compile_times, axis=2)
    print("Run Means:")
    print(run_mean)
    print("Run Stds:")
    print(run_std)
    print("Compile Means:")
    print(compile_mean)
    print("Compile Stds:")
    print(compile_std)
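
To reproduce the benchmark, a usage sketch (the file name hinton_autoencoder.py is an assumption, since the gist's file name is not shown here; any backend string other than "theano" selects TensorFlow):

    python hinton_autoencoder.py theano 100 100 500
    python hinton_autoencoder.py tf 100 100 500

The optional positional arguments are backend, repeats, burnout iterations and timed iterations; results are saved to <backend>_times.npz via np.savez.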