# /mnist_gpu_deterministic.py Secret

## mnist_gpu_deterministic.py
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np, tensorflow as tf, struct, os

def read_mnist(fname):
    """Memory-map the payload of an IDX-style file such as the MNIST data files.

    The first 4 bytes are read as a big-endian 32-bit word: the bits above
    the low byte must equal 0x08 (unsigned byte data) and the low byte is
    the rank.  `rank` big-endian 32-bit dimension sizes follow, and the
    rest of the file is mapped as the array payload.

    Args:
        fname: Path of the file to read.

    Returns:
        An ``np.memmap`` of dtype ``uint8`` with the shape given in the
        header, starting right after the header bytes.

    Raises:
        AssertionError: If the header does not announce unsigned byte data.
    """
    # The context manager closes the handle even when the header check or
    # struct.unpack raises; a bare open()/close() pair leaked it on those paths.
    with open(fname, 'rb') as f:
        magic, = struct.unpack('>i', f.read(4))
        assert (magic >> 8) == 0x08, 'Expected unsigned byte data'
        rank = magic & 0xFF
        shape = tuple(struct.unpack('>{}'.format('i' * rank), f.read(4 * rank)))
    # Header size is one magic word plus `rank` dimension words, 4 bytes each.
    # mode='c' maps copy-on-write, so in-memory edits are never written back.
    return np.memmap(fname, dtype=np.uint8, mode='c', offset=4*(rank+1), shape=shape)

# Load the training (tx, ty) and validation (vx, vy) images and labels from
# the four MNIST files, looked up relative to the working directory.
tx, ty, vx, vy = map(read_mnist, [
    'train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
    't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'
])

# Scale the unsigned-byte pixel values into floats in [0, 1].
tx = tx / 255.
vx = vx / 255.
# One-hot encode the labels over 10 classes.  Broadcasting each label against
# 0..9 yields the same float64 rows as a per-label Python loop (a label
# outside 0..9 still gives an all-zero row) without iterating in Python.
ty = (np.asarray(ty)[:, None] == np.arange(10)).astype(np.float64)
vy = (np.asarray(vy)[:, None] == np.arange(10)).astype(np.float64)

def reduce_sum_det(x):
    """Sum every element of `x` and return the total as a scalar tensor.

    The sum is expressed as a matrix product of the flattened input with a
    same-shaped vector of ones rather than a reduction op.
    NOTE(review): presumably this is done for run-to-run determinism on
    GPU, as the `_det` suffix suggests -- confirm.
    """
    flat = tf.reshape(x, [1, -1])
    ones = tf.ones_like(flat)
    # (1, n) x (1, n)^T -> (1, 1): the dot product with ones is the sum.
    total = tf.matmul(flat, ones, transpose_b=True)
    return tf.reshape(total, [])

def compute_next_det(prev, out_size):
    """Build one ReLU layer on top of `prev` and return its output tensor.

    Args:
        prev: Output of the previous layer; its second static dimension is
            used as the layer's input width.
        out_size: Number of units in the new layer.

    Returns:
        ``relu([prev | 1] @ w)``, where `w` is a new variable holding the
        randomly initialised weights with an extra all-zero row appended.
        The column of ones added to `prev` multiplies that extra row, so it
        acts as the bias term.
    """
    in_size = prev.get_shape().as_list()[1]
    # Reads the module-level `seed` at call time.
    w_initial = tf.random_normal([in_size, out_size], 0, .01, dtype=tf.float32, seed=seed)
    weights_with_bias = concat_constant(w_initial, column=False, constant=0.0)
    w = tf.Variable(weights_with_bias)
    augmented_input = concat_constant(prev, column=True, constant=1.0)
    pre_activation = tf.matmul(augmented_input, w)
    return tf.nn.relu(pre_activation)

def concat_constant(x, column=True, constant=1.0):
    """Append one column or one row filled with `constant` to matrix `x`.

    Args:
        x: Rank-2 tensor to extend.
        column: If true, append a column (concatenate along axis 1);
            otherwise append a row (concatenate along axis 0).
        constant: Value used to fill the appended column or row.

    Returns:
        `x` with the extra column or row concatenated at the end.
    """
    fill = tf.reshape(tf.constant(constant, dtype=tf.float32), [1, 1])
    # Tile the 1x1 fill value to match x's dynamic size along the other axis.
    if column:
        multiples = tf.stack([tf.shape(x)[0], 1])
        axis = 1
    else:
        multiples = tf.stack([1, tf.shape(x)[1]])
        axis = 0
    padding = tf.tile(fill, multiples)
    return tf.concat([x, padding], axis)

# Seed used by the weight initialisers in compute_next_det and by NumPy's
# shuffling in the training loop.
seed = 1
# Image width, image height and number of output units.
ww = 28
hh = 28
oo = 10
num_hidden = 2
hidden_width = 1000

# Placeholders for a batch of flattened images and their target vectors.
x = tf.placeholder(tf.float32, [None, hh*ww], name='x')
y = tf.placeholder(tf.float32, [None, oo], name='y')

# Stack `num_hidden` hidden layers, then the output layer.
current = x
for _ in range(num_hidden):
    current = compute_next_det(current, hidden_width)
o = compute_next_det(current, oo)

# Loss: sum of squared errors divided by the number of rows fed into `x`.
diff = o - y
squared_error_sum = reduce_sum_det(diff*diff)
loss = squared_error_sum / tf.cast(tf.shape(x)[0], dtype=tf.float32)
train = tf.train.AdamOptimizer().minimize(loss)
# Mini-batch size used for both training and validation.
mb_size = 1000

# Train for a fixed number of epochs, reporting validation accuracy and mean
# validation loss after each one.
with tf.Session() as sess:
    # Seed NumPy so the per-epoch shuffles repeat from run to run.
    np.random.seed(seed)
    sess.run(tf.global_variables_initializer())
    sh = np.arange(len(tx))
    max_epochs = 5
    # np.split takes an integer section count; `//` keeps it an int, whereas
    # `/` produces a float under Python 3.  np.split raises ValueError when
    # the data does not divide evenly into that many sections.
    vxx = np.split(vx, len(vx) // mb_size)
    vyy = np.split(vy, len(vy) // mb_size)
    for e in range(max_epochs):
        # Reshuffle the sample order, then cut it into mini-batches.
        np.random.shuffle(sh)
        xs = np.split(tx[sh], len(tx) // mb_size)
        ys = np.split(ty[sh], len(ty) // mb_size)
        for mbx, mby in zip(xs, ys):
            sess.run(train, feed_dict={x: mbx.reshape(mb_size, ww*hh), y: mby})
        # Evaluate on the validation batches.
        correct = 0
        vlosses = []
        for vbx, vby in zip(vxx, vyy):
            out, vloss = sess.run((o, loss), feed_dict={x: vbx.reshape(mb_size, ww*hh), y: vby})
            # A prediction counts as correct when the largest output matches
            # the position of the 1 in the one-hot target.
            correct += np.sum(np.argmax(out, axis=1) == np.argmax(vby, axis=1))
            vlosses.append(vloss)
        print('epoch = %2d correct = %4d loss = %.8f' % (e, correct, np.mean(vlosses)))
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	import numpy as np, tensorflow as tf, struct, os

	def read_mnist(fname):
	    """Memory-map the payload of an IDX-style file such as the MNIST data files.

	    The first 4 bytes are read as a big-endian 32-bit word: the bits above
	    the low byte must equal 0x08 (unsigned byte data) and the low byte is
	    the rank.  `rank` big-endian 32-bit dimension sizes follow, and the
	    rest of the file is mapped as the array payload.

	    Args:
	        fname: Path of the file to read.

	    Returns:
	        An ``np.memmap`` of dtype ``uint8`` with the shape given in the
	        header, starting right after the header bytes.

	    Raises:
	        AssertionError: If the header does not announce unsigned byte data.
	    """
	    # The function body had lost its indentation; nesting is restored here.
	    # The context manager closes the handle even when the header check or
	    # struct.unpack raises.
	    with open(fname, 'rb') as f:
	        magic, = struct.unpack('>i', f.read(4))
	        assert (magic >> 8) == 0x08, 'Expected unsigned byte data'
	        rank = magic & 0xFF
	        shape = tuple(struct.unpack('>{}'.format('i' * rank), f.read(4 * rank)))
	    # Header size is one magic word plus `rank` dimension words, 4 bytes each.
	    # mode='c' maps copy-on-write, so in-memory edits are never written back.
	    return np.memmap(fname, dtype=np.uint8, mode='c', offset=4*(rank+1), shape=shape)

	# Load the training (tx, ty) and validation (vx, vy) images and labels from
	# the four MNIST files, looked up relative to the working directory.
	tx, ty, vx, vy = map(read_mnist, [
	    'train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
	    't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'
	])
	# Scale the unsigned-byte pixel values into floats in [0, 1].
	tx = tx / 255.
	vx = vx / 255.
	# One-hot encode the labels over 10 classes.  Broadcasting each label
	# against 0..9 yields the same float64 rows as a per-label Python loop
	# (a label outside 0..9 still gives an all-zero row).
	ty = (np.asarray(ty)[:, None] == np.arange(10)).astype(np.float64)
	vy = (np.asarray(vy)[:, None] == np.arange(10)).astype(np.float64)

	def reduce_sum_det(x):
	    """Sum every element of `x` and return the total as a scalar tensor.

	    The sum is expressed as a matrix product of the flattened input with a
	    same-shaped vector of ones rather than a reduction op.
	    NOTE(review): presumably done for run-to-run determinism on GPU, as
	    the `_det` suffix suggests -- confirm.
	    """
	    # The function body had lost its indentation; nesting is restored here.
	    v = tf.reshape(x, [1, -1])
	    # (1, n) x (1, n)^T -> (1, 1): the dot product with ones is the sum.
	    return tf.reshape(tf.matmul(v, tf.ones_like(v), transpose_b=True), [])

	def compute_next_det(prev, out_size):
	    """Given previous layer output and size of next layer, compute next layer output.

	    Args:
	        prev: Output of the previous layer; its second static dimension is
	            used as the layer's input width.
	        out_size: Number of units in the new layer.

	    Returns:
	        ``relu([prev | 1] @ w)``, where `w` is a new variable holding the
	        randomly initialised weights with an extra all-zero row appended.
	    """
	    # The function body had lost its indentation; nesting is restored here.
	    # Reads the module-level `seed` at call time.
	    w_initial = tf.random_normal([prev.get_shape().as_list()[1], out_size], 0, .01, dtype=tf.float32, seed=seed)
	    # The appended zero row pairs with the column of ones added to `prev`
	    # below, so it acts as the bias term.
	    w = tf.Variable(concat_constant(w_initial, column=False, constant=0.0))
	    return tf.nn.relu(tf.matmul(concat_constant(prev, column=True, constant=1.0), w))

	def concat_constant(x, column=True, constant=1.0):
	    """Augment a matrix with an extra column or row with a constant value.

	    Args:
	        x: Rank-2 tensor to extend.
	        column: If true, append a column (concatenate along axis 1);
	            otherwise append a row (concatenate along axis 0).
	        constant: Value used to fill the appended column or row.

	    Returns:
	        `x` with the extra column or row concatenated at the end.
	    """
	    # The function body had lost its indentation; nesting is restored here.
	    fill = tf.reshape(tf.constant(constant, dtype=tf.float32), [1, 1])
	    # Tile the 1x1 fill value to match x's dynamic size along the other axis.
	    shape = tf.stack([tf.shape(x)[0], 1] if column else [1, tf.shape(x)[1]])
	    tiled = tf.tile(fill, shape)
	    return tf.concat([x, tiled], 1 if column else 0)

	# Seed used by the weight initialisers in compute_next_det and by NumPy's
	# shuffling in the training loop.
	seed = 1
	# Image width, image height and number of output units.
	ww, hh, oo = 28, 28, 10
	num_hidden = 2
	hidden_width = 1000
	# Placeholders for a batch of flattened images and their target vectors.
	x = tf.placeholder(tf.float32, [None, hh*ww], name='x')
	y = tf.placeholder(tf.float32, [None, oo], name='y')
	# Stack `num_hidden` hidden layers, then the output layer.  The loop body
	# had lost its indentation; nesting is restored here.
	current = x
	for i in range(num_hidden):
	    current = compute_next_det(current, hidden_width)
	o = compute_next_det(current, oo)
	# Loss: sum of squared errors divided by the number of rows fed into `x`.
	diff = o - y
	loss = reduce_sum_det(diff*diff) / tf.cast(tf.shape(x)[0], dtype=tf.float32)
	train = tf.train.AdamOptimizer().minimize(loss)
	# Mini-batch size used for both training and validation.
	mb_size = 1000

	# Train for a fixed number of epochs, reporting validation accuracy and
	# mean validation loss after each one.  The with/for bodies had lost their
	# indentation; nesting is restored here.
	with tf.Session() as sess:
	    # Seed NumPy so the per-epoch shuffles repeat from run to run.
	    np.random.seed(seed)
	    sess.run(tf.global_variables_initializer())
	    sh = np.arange(len(tx))
	    max_epochs = 5
	    # np.split takes an integer section count; `//` keeps it an int,
	    # whereas `/` produces a float under Python 3.  np.split raises
	    # ValueError when the data does not divide evenly.
	    vxx = np.split(vx, len(vx) // mb_size)
	    vyy = np.split(vy, len(vy) // mb_size)
	    for e in range(max_epochs):
	        # Reshuffle the sample order, then cut it into mini-batches.
	        np.random.shuffle(sh)
	        xs = np.split(tx[sh], len(tx) // mb_size)
	        ys = np.split(ty[sh], len(ty) // mb_size)
	        for mbx, mby in zip(xs, ys):
	            sess.run(train, feed_dict={x: mbx.reshape(mb_size, ww*hh), y: mby})
	        # Evaluate on the validation batches.
	        correct = 0
	        vlosses = []
	        for vbx, vby in zip(vxx, vyy):
	            out, vloss = sess.run((o, loss), feed_dict={x: vbx.reshape(mb_size, ww*hh), y: vby})
	            # Correct when the largest output matches the one-hot target.
	            correct += np.sum(np.argmax(out, axis=1) == np.argmax(vby, axis=1))
	            vlosses.append(vloss)
	        print('epoch = %2d correct = %4d loss = %.8f' % (e, correct, np.mean(vlosses)))