# mnist_gpu_deterministic.py -- secret GitHub gist, created May 24, 2017.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np, tensorflow as tf, struct, os
def read_mnist(fname):
    """Memory-map the data section of an IDX-style file of unsigned bytes.

    The header is read as a big-endian 32-bit magic number whose low byte is
    the array rank and whose remaining bits must equal 0x08, followed by one
    big-endian 32-bit size per dimension.

    Args:
        fname: Path of the file to read.

    Returns:
        An ``np.memmap`` of dtype ``uint8`` opened in mode ``'c'`` (writes
        stay in memory and are not saved to disk), shaped as described by the
        header and starting right after it.

    Raises:
        AssertionError: If the magic number does not announce unsigned bytes.
        struct.error: If the file is too short to contain the header.
    """
    # `with` guarantees the handle is released even when header parsing or
    # the magic check below raises.
    with open(fname, 'rb') as f:
        magic, = struct.unpack('>i', f.read(4))
        # Raised explicitly instead of via `assert` so the check survives
        # `python -O`; the exception type is kept for existing callers.
        if (magic >> 8) != 0x08:
            raise AssertionError('Expected unsigned byte data')
        rank = magic & 0xFF
        shape = tuple(struct.unpack('>{}'.format('i' * rank), f.read(4 * rank)))
    # Header size: one 4-byte magic word plus one 4-byte size per dimension.
    return np.memmap(fname, dtype=np.uint8, mode='c', offset=4 * (rank + 1), shape=shape)
# Load training (t*) and validation (v*) images (*x) and labels (*y); the four
# files are looked up relative to the current working directory.
tx, ty, vx, vy = (
    read_mnist(fname)
    for fname in (
        'train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
        't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte',
    )
)
# Scale the byte-valued pixels by 1/255 (yields floating-point arrays).
tx = tx / 255.
vx = vx / 255.
# One-hot encode the labels with a single broadcast comparison instead of a
# Python-level loop per label: entry (n, j) is 1.0 where label n equals j and
# 0.0 otherwise, so a label outside 0..9 still yields an all-zero row.
ty = (np.asarray(ty)[:, None] == np.arange(10)).astype(np.float64)
vy = (np.asarray(vy)[:, None] == np.arange(10)).astype(np.float64)
def reduce_sum_det(x):
    """Return the sum of every element of `x` as a scalar tensor.

    The tensor is flattened into a single row and multiplied by the transpose
    of an equally shaped row of ones, so the total comes out of a matrix
    product rather than a reduction op.

    NOTE(review): presumably done for run-to-run reproducibility, as the
    `_det` suffix suggests -- confirm.
    """
    flat_row = tf.reshape(x, [1, -1])
    ones_row = tf.ones_like(flat_row)
    # (1, N) x (N, 1) -> (1, 1), then drop both dimensions.
    total = tf.matmul(flat_row, ones_row, transpose_b=True)
    return tf.reshape(total, [])
def compute_next_det(prev, out_size):
    """Build one ReLU layer on top of `prev` and return its output tensor.

    Args:
        prev: 2-D tensor whose second dimension is statically known; it is
            used as the input width of the layer.
        out_size: Number of units in the new layer.

    Returns:
        ``relu([prev, 1] @ w)`` where ``w`` is a new variable holding the
        weights plus one extra bottom row that acts as the bias.
    """
    in_size = prev.get_shape().as_list()[1]
    # Weights are drawn from N(0, 0.01) using the module-level `seed`.
    w_initial = tf.random_normal(
        [in_size, out_size], mean=0, stddev=.01, dtype=tf.float32, seed=seed)
    # The appended row of zeros is the initial bias.
    w = tf.Variable(concat_constant(w_initial, column=False, constant=0.0))
    # The appended column of ones multiplies that bias row in the matmul.
    augmented = concat_constant(prev, column=True, constant=1.0)
    return tf.nn.relu(tf.matmul(augmented, w))
def concat_constant(x, column=True, constant=1.0):
    """Append one constant-valued column or row to the 2-D tensor `x`.

    Args:
        x: Matrix to extend; its shape is read at run time via `tf.shape`.
        column: When true, add a column on the right (axis 1); otherwise add
            a row at the bottom (axis 0).
        constant: Value that fills the added column or row (as float32).

    Returns:
        The concatenation of `x` and the constant-filled strip.
    """
    fill = tf.reshape(tf.constant(constant, dtype=tf.float32), [1, 1])
    if column:
        # One entry per row of x.
        multiples = tf.stack([tf.shape(x)[0], 1])
        axis = 1
    else:
        # One entry per column of x.
        multiples = tf.stack([1, tf.shape(x)[1]])
        axis = 0
    strip = tf.tile(fill, multiples)
    return tf.concat([x, strip], axis)
# Seed read by compute_next_det for weight initialization and passed to
# np.random.seed before training.
seed = 1
# Image width, image height and number of output units.
ww = 28
hh = 28
oo = 10
# Hidden-layer count and the width of each hidden layer.
num_hidden = 2
hidden_width = 1000

# Inputs: flattened images and their target vectors.
x = tf.placeholder(tf.float32, shape=[None, hh * ww], name='x')
y = tf.placeholder(tf.float32, shape=[None, oo], name='y')

# Stack the hidden layers, then the output layer.
current = x
for _ in range(num_hidden):
    current = compute_next_det(current, hidden_width)
o = compute_next_det(current, oo)

# Loss: sum of squared errors divided by the number of rows fed into x.
diff = o - y
_squared_error_sum = reduce_sum_det(diff * diff)
_batch_rows = tf.cast(tf.shape(x)[0], dtype=tf.float32)
loss = _squared_error_sum / _batch_rows
train = tf.train.AdamOptimizer().minimize(loss)
# Train for a fixed number of epochs on shuffled mini-batches and report the
# validation accuracy and mean validation loss after every epoch.
mb_size = 1000
with tf.Session() as sess:
    # Seed NumPy so the per-epoch shuffles are the same on every run.
    np.random.seed(seed)
    sess.run(tf.global_variables_initializer())
    sh = np.arange(len(tx))
    max_epochs = 5
    # Floor division keeps the section count an integer under Python 3, where
    # `/` would hand np.split a float. np.split still requires the data to
    # divide evenly into the requested number of sections.
    vxx = np.split(vx, len(vx) // mb_size)
    vyy = np.split(vy, len(vy) // mb_size)
    for e in range(max_epochs):
        # Reshuffle the index array in place, then cut the permuted training
        # set into mini-batches.
        np.random.shuffle(sh)
        xs = np.split(tx[sh], len(tx) // mb_size)
        ys = np.split(ty[sh], len(ty) // mb_size)
        for mbx, mby in zip(xs, ys):
            sess.run(train, feed_dict={x: mbx.reshape(mb_size, ww*hh), y: mby})
        # Evaluate on the validation batches without running the train op.
        correct = 0
        vlosses = []
        for vbx, vby in zip(vxx, vyy):
            out, vloss = sess.run((o, loss), feed_dict={x: vbx.reshape(mb_size, ww*hh), y: vby})
            # A prediction is correct when the largest output matches the
            # position of the 1 in the one-hot target.
            correct += np.sum(np.argmax(out, axis=1) == np.argmax(vby, axis=1))
            vlosses.append(vloss)
        print('epoch = %2d correct = %4d loss = %.8f' % (e, correct, np.mean(vlosses)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.