# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import struct
import os
import numpy as np
import tensorflow as tf

def read_mnist(fname):
    """Memory-map an MNIST file in IDX format and return it as a numpy array."""
    with open(fname, 'rb') as f:
        # The IDX magic number encodes the element type in its third byte
        # (0x08 = unsigned byte) and the number of dimensions in its fourth.
        magic, = struct.unpack('>i', f.read(4))
        assert (magic >> 8) == 0x08, 'Expected unsigned byte data'
        rank = magic & 0xFF
        shape = tuple(struct.unpack('>{}'.format('i' * rank), f.read(4 * rank)))
    # Skip the header (4-byte magic plus one 4-byte size per dimension) and
    # map the raw pixel/label bytes directly.
    return np.memmap(fname, dtype=np.uint8, mode='c', offset=4 * (rank + 1), shape=shape)
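
# --- Not part of the original gist --------------------------------------
# Optional helper to fetch and decompress the four IDX files if they are not
# already in the working directory. This is a sketch assuming Python 3 and the
# canonical MNIST distribution at http://yann.lecun.com/exdb/mnist/, where each
# file is gzip-compressed under the same name plus a '.gz' suffix.
import gzip
import shutil
import urllib.request

def maybe_download(fname, base_url='http://yann.lecun.com/exdb/mnist/'):
    if not os.path.exists(fname):
        urllib.request.urlretrieve(base_url + fname + '.gz', fname + '.gz')
        with gzip.open(fname + '.gz', 'rb') as src, open(fname, 'wb') as dst:
            shutil.copyfileobj(src, dst)

for fname in ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
              't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'):
    maybe_download(fname)
# -------------------------------------------------------------------------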
# Load the standard MNIST training and test (held-out) sets.
tx, ty, vx, vy = map(read_mnist, [
    'train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
    't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'
])
# Scale pixel values to [0, 1] and one-hot encode the labels.
tx = tx / 255.
vx = vx / 255.
ty = np.array([np.array([1. if i == j else 0. for j in range(10)]) for i in ty])
vy = np.array([np.array([1. if i == j else 0. for j in range(10)]) for i in vy])
def compute_next(prev, out_size):
    """Given the previous layer's output and the next layer's size, build the next layer."""
    # Small random weights, zero biases, ReLU activation.
    w = tf.Variable(tf.random_normal([prev.get_shape().as_list()[1], out_size],
                                     0, .01, dtype=tf.float32, seed=seed))
    b = tf.Variable(tf.zeros(dtype=tf.float32, shape=[out_size]))
    return tf.nn.relu(tf.matmul(prev, w) + b)
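
# Not part of the original gist: in TF 1.x the same fully connected ReLU layer
# could also be written with the higher-level layers API, roughly
#   tf.layers.dense(prev, out_size, activation=tf.nn.relu)
# The gist builds the layer by hand above, which keeps the initializer explicit.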
seed = 1
ww, hh, oo = 28, 28, 10   # image width, image height, number of classes
num_hidden = 2
hidden_width = 1000

x = tf.placeholder(tf.float32, [None, hh * ww], name='x')
y = tf.placeholder(tf.float32, [None, oo], name='y')

# Stack the hidden layers, then the output layer.
current = x
for i in range(num_hidden):
    current = compute_next(current, hidden_width)
o = compute_next(current, oo)

# Mean squared error against the one-hot targets, averaged over the minibatch.
diff = o - y
loss = tf.reduce_sum(diff * diff) / tf.cast(tf.shape(x)[0], dtype=tf.float32)
train = tf.train.AdamOptimizer().minimize(loss)

mb_size = 1000
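
# Not part of the original gist: an equivalent in-graph accuracy op could be
# defined at this point instead of the numpy argmax comparison used below, e.g.
#   accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(o, 1), tf.argmax(y, 1)), tf.float32))
# Left as a commented sketch so the original evaluation path stays unchanged.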
with tf.Session() as sess:
    np.random.seed(seed)
    sess.run(tf.global_variables_initializer())
    sh = np.arange(len(tx))   # shuffled index into the training set
    max_epochs = 5
    # Pre-split the validation set into minibatches (integer division so that
    # np.split receives an int section count).
    vxx = np.split(vx, len(vx) // mb_size)
    vyy = np.split(vy, len(vy) // mb_size)
    for e in range(max_epochs):
        # Reshuffle the training set and split it into minibatches.
        np.random.shuffle(sh)
        xs = np.split(tx[sh], len(tx) // mb_size)
        ys = np.split(ty[sh], len(ty) // mb_size)
        for mbx, mby in zip(xs, ys):
            sess.run(train, feed_dict={x: mbx.reshape(mb_size, ww * hh), y: mby})
        # Evaluate on the held-out set after each epoch.
        correct = 0
        vlosses = []
        for vbx, vby in zip(vxx, vyy):
            out, vloss = sess.run((o, loss), feed_dict={x: vbx.reshape(mb_size, ww * hh), y: vby})
            correct += np.sum(np.argmax(out, axis=1) == np.argmax(vby, axis=1))
            vlosses.append(vloss)
        print('epoch = %2d correct = %4d loss = %.8f' % (e, correct, np.mean(vlosses)))
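
    # Not part of the original gist: to persist the trained weights, the
    # standard TF 1.x checkpointing API could be used at this point, e.g.
    #   saver = tf.train.Saver()
    #   saver.save(sess, './mnist_mlp.ckpt')
    # ('./mnist_mlp.ckpt' is an arbitrary illustrative path.)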