Skip to content

Instantly share code, notes, and snippets.

@nyanshell
Created April 27, 2016 17:07
Show Gist options
  • Save nyanshell/c1bcf267841148d8ae9ae86c54832356 to your computer and use it in GitHub Desktop.
Save nyanshell/c1bcf267841148d8ae9ae86c54832356 to your computer and use it in GitHub Desktop.
dnn practice
# -*- encoding: utf-8 -*-
import os, struct
from array import array as pyarray
import numpy as np
from numpy import append, array, int8, uint8, zeros
def _load_mnist(dataset="training", digits=np.arange(10), path="."):
    """Load MNIST IDX files into NumPy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: ``"training"`` or ``"testing"``; selects which pair of
            IDX files (images + labels) is read from ``path``.
        digits: Collection of label values to keep. Samples whose label is
            not in this collection are dropped.
        path: Directory containing the uncompressed IDX files.

    Returns:
        A tuple ``(images, labels)``. ``images`` is a float32 array of shape
        ``(N, rows, cols)`` with pixel values divided by 255; ``labels`` is
        an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # `with` guarantees both files are closed even if parsing fails.
    with open(fname_lbl, 'rb') as flbl:
        # 8-byte header: magic number and item count (big-endian uint32).
        # Neither value is used; the sample count comes from the image file.
        struct.unpack(">II", flbl.read(8))
        lbl = np.frombuffer(flbl.read(), dtype=np.int8)

    with open(fname_img, 'rb') as fimg:
        # 16-byte header: magic number, image count, rows, cols.
        _magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = np.frombuffer(fimg.read(), dtype=np.uint8)

    lbl = lbl[:size]
    img = img[:size * rows * cols].reshape((size, rows, cols))

    # Boolean mask of the samples whose label was requested. list() lets
    # any iterable (ndarray, list, set, ...) be used for `digits`.
    keep = np.isin(lbl, list(digits))

    # Boolean indexing copies, so the results are writable and independent
    # of the read-only buffers returned by frombuffer.
    images = img[keep].astype(np.float32) / np.float32(255.0)
    labels = lbl[keep].reshape((-1, 1))
    return images, labels
def vectorized_result(j):
    """Build the one-hot column vector for digit ``j``.

    The result is a ``(10, 1)`` float array that is 0.0 everywhere except
    for a 1.0 at index ``j``. It serves as the desired network output for
    a sample labelled ``j``.
    """
    one_hot = np.zeros((10, 1))
    one_hot[j] = 1.0
    return one_hot
def load_mnist():
    """Load MNIST from ``./data`` and shape it for the network.

    Returns:
        A tuple ``(training_data, validation_data, test_data)`` of lists of
        ``(x, y)`` pairs, where ``x`` is a ``(784, 1)`` column vector.
        For training pairs ``y`` is the one-hot vector from
        ``vectorized_result``; for validation and test pairs ``y`` is the
        raw label entry.

    NOTE(review): all three splits are cut from the "training" IDX files.
    Validation is the first 10000 samples of the training split (so it
    overlaps the training data) and test is everything from sample 50000
    onwards. Confirm this is intended before relying on the numbers.
    """
    images, labels = _load_mnist(path='./data')

    def as_columns(batch):
        # Flatten every image into a (784, 1) column vector.
        return [np.reshape(sample, (784, 1)) for sample in batch]

    train_images, train_labels = images[:50000], labels[:50000]
    valid_images, valid_labels = train_images[:10000], train_labels[:10000]
    test_images, test_labels = images[50000:], labels[50000:]

    one_hot_targets = [vectorized_result(label) for label in train_labels]

    training_data = list(zip(as_columns(train_images), one_hot_targets))
    validation_data = list(zip(as_columns(valid_images), valid_labels))
    test_data = list(zip(as_columns(test_images), test_labels))
    return (training_data, validation_data, test_data)
def _load_data():
    """Unpickle the three data splits from ``./data/mnist.pkl.gz``.

    Returns:
        A tuple ``(training_data, validation_data, test_data)`` exactly as
        stored in the pickle.

    Raises:
        IOError / OSError: If the archive cannot be opened or read.

    SECURITY: ``pickle`` can execute arbitrary code while loading. Only use
    this with an archive obtained from a trusted source.
    """
    import gzip
    import sys
    try:
        import cPickle as pickle  # Python 2: C-accelerated pickle
    except ImportError:
        import pickle  # Python 3: cPickle was merged into pickle

    # Context manager closes the archive even if unpickling raises.
    with gzip.open('./data/mnist.pkl.gz', 'rb') as f:
        if sys.version_info[0] >= 3:
            # Presumably the archive was written by Python 2; 'latin1'
            # lets Python 3 decode its byte strings (needed for NumPy
            # arrays pickled under Python 2) and is harmless otherwise.
            training_data, validation_data, test_data = pickle.load(
                f, encoding='latin1')
        else:
            training_data, validation_data, test_data = pickle.load(f)
    return (training_data, validation_data, test_data)
def load_data_wrapper():
    """Load the pickled data and shape it for the network.

    Each split returned by ``_load_data`` is indexed as
    ``(inputs, labels)``. Every input is reshaped to a ``(784, 1)`` column
    vector. Training labels are converted to one-hot vectors with
    ``vectorized_result``; validation and test labels are passed through
    unchanged.

    Returns:
        A tuple ``(training_data, validation_data, test_data)`` of lists of
        ``(x, y)`` pairs. Lists (rather than lazy ``zip`` objects) are
        returned so the result supports ``len()`` and ``random.shuffle()``
        on Python 3 as well as Python 2.
    """
    tr_d, va_d, te_d = _load_data()

    def as_columns(inputs):
        # Flatten every sample into a (784, 1) column vector.
        return [np.reshape(x, (784, 1)) for x in inputs]

    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = list(zip(as_columns(tr_d[0]), training_results))
    validation_data = list(zip(as_columns(va_d[0]), va_d[1]))
    test_data = list(zip(as_columns(te_d[0]), te_d[1]))
    return (training_data, validation_data, test_data)
# -*- coding: utf-8 -*-
import random
import numpy as np
class Network():
    """Fully connected feed-forward network trained with mini-batch SGD.

    Every layer applies the module-level ``sigmoid`` activation; gradients
    are obtained by backpropagation.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        Args:
            sizes: Number of neurons per layer, input layer first, e.g.
                ``[784, 30, 10]``. The input layer has no weights/biases.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One (y, 1) bias column and one (y, x) weight matrix for each
        # pair of consecutive layers x -> y, drawn from N(0, 1).
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for input column vector ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data,
            epochs, mini_batch_size, eta, test_data=None):
        """Train with mini-batch stochastic gradient descent.

        Args:
            training_data: Iterable of ``(x, y)`` training pairs.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of samples per gradient step.
            eta: Learning rate.
            test_data: Optional iterable of ``(x, label)`` pairs. When
                non-empty, the count of correct predictions is printed
                after every epoch.
        """
        # Work on private lists: the caller's sequence is not reordered by
        # the shuffle below, and one-shot iterables (e.g. a Python 3 `zip`)
        # are accepted as well as lists.
        training_data = list(training_data)
        if test_data is not None:
            test_data = list(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            for k in range(0, n, mini_batch_size):
                self.update_mini_batch(
                    training_data[k:k + mini_batch_size], eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), len(test_data)
                ))
            else:
                print("Epoch %s complete" % j)

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs.
            eta: Learning rate.
        """
        if not mini_batch:
            # Nothing to learn from; also avoids dividing by zero below.
            return
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # float() keeps this a true division even for an integer `eta`
        # under Python 2, where int / int would truncate (often to 0).
        step = eta / float(len(mini_batch))
        self.weights = [w - step * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return the cost gradient for a single sample.

        Args:
            x: Input column vector.
            y: Desired output column vector.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of per-layer lists shaped like
            ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: remember every weighted input z and activation.
        act = x
        acts = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, act) + b
            zs.append(z)
            act = sigmoid(z)
            acts.append(act)

        # Backward pass: error at the output layer first.
        delta = self.cost_derivative(acts[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, acts[-2].transpose())
        # l counts layers from the end: l = 2 is the second-to-last layer.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, acts[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``, the output-layer cost gradient."""
        return output_activations - y

    def evaluate(self, test_data):
        """Return how many samples in ``test_data`` are classified correctly.

        The prediction for a sample is the index of the largest output
        activation.

        Args:
            test_data: Iterable of ``(x, label)`` pairs, where ``label`` is
                an integer or a size-1 array holding one.
        """
        correct = 0
        for x, y in test_data:
            predicted = np.argmax(self.feedforward(x))
            # Labels may be plain ints or size-1 arrays (e.g. shape (1,));
            # .item() collapses either to a Python scalar so the comparison
            # yields a plain bool instead of an array.
            correct += int(predicted == np.asarray(y).item())
        return correct
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
def sigmoid_prime(z):
    """Return the derivative of ``sigmoid`` evaluated at ``z``, element-wise."""
    # Evaluate the sigmoid once and reuse it; the result is identical to
    # computing it twice but halves the number of exp() evaluations.
    s = sigmoid(z)
    return s * (1 - s)
if __name__ == '__main__':
    from dt_load import load_mnist

    # Layer widths: 784 inputs, one hidden layer of 30, 10 outputs.
    layer_sizes = [784, 30, 10]
    # Training hyper-parameters passed to Network.SGD.
    epochs = 30
    mini_batch_size = 10
    learning_rate = 3.0

    # The validation split is loaded but not used by this script.
    training_set, _validation_set, test_set = load_mnist()
    classifier = Network(layer_sizes)
    classifier.SGD(training_set, epochs, mini_batch_size, learning_rate,
                   test_data=test_set)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment