Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
implementation of backpropogation algorithm
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# implementation of backpropogation algorithm based on
# note that mini-batches are calculated in matrix form
import numpy as np
import random
def chunks(lst, n):
"""split lst into evenly sized chunks of n"""
for i in range(0, len(lst), n):
yield lst[i:i+n]
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def sigmoid_prime(z):
"""Derivative of the sigmoid function."""
return sigmoid(z)*(1-sigmoid(z))
class Network(object):
"""A Neural Network"""
def __init__(self, sizes):
"""sizes is List[int] representing number of unit of each layer, starting from the input layer"""
self.num_layers = len(sizes)
self.sizes = sizes
self.biases = [np.random.randn(n, 1) for n in self.sizes[1:]]
self.weights = [np.random.randn(y, x) for x, y in zip(self.sizes, self.sizes[1:])]
def feed_forward(self, X):
"""return the result of the network given input X"""
activation = X
for b, w in zip(self.biases, self.weights):
activation = sigmoid( + b)
return activation
def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None, matrix=True):
"""Train the NN using mini-batch stochastic gradient descent. The "training_data" is
:training_data: a list of tuples "(x, y)" where "x" is the input and "y" is the label.
:epochs: number of iteration
:mini_batch_size: number of mini_batches in a chunk
:eta: learning rate
:test_data: If provided, then "test_data" will be evaluated after each epoch
:returns: None
if test_data:
n_test = len(test_data)
n = len(training_data)
update_mini_batch = self.update_mini_batch_matrix if matrix else self.update_mini_batch
for j in range(epochs):
mini_batches = list(chunks(training_data, mini_batch_size))
for mini_batch in mini_batches:
update_mini_batch(mini_batch, eta)
if test_data:
print(f"Epoch {j}: {self.evaluate(test_data)}/{n_test}")
print(f"Epoch {j} complete")
def update_mini_batch(self, mini_batch, eta):
"""Update the NN's weight and bias by applying gradient decent using backpropogation
:mini_batch: List[(x, y)] where x is input and y is label
:eta: learning rate
:returns: None
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
for x, y in mini_batch:
delta_nbla_b, delta_nbla_w = self.backprop(x, y)
nabla_b = [nb+dnb for nb,dnb in zip(nabla_b, delta_nbla_b)]
nabla_w = [nw+dnw for nw,dnw in zip(nabla_w, delta_nbla_w)]
self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]
def update_mini_batch_matrix(self, mini_batch, eta):
"""Matrix version of update_mini_batch
:mini_batch: List[(x, y)] where x is input and y is label
:eta: learning rate
:returns: None
# construct X
X = np.concatenate([x for x,_ in mini_batch], axis=1)
Y = np.concatenate([y for _,y in mini_batch], axis=1)
nabla_b, nabla_w = self.backprop(X, Y)
self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
self.biases = [b-(eta/len(mini_batch))*np.sum(nb, axis=1).reshape(b.shape) for b, nb in zip(self.biases, nabla_b)]
def evaluate(self, test_data):
"""evaluate the label for test_data according to the current weight/bias
:test_data: List[(x, y)]
:returns: number of test_data that is correctly predicted
predictions = [(np.argmax(self.feed_forward(x)), y) for (x, y) in test_data]
return sum([int(x == y) for x, y in predictions])
def cost_derivative(self, output_activations, y):
:output_activations: TODO
:y: TODO
:returns: the vector of partial derivatives \partial C_x / \partial a for the output activations.
return (output_activations - y)
def backprop(self, x, y):
:x: column vector or a matrix of xs [col, col, ...]
:y: column vector or a matrix of ys [col, col, ...]
:returns: a tuple (nabla_b, nabla_w): (List[float], List[List[float]]) representing the
gradient for cost function C_x
nabla_b = [None] * len(self.biases)
nabla_w = [None] * len(self.weights)
# feed forward
activation = x
activations = [x] # list to store activations layer by layer
zs = [] # list to store all z values, i.e. the linear combination
for b, w in zip(self.biases, self.weights):
z = np.add(, b)
activation = sigmoid(z)
# backward pass
delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
nabla_b[-1] = delta
nabla_w[-1] =, activations[-2].transpose())
# Note that the variable l in the loop below is used a little
# differently to the notation in Chapter 2 of the book. Here,
# l = 1 means the last layer of neurons, l = 2 is the
# second-last layer, and so on. It's a renumbering of the
# scheme in the book, used here to take advantage of the fact
# that Python can use negative indices in lists.
for l in range(2, self.num_layers):
z = zs[-l]
sp = sigmoid_prime(z)
delta =[-l+1].transpose(), delta) * sp
nabla_b[-l] = delta
nabla_w[-l] =, activations[-l-1].transpose())
return (nabla_b, nabla_w)
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = Network([784, 30, 10])
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)

This comment has been minimized.

Copy link
Owner Author

@lotabout lotabout commented Mar 14, 2018

The data MNIST dataset could be fetched from

Note the code above is written in Python 3, and the mnist_loader is written in Python 2, so you may need to apply the following changes to make it work with Python 3.

 #### Libraries
 # Standard library
-import cPickle
+import pickle
 import gzip

 # Third-party libraries
@@ -39,9 +39,8 @@ def load_data():
     That's done in the wrapper function ``load_data_wrapper()``, see
-    f ='../data/mnist.pkl.gz', 'rb')
-    training_data, validation_data, test_data = cPickle.load(f)
-    f.close()
+    with'../data/mnist.pkl.gz', 'rb') as f:
+        training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
     return (training_data, validation_data, test_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment