implementation of backpropagation algorithm
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# implementation of the backpropagation algorithm, based on the network.py example
# from Michael Nielsen's book "Neural Networks and Deep Learning"
# note that mini-batches are calculated in matrix form
import numpy as np
import random
def chunks(lst, n):
    """Split lst into evenly sized chunks of n elements."""
    for i in range(0, len(lst), n):
        yield lst[i:i+n]

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
class Network(object):
    """A Neural Network"""

    def __init__(self, sizes):
        """sizes is a List[int] giving the number of units in each layer, starting from the input layer."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(n, 1) for n in self.sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(self.sizes, self.sizes[1:])]
    def feed_forward(self, X):
        """Return the output of the network given input X."""
        activation = X
        for b, w in zip(self.biases, self.weights):
            activation = sigmoid(, activation) + b)
        return activation
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None, matrix=True):
        """Train the NN using mini-batch stochastic gradient descent.

        :training_data: a list of tuples "(x, y)" where "x" is the input and "y" is the label
        :epochs: number of passes over the training data
        :mini_batch_size: number of training examples in each mini-batch
        :eta: learning rate
        :test_data: if provided, "test_data" will be evaluated after each epoch
        :matrix: if True, process each mini-batch as a single matrix
        :returns: None
        """
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        update_mini_batch = self.update_mini_batch_matrix if matrix else self.update_mini_batch
        for j in range(epochs):
            random.shuffle(training_data)  # reshuffle so mini-batches differ between epochs
            mini_batches = list(chunks(training_data, mini_batch_size))
            for mini_batch in mini_batches:
                update_mini_batch(mini_batch, eta)
            if test_data:
                print(f"Epoch {j}: {self.evaluate(test_data)}/{n_test}")
            else:
                print(f"Epoch {j} complete")
    def update_mini_batch(self, mini_batch, eta):
        """Update the NN's weights and biases by applying gradient descent using backpropagation.

        :mini_batch: List[(x, y)] where x is input and y is label
        :eta: learning rate
        :returns: None
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]
    def update_mini_batch_matrix(self, mini_batch, eta):
        """Matrix version of update_mini_batch: the whole mini-batch is processed in one pass.

        :mini_batch: List[(x, y)] where x is input and y is label
        :eta: learning rate
        :returns: None
        """
        # stack the inputs and labels column-wise into matrices X and Y
        X = np.concatenate([x for x, _ in mini_batch], axis=1)
        Y = np.concatenate([y for _, y in mini_batch], axis=1)
        nabla_b, nabla_w = self.backprop(X, Y)
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*np.sum(nb, axis=1).reshape(b.shape) for b, nb in zip(self.biases, nabla_b)]
    def evaluate(self, test_data):
        """Evaluate the labels for test_data using the current weights/biases.

        :test_data: List[(x, y)]
        :returns: number of test_data examples that are correctly predicted
        """
        predictions = [(np.argmax(self.feed_forward(x)), y) for (x, y) in test_data]
        return sum([int(x == y) for x, y in predictions])
    def cost_derivative(self, output_activations, y):
        r"""Derivative of the quadratic cost with respect to the output activations.

        :output_activations: activations of the output layer
        :y: the expected output (label)
        :returns: the vector of partial derivatives \partial C_x / \partial a for the output activations
        """
        return (output_activations - y)
    def backprop(self, x, y):
        """Backpropagation for a single example or a column-stacked mini-batch.

        :x: column vector or a matrix of xs [col, col, ...]
        :y: column vector or a matrix of ys [col, col, ...]
        :returns: a tuple (nabla_b, nabla_w) of layer-by-layer gradients for the cost function C_x
        """
        nabla_b = [None] * len(self.biases)
        nabla_w = [None] * len(self.weights)
        # feed forward
        activation = x
        activations = [x]  # list to store activations layer by layer
        zs = []  # list to store all z values, i.e. the linear combinations
        for b, w in zip(self.biases, self.weights):
            z =, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] =, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book. Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on. It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta =[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] =, activations[-l-1].transpose())
        return (nabla_b, nabla_w)
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = Network([784, 30, 10])
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
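
As a quick sanity check (my addition, not part of the original gist), the sketch below illustrates the matrix form noted in the header comment: the biases are (n, 1) column vectors, so they broadcast across a mini-batch whose examples are stacked column-wise, and the same feed_forward handles both a single example and a whole batch. The (784, 1) input shape matches the mnist_loader format assumed above; check_net is a throwaway name for illustration only.

# Sketch: feed_forward on a single column vector vs. a column-stacked mini-batch
check_net = Network([784, 30, 10])
x_single = np.random.randn(784, 1)
print(check_net.feed_forward(x_single).shape)   # (10, 1)
X_batch = np.concatenate([np.random.randn(784, 1) for _ in range(8)], axis=1)
print(check_net.feed_forward(X_batch).shape)    # (10, 8): one output column per example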

Owner Author commented Mar 14, 2018
The MNIST dataset can be fetched from

Note that the code above is written in Python 3, while mnist_loader is written in Python 2, so you may need to apply the following changes to make it work with Python 3.

 #### Libraries
 # Standard library
-import cPickle
+import pickle
 import gzip

 # Third-party libraries
@@ -39,9 +39,8 @@ def load_data():
     That's done in the wrapper function ``load_data_wrapper()``, see
-    f ='../data/mnist.pkl.gz', 'rb')
-    training_data, validation_data, test_data = cPickle.load(f)
-    f.close()
+    with'../data/mnist.pkl.gz', 'rb') as f:
+        training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
     return (training_data, validation_data, test_data)
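
For reference, here is a rough sketch of what load_data would look like after applying the patch above; only the lines shown in the diff are changed, the rest of mnist_loader is assumed unchanged, and the docstring is abbreviated here.

#### Libraries
# Standard library
import pickle
import gzip

def load_data():
    """Return (training_data, validation_data, test_data) loaded from the gzipped pickle."""
    with'../data/mnist.pkl.gz', 'rb') as f:
        # encoding='latin1' is required because the pickle was written by Python 2
        training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    return (training_data, validation_data, test_data)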