@kbarbary
Created February 27, 2018 16:39
What does a minimal implementation of a multi-layer dense neural net (with backpropagation) look like?
"""A minimal implementation of a dense neural net with an arbitrary
number of layers, backpropagation, and a few different activation functions."""
import numpy as np
# Activation and cost functions (with gradients)
def sigmoid(x):
    y = 1.0 / (1.0 + np.exp(-x))
    return y, y * (1.0 - y)


def relu(x):
    y = np.maximum(0.0, x)
    grad = (x > 0.0).astype(np.float64)
    return y, grad


def tanh(x):
    y = np.tanh(x)
    return y, 1.0 - y**2


def cross_entropy_cost(A, Y):
    m = Y.shape[1]
    cost = (1.0 / m) * np.sum(-Y * np.log(A) - (1.0 - Y) * np.log(1.0 - A))
    dA = (1.0 / m) * (-(Y / A) + (1.0 - Y) / (1.0 - A))  # dcost/dA
    return cost, dA
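
# Note: cross_entropy_cost assumes A lies strictly in (0, 1). When the output
# layer is a sigmoid, dA multiplied by the sigmoid gradient A * (1 - A) in the
# last layer's backward pass collapses to dZ = (A - Y) / m, the usual
# cross-entropy/sigmoid shortcut; the two factors are kept separate here so
# any activation can be used on the output layer.

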
class Layer(object):
    def __init__(self, n_in: int, n_out: int, activation):
        self.g = activation
        self.W = np.random.normal(scale=0.01, size=(n_out, n_in))
        self.b = np.zeros((n_out, 1))
        self._cache = {}

    def __call__(self, X):
        """Forward propagation (and cache intermediate results)"""
        Z = self.W @ X + self.b
        A, dAdZ = self.g(Z)
        self._cache['X'] = X
        self._cache['Z'] = Z
        self._cache['dAdZ'] = dAdZ
        return A

    def backward(self, dA, alpha):
        """Backward propagation and update parameters"""
        dZ = dA * self._cache['dAdZ']
        dW = dZ @ self._cache['X'].T
        db = np.sum(dZ, axis=1, keepdims=True)
        dX = self.W.T @ dZ

        # update
        self.W -= alpha * dW
        self.b -= alpha * db

        return dX
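
# A quick shape check for a single layer (illustrative sketch, not part of the
# original demo):
#     layer = Layer(3, 2, relu)                         # W: (2, 3), b: (2, 1)
#     A = layer(np.random.randn(3, 5))                  # 5 example columns -> A: (2, 5)
#     dX = layer.backward(np.ones((2, 5)), alpha=0.1)   # dX: (3, 5); W and b are updated

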
class NeuralNetwork(object):
    def __init__(self, layer_sizes, activations):
        assert len(activations) == len(layer_sizes) - 1
        self.layers = [Layer(layer_sizes[i], layer_sizes[i+1], activations[i])
                       for i in range(len(activations))]
        self.costs = []

    def __call__(self, X):
        for layer in self.layers:
            X = layer(X)
        return X

    def train(self, X, Y, niter=100, alpha=0.05):
        for i in range(niter):
            A = self(X)
            cost, dA = cross_entropy_cost(A, Y)

            # backprop and update parameters via gradient descent
            for layer in reversed(self.layers):
                dA = layer.backward(dA, alpha)

            self.costs.append(cost)
            if not (i % 10):
                print(i, "cost =", cost)

        return self
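
# Usage sketch (hypothetical toy problem; the MNIST demo below is the real
# test). Inputs and targets are column-per-example arrays, i.e. shapes
# (n_features, m) and (n_outputs, m):
#     net = NeuralNetwork([2, 8, 1], [tanh, sigmoid])
#     X_toy = np.array([[0., 0., 1., 1.],
#                       [0., 1., 0., 1.]])   # (2, 4)
#     Y_toy = np.array([[0., 1., 1., 0.]])   # (1, 4)
#     net.train(X_toy, Y_toy, niter=1000, alpha=0.5)
#     net(X_toy)                             # forward pass -> (1, 4)

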
# Testing

import os
import gzip
from urllib.request import urlopen


def download_gzip_file(url, file_name):
    """Download a gzipped file from `url` and write the decompressed
    contents to `file_name`."""
    response = gzip.GzipFile(fileobj=urlopen(url))
    with open(file_name, 'wb') as f:
        f.write(response.read())


def read_idx(fname):
    """Read an IDX format file into a numpy array. IDX is a very simple
    binary format described here: http://yann.lecun.com/exdb/mnist/"""
    with open(fname, 'rb') as f:
        # read magic bytes: dtype and ndim
        magic = f.read(4)
        assert magic[0:2] == b'\x00\x00'
        dtypes = {8: np.uint8, 9: np.int8, 11: np.int16,
                  12: np.int32, 13: np.float32, 14: np.float64}
        dtype = np.dtype(dtypes[magic[2]]).newbyteorder('>')  # data is big-endian
        ndim = magic[3]

        # read dimensions (one big-endian 4-byte integer per dimension)
        dims = []
        for i in range(ndim):
            b = f.read(4)
            dims.append(int.from_bytes(b, byteorder='big'))

        # read data
        data = np.fromfile(f, dtype=dtype, count=np.prod(dims))
        data.shape = dims

    return data
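
# For the MNIST files used below, read_idx returns a (60000, 28, 28) uint8
# array for the training images and a (60000,) uint8 array for the training
# labels (10000 for the test set).

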
if __name__ == '__main__':
    # get some data
    root_url = "http://yann.lecun.com/exdb/mnist/"
    urls = {"train_images": root_url + "train-images-idx3-ubyte.gz",
            "train_labels": root_url + "train-labels-idx1-ubyte.gz",
            "test_images": root_url + "t10k-images-idx3-ubyte.gz",
            "test_labels": root_url + "t10k-labels-idx1-ubyte.gz"}
    fnames = {key: url.split('/')[-1][:-3] for key, url in urls.items()}
    for key in urls:
        if not os.path.exists(fnames[key]):
            download_gzip_file(urls[key], fnames[key])

    # Read the data
    data = {key: read_idx(fname) for key, fname in fnames.items()}

    # Munge data: flatten each image to a column and scale to [0, 1]
    X = {}
    for k in ('train', 'test'):
        images = data[k + '_images']
        images = images.reshape((images.shape[0], -1)).T  # (n_pixels, n_examples)
        X[k] = images / images.max(axis=0)

    # Munge data: one-hot encode Y
    Y = {}
    for k in ('train', 'test'):
        labels = data[k + '_labels']
        y = np.zeros((labels.size, labels.max() + 1))
        y[np.arange(labels.size), labels] = 1.0
        Y[k] = y.T  # (n_classes, n_examples)

    # Run training
    network = NeuralNetwork([28*28, 100, 10], [relu, sigmoid])
    network.train(X['train'], Y['train'], niter=300, alpha=0.2)

    # show training cost
    import matplotlib.pyplot as plt
    plt.plot(network.costs)
    plt.ylim(bottom=0.0)
    plt.ylabel("cost")
    plt.xlabel("iteration")
    plt.savefig("costs.png")

    # Validate
    for key in ('train', 'test'):
        print(key, 'set')
        Ypred = network(X[key])
        labels = np.argmax(Ypred, axis=0)
        print("Truth:     ", data[key + '_labels'][:30])
        print("Prediction:", labels[:30])
        correct = data[key + '_labels'] == labels
        print("Correct: {:6.2f}%\n".format(100.0 * correct.mean()))