@righthandabacus
Created March 21, 2019 23:26
Artificial neural network with only numpy
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Provide class of a generic ANN for classification (using binary cross entropy)
"""
import numpy as np
# define activation functions g(Z) and their first derivatives using numpy;
# hold all (function, derivative) pairs in a global dict
logistic = lambda Z: 1/(1+np.exp(-Z.clip(-708, 709)))  # clip Z to avoid overflow in exp
def logisticprime(Z):
    # derivative of the logistic function: s * (1 - s)
    s = logistic(Z)
    return s * (1-s)
relu = lambda Z: np.maximum(0, Z)
reluprime = lambda Z: (Z > 0).astype(float)
tanh = lambda Z: np.tanh(Z)
tanhprime = lambda Z: 1 - np.tanh(Z)**2  # d/dZ tanh(Z) = 1 - tanh(Z)^2
elu = lambda Z: np.where(Z>0, Z, np.exp(Z)-1)
eluprime = lambda Z: np.where(Z>0, 1, np.exp(Z))
ACTFUNC = {  # maps name -> (activation function, its derivative)
    'logistic': (logistic, logisticprime),
    'relu': (relu, reluprime),
    'tanh': (tanh, tanhprime),
    'elu': (elu, eluprime),
}
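# Quick illustrative check (not from the original gist): looking up a name in ACTFUNC
# yields the (activation, derivative) pair, e.g.
#   g, gprime = ACTFUNC['relu']
#   g(np.array([-1., 2.]))       # -> array([0., 2.])
#   gprime(np.array([-1., 2.]))  # -> array([0., 1.])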
# Loss functions L(Y, Y_hat) and their partial derivatives
def xentropy(Y, Y_hat):
    """Binary cross entropy function
        L = - Y log Y_hat - (1-Y) log (1-Y_hat)
    Args:
        Y, Y_hat (np.array): mxn matrices where m is the number of output perceptrons and n the
            number of data instances; the loss is averaged over the n instances
    """
    eps = np.finfo(float).eps
    return -(np.dot(Y, np.log(Y_hat.clip(eps)).T) + np.dot(1-Y, np.log((1-Y_hat).clip(eps)).T)) / Y.shape[1]
def xentropyprime(Y, Y_hat):
    """ dL/dY_hat """
    eps = np.finfo(float).eps
    return - np.divide(Y, Y_hat.clip(eps)) + np.divide(1-Y, (1-Y_hat).clip(eps))
LOSSFUNC = {
    'xentropy': (xentropy, xentropyprime),
}
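# Quick illustrative check (not from the original gist): for a single output perceptron
# and two data instances,
#   xentropy(np.array([[1., 0.]]), np.array([[0.9, 0.2]]))
# returns roughly [[0.1643]], i.e. the mean of -log(0.9) and -log(0.8)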
class pyann:
    '''Artificial Neural Network using numpy
    '''
    def __init__(self, layersizes, activations, lossfunc='xentropy'):
        """Remember the config and create placeholder arrays for the NN parameters, without
        initializing their values"""
        # hold NN config
        self.layersizes = tuple(layersizes)
        self.activations = tuple(activations)
        self.lossfunc = lossfunc
        assert len(self.layersizes)-1 == len(self.activations), \
            "Number of NN layers and the activation function spec do not match"
        assert all(f in ACTFUNC for f in activations), "Unrecognized activation function used"
        assert all(isinstance(n, int) and n >= 1 for n in layersizes), \
            "Only a positive integral number of perceptrons is allowed in each layer"
        assert lossfunc in LOSSFUNC, \
            "Unrecognized loss function used"
        # parameters, each is a 2D numpy array, indexed by layer number
        # (index 0 of W and b is unused; A[0] holds the input)
        L = len(self.layersizes)
        self.Z = [None] * L
        self.W = [None] * L
        self.b = [None] * L
        self.A = [None] * L
        self.dZ = [None] * L
        self.dW = [None] * L
        self.db = [None] * L
        self.dA = [None] * L
    def init_nn(self, seed=42):
        """Initialize the weight matrices and bias vectors with small random numbers. We do not
        use a true truncated normal but a plain normal clipped at 6 sigmas. We assume the
        activation functions have large derivatives around 0, so initial values concentrated
        around 0 speed up learning.
        """
        np.random.seed(seed)
        sigma = 0.1
        for l, (insize, outsize) in enumerate(zip(self.layersizes, self.layersizes[1:]), 1):
            self.W[l] = np.random.randn(outsize, insize).clip(-6, 6) * sigma
            self.b[l] = np.random.randn(outsize, 1).clip(-6, 6) * sigma
    def forward(self, X):
        """Feed forward the NN using the existing W and b, and overwrite the result variables A and Z
        Args:
            X (numpy.ndarray): Input data to feed forward
        """
        self.A[0] = X
        for l, funcname in enumerate(self.activations, 1):
            # Z = W A + b, with A as the output from the previous layer
            # W is of size rxs and A of size sxn with n the number of data instances; Z is of size rxn
            # b is rx1 and broadcast to each column of Z
            g = ACTFUNC[funcname][0]
            self.Z[l] = np.dot(self.W[l], self.A[l-1]) + self.b[l]
            # A = g(Z), with A as the output of this layer, of size rxn
            self.A[l] = g(self.Z[l])
        return self.A[-1]
    def backward(self, Y, Y_hat):
        """Back propagation using the NN output Y_hat and the reference output Y; generates dW, dZ,
        db, dA
        """
        assert Y.shape[0] == self.layersizes[-1], "Output size does not match the NN"
        assert Y.shape == Y_hat.shape, "Output size does not match the reference"
        # first dA, at the output
        self.dA[-1] = LOSSFUNC[self.lossfunc][1](Y, Y_hat)
        for l, funcname in reversed(list(enumerate(self.activations, 1))):
            m = Y.shape[1]  # number of data instances; gradients are averaged over them
            g_prime = ACTFUNC[funcname][1]
            # compute the differentials at this layer
            self.dZ[l] = self.dA[l] * g_prime(self.Z[l])
            self.dW[l] = np.dot(self.dZ[l], self.A[l-1].T) / m
            self.db[l] = np.sum(self.dZ[l], axis=1, keepdims=True) / m
            self.dA[l-1] = np.dot(self.W[l].T, self.dZ[l])
    def update(self, alpha):
        """Updates W and b
        Args:
            alpha (float): Learning rate
        """
        for l in range(1, len(self.W)):
            self.W[l] -= alpha * self.dW[l]
            self.b[l] -= alpha * self.db[l]
    def fit(self, X, Y, epochs, alpha, printfreq=0):
        """Train the NN
        Args:
            X: input data, of size mxn where m is the number of features and n the number of data
                instances
            Y: reference output, of size kxn where k is the size of each output (the last layer
                size) and n the number of data instances
            epochs: number of training epochs, i.e. full passes over the data
            alpha: the learning rate
            printfreq: print the loss value every printfreq epochs; 0 suppresses printing
        """
        self.init_nn()
        lossfunc = LOSSFUNC[self.lossfunc][0]
        # train for each epoch
        for j in range(epochs):
            self.forward(X)
            Y_hat = self.A[-1]
            self.backward(Y, Y_hat)
            self.update(alpha)
            if printfreq and j % printfreq == 0:
                loss = float(lossfunc(Y, Y_hat))
                print("Iteration {} - loss value {}".format(j, loss))
        # report the final loss value
        return lossfunc(Y, Y_hat)
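
# Illustrative usage sketch, not part of the original gist: train the network on a small
# synthetic binary classification problem. The layer sizes, hyperparameters, and data below
# are arbitrary choices for demonstration; shapes follow the convention used above
# (features x instances for X, output size x instances for Y).
if __name__ == '__main__':
    np.random.seed(0)
    n = 200
    X = np.random.randn(2, n)                          # 2 features, n data instances
    Y = (X[0] * X[1] > 0).astype(float).reshape(1, n)  # label 1 iff the two features share a sign
    nn = pyann(layersizes=[2, 8, 8, 1], activations=['relu', 'relu', 'logistic'])
    loss = nn.fit(X, Y, epochs=5000, alpha=0.1, printfreq=1000)
    Y_hat = nn.forward(X)
    accuracy = np.mean((Y_hat > 0.5) == (Y > 0.5))
    print("final loss {:.4f} - training accuracy {:.2%}".format(float(loss), accuracy))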
# vim:set ts=4 sw=4 sts=4 et tw=100: