@righthandabacus
Created March 21, 2019 23:26
Artificial neural network with only numpy
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Provide class of a generic ANN for classification (using binary cross entropy)
"""
import numpy as np
# define activation functions g(Z) and their first derivatives using numpy;
# hold all (function, derivative) pairs in a global dict
logistic = lambda Z: 1/(1+np.exp(-Z.clip(-708, 709)))  # clip Z to avoid overflow in exp
def logisticprime(Z):
    # derivative of the logistic function: s * (1 - s)
    s = logistic(Z)
    return s * (1-s)
relu = lambda Z: np.maximum(0, Z)
reluprime = lambda Z: (Z > 0).astype(float)
tanh = lambda Z: np.tanh(Z)
tanhprime = lambda Z: 1 - np.tanh(Z)**2  # d/dZ tanh(Z) = 1 - tanh(Z)^2
elu = lambda Z: np.where(Z>0, Z, np.exp(Z)-1)
eluprime = lambda Z: np.where(Z>0, 1, np.exp(Z))
ACTFUNC = {  # maps name -> (activation function, its derivative)
    'logistic': (logistic, logisticprime),
    'relu': (relu, reluprime),
    'tanh': (tanh, tanhprime),
    'elu': (elu, eluprime),
}
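# Quick illustrative check (not from the original gist): looking up a name in ACTFUNC
# yields the (activation, derivative) pair, e.g.
#   g, gprime = ACTFUNC['relu']
#   g(np.array([-1., 2.]))       # -> array([0., 2.])
#   gprime(np.array([-1., 2.]))  # -> array([0., 1.])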
# Loss functions L(Y, Y_hat) and their partial derivatives
def xentropy(Y, Y_hat):
    """Binary cross entropy function
        L = - Y log Y_hat - (1-Y) log (1-Y_hat)
    Args:
        Y, Y_hat (np.array): mxn matrices where m is the number of output perceptrons and n the
            number of data instances; the loss is averaged over the n instances
    """
    eps = np.finfo(float).eps
    return -(np.dot(Y, np.log(Y_hat.clip(eps)).T) + np.dot(1-Y, np.log((1-Y_hat).clip(eps)).T)) / Y.shape[1]
def xentropyprime(Y, Y_hat):
    """ dL/dY_hat """
    eps = np.finfo(float).eps
    return - np.divide(Y, Y_hat.clip(eps)) + np.divide(1-Y, (1-Y_hat).clip(eps))
LOSSFUNC = {
    'xentropy': (xentropy, xentropyprime),
}
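# Quick illustrative check (not from the original gist): for a single output perceptron
# and two data instances,
#   xentropy(np.array([[1., 0.]]), np.array([[0.9, 0.2]]))
# returns roughly [[0.1643]], i.e. the mean of -log(0.9) and -log(0.8)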
class pyann:
    '''Artificial Neural Network using numpy
    '''
    def __init__(self, layersizes, activations, lossfunc='xentropy'):
        """Remember the config and create placeholder arrays for the NN parameters, without
        initializing their values"""
        # hold NN config
        self.layersizes = tuple(layersizes)
        self.activations = tuple(activations)
        self.lossfunc = lossfunc
        assert len(self.layersizes)-1 == len(self.activations), \
            "Number of NN layers and the activation function spec do not match"
        assert all(f in ACTFUNC for f in activations), "Unrecognized activation function used"
        assert all(isinstance(n, int) and n >= 1 for n in layersizes), \
            "Only a positive integral number of perceptrons is allowed in each layer"
        assert lossfunc in LOSSFUNC, \
            "Unrecognized loss function used"
        # parameters, each is a 2D numpy array, indexed by layer number
        # (index 0 of W and b is unused; A[0] holds the input)
        L = len(self.layersizes)
        self.Z = [None] * L
        self.W = [None] * L
        self.b = [None] * L
        self.A = [None] * L
        self.dZ = [None] * L
        self.dW = [None] * L
        self.db = [None] * L
        self.dA = [None] * L
    def init_nn(self, seed=42):
        """Initialize the weight matrices and bias vectors with small random numbers. We do not
        use a true truncated normal but a plain normal clipped at 6 sigmas. We assume the
        activation functions have large derivatives around 0, so initial values concentrated
        around 0 speed up learning.
        """
        np.random.seed(seed)
        sigma = 0.1
        for l, (insize, outsize) in enumerate(zip(self.layersizes, self.layersizes[1:]), 1):
            self.W[l] = np.random.randn(outsize, insize).clip(-6, 6) * sigma
            self.b[l] = np.random.randn(outsize, 1).clip(-6, 6) * sigma
    def forward(self, X):
        """Feed forward the NN using the existing W and b, and overwrite the result variables A and Z
        Args:
            X (numpy.ndarray): Input data to feed forward
        """
        self.A[0] = X
        for l, funcname in enumerate(self.activations, 1):
            # Z = W A + b, with A as the output from the previous layer
            # W is of size rxs and A of size sxn with n the number of data instances; Z is of size rxn
            # b is rx1 and broadcast to each column of Z
            g = ACTFUNC[funcname][0]
            self.Z[l] = np.dot(self.W[l], self.A[l-1]) + self.b[l]
            # A = g(Z), with A as the output of this layer, of size rxn
            self.A[l] = g(self.Z[l])
        return self.A[-1]
    def backward(self, Y, Y_hat):
        """Back propagation using the NN output Y_hat and the reference output Y; generates dW, dZ,
        db, dA
        """
        assert Y.shape[0] == self.layersizes[-1], "Output size does not match the NN"
        assert Y.shape == Y_hat.shape, "Output size does not match the reference"
        # first dA, at the output
        self.dA[-1] = LOSSFUNC[self.lossfunc][1](Y, Y_hat)
        for l, funcname in reversed(list(enumerate(self.activations, 1))):
            m = Y.shape[1]  # number of data instances; gradients are averaged over them
            g_prime = ACTFUNC[funcname][1]
            # compute the differentials at this layer
            self.dZ[l] = self.dA[l] * g_prime(self.Z[l])
            self.dW[l] = np.dot(self.dZ[l], self.A[l-1].T) / m
            self.db[l] = np.sum(self.dZ[l], axis=1, keepdims=True) / m
            self.dA[l-1] = np.dot(self.W[l].T, self.dZ[l])
    def update(self, alpha):
        """Updates W and b
        Args:
            alpha (float): Learning rate
        """
        for l in range(1, len(self.W)):
            self.W[l] -= alpha * self.dW[l]
            self.b[l] -= alpha * self.db[l]
    def fit(self, X, Y, epochs, alpha, printfreq=0):
        """Train the NN
        Args:
            X: input data, of size mxn where m is the number of features and n the number of data
                instances
            Y: reference output, of size kxn where k is the size of each output (the last layer
                size) and n the number of data instances
            epochs: number of training epochs, i.e. full passes over the data
            alpha: the learning rate
            printfreq: print the loss value every printfreq epochs; 0 suppresses printing
        """
        self.init_nn()
        lossfunc = LOSSFUNC[self.lossfunc][0]
        # train for each epoch
        for j in range(epochs):
            self.forward(X)
            Y_hat = self.A[-1]
            self.backward(Y, Y_hat)
            self.update(alpha)
            if printfreq and j % printfreq == 0:
                loss = float(lossfunc(Y, Y_hat))
                print("Iteration {} - loss value {}".format(j, loss))
        # report the final loss value
        return lossfunc(Y, Y_hat)
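
# Illustrative usage sketch, not part of the original gist: train the network on a small
# synthetic binary classification problem. The layer sizes, hyperparameters, and data below
# are arbitrary choices for demonstration; shapes follow the convention used above
# (features x instances for X, output size x instances for Y).
if __name__ == '__main__':
    np.random.seed(0)
    n = 200
    X = np.random.randn(2, n)                          # 2 features, n data instances
    Y = (X[0] * X[1] > 0).astype(float).reshape(1, n)  # label 1 iff the two features share a sign
    nn = pyann(layersizes=[2, 8, 8, 1], activations=['relu', 'relu', 'logistic'])
    loss = nn.fit(X, Y, epochs=5000, alpha=0.1, printfreq=1000)
    Y_hat = nn.forward(X)
    accuracy = np.mean((Y_hat > 0.5) == (Y > 0.5))
    print("final loss {:.4f} - training accuracy {:.2%}".format(float(loss), accuracy))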
# vim:set ts=4 sw=4 sts=4 et tw=100: