Neural Network - Under Construction
File 1: nn_s.py
import json
import numpy as np
# Note: scipy.misc.imread/imresize were removed in SciPy >= 1.2;
# this gist targets the older SciPy where they still exist.
from scipy.misc import imread, imresize
import os
import random
from math import exp
#Takes the path of an image directory and the width and height
#to which each image should be resized.
#Returns a list of resized images as numpy arrays; images are
#converted to grayscale when grey=True.
def image_data(path="", width=150, height=150, grey=True):
    files = os.listdir(path)
    if grey:
        images = [imread(os.path.join(path, f), mode='L') for f in files]
    else:
        images = [imread(os.path.join(path, f)) for f in files]
    resized = [imresize(i, (width, height)) for i in images]
    return resized
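# A minimal usage sketch (the folder name "./faces/" is illustrative,
# not part of the gist):
#
#   images = image_data(path="./faces/", width=64, height=64)
#   X = [mat2vec(img) for img in images]  # flatten with mat2vec (defined below)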
#Class neuron is the basic structure of a single neuron.
class neuron:
    #default gives the initial value of the neuron output;
    #kernel is the activation function associated with the neuron.
    def __init__(self, default=0, kernel=lambda x: x):
        self.z_value = default
        self.kernel = kernel
        self.a_value = self.kernel(default)
        self.inputs = []
        self.outputs = []
        self.theta = [random.random()]  # theta[0] is the bias weight
        self.grad = [0]
        self.alpha = []

    #Updates the pre- and post-activation outputs. Adds the bias
    #unit by itself. Assumes theta and inputs act as row and
    #column vectors.
    def update_value(self, network):
        inputs = [1]
        for i in self.inputs:
            if i[0] == 0:
                # Layer 0 holds raw input values, not neuron objects.
                inputs.append(network[i[0]][i[1]])
            else:
                inputs.append(network[i[0]][i[1]].a_value)
        self.z_value = np.matmul(self.theta, inputs)
        self.a_value = self.kernel(self.z_value)
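# Worked example of update_value (values illustrative): a neuron with
# theta = [b, w1, w2] reading activations a1, a2 computes
#   z = b*1 + w1*a1 + w2*a2   and   a = kernel(z)
# i.e. np.matmul(theta, [1, a1, a2]) followed by the activation.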
#Converts the given structure data into a network of neurons.
#Theta is generated with random values between negative and
#positive epsilon. Outputs a nested list for the network structure.
def init_structure(structure, default=0, epsilon=1, kernel=lambda x: x):
    network = []
    network.append(structure[0])
    for layer_i in structure[1:]:
        layer = []
        for neuron_i in layer_i:
            neur = neuron(default=default, kernel=kernel)
            neur.inputs = neuron_i
            # Redraw the bias weight so that it also lies in
            # [-epsilon, epsilon], as documented above.
            neur.theta[0] = random.random()*2*epsilon - epsilon
            for k in neuron_i:
                neur.theta.append(random.random()*2*epsilon - epsilon)
                neur.grad.append(0)
            layer.append(neur)
        network.append(layer)
    for layer_i in range(1, len(network)):
        for neuron_i in range(len(network[layer_i])):
            for con in network[layer_i][neuron_i].inputs:
                # Store addresses as lists so that this membership test
                # and back_prop's inputs.index() lookups both match.
                if (con[0] != 0) and ([layer_i, neuron_i] not in network[con[0]][con[1]].outputs):
                    network[con[0]][con[1]].outputs.append([layer_i, neuron_i])
    return network
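# The structure format, sketched for a tiny 2-2-1 network: entry 0 is a
# placeholder for the raw inputs, and every other entry lists, per
# neuron, the [layer, index] addresses it reads from (the same format
# the gradient-check script below uses):
#
#   structure = [[],                              # layer 0: raw inputs
#                [[[0,0],[0,1]], [[0,0],[0,1]]],  # layer 1: two neurons
#                [[[1,0],[1,1]]]                  # layer 2: one neuron
#                ]
#   net = init_structure(structure, kernel=lambda x: x)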
#Takes in the network and the input layer and updates all neurons.
def forward_prop(network, input_layer):
    network[0] = input_layer
    for layer in network[1:]:
        for neuron in layer:
            neuron.update_value(network)
    return network
#Converts a matrix (list of rows) to a flat vector.
def mat2vec(matrix):
    vec = []
    for i in matrix:
        for j in i:
            vec.append(j)
    return vec
#Converts a layer's 2-D neuron addresses ([layer, [row, col]]) into
#flat vector addresses ([layer, row*column + col]), given the number
#of columns.
def layer_conv_vec(layer, column):
    for neuron in layer:
        for address_i in range(len(neuron)):
            neuron[address_i][1] = neuron[address_i][1][0]*column + neuron[address_i][1][1]
    return layer
#Converts a 1-indexed label i to a one-hot vector of length l.
def hot1(i, l):
    s = (i-1)*'0' + '1' + (l-i)*'0'
    s = [int(j) for j in s]
    return s

#Converts a 1-indexed label i to a "cold-one" (complement) vector.
def cold1(i, l):
    s = (i-1)*'1' + '0' + (l-i)*'1'
    s = [int(j) for j in s]
    return s
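# Quick examples (labels are 1-indexed):
#   hot1(2, 5)  -> [0, 1, 0, 0, 0]
#   cold1(2, 5) -> [1, 0, 1, 1, 1]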
#The function takes in the network, the output-layer factor (dJ/da
#for each output neuron), and Dkernel, the derivative of the kernel.
#It only gives the gradient value for one test case; rerun it for
#each test case. This can also be used with feedback-type networks.
def back_prop(network, factor, Dkernel=lambda x: 1):
    #Excludes the first and last layers of the network
    layer_range = range(len(network)-2, 0, -1)
    #Calculation for the last layer
    for neuron_i in range(len(network[-1])):
        neuron = network[-1][neuron_i]
        input_list = [1]
        for inp in neuron.inputs:
            if inp[0] == 0:
                input_list.append(network[0][inp[1]])
            else:
                input_list.append(network[inp[0]][inp[1]].a_value)
        neuron.grad = factor[neuron_i]*Dkernel(neuron.z_value)*np.array(input_list)
    #Calculation for the other layers
    for layer_i in layer_range:
        neuron_range = range(len(network[layer_i]))
        for neuron_i in neuron_range:
            neuron = network[layer_i][neuron_i]
            #Accumulates dJ/da for this neuron from every downstream
            #neuron it feeds into.
            gradient_sum = 0
            for output in neuron.outputs:
                out_neuron = network[output[0]][output[1]]
                index = out_neuron.inputs.index([layer_i, neuron_i])
                # grad[0] is the downstream neuron's bias gradient, which
                # equals its raw delta; theta[index+1] skips the bias
                # weight to pick the weight on this neuron's activation.
                gradient_sum += out_neuron.grad[0]*out_neuron.theta[index+1]
            input_list = [1]
            for inp in neuron.inputs:
                if inp[0] == 0:
                    input_list.append(network[0][inp[1]])
                else:
                    input_list.append(network[inp[0]][inp[1]].a_value)
            neuron.grad = gradient_sum*Dkernel(neuron.z_value)*np.array(input_list)
    return network
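# For a squared-error cost J = ||a - y||^2 / 2 over the output layer,
# the factor argument is dJ/da = a - y per output neuron, which is
# exactly how the gradient-check script below calls back_prop.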
#Adam algorithm for obtaining learning rates for individual
#weight parameters.
#The caller must create a timestep variable and thread it through
#manually; it is returned updated.
def adam_algo(alpha, B1, B2, e, t, network):
    """
    Pseudocode as published in the arXiv paper:
    Require: α: stepsize
    Require: β1, β2 ∈ [0,1): exponential decay rates for the moment estimates
    Require: f(θ): stochastic objective function with parameters θ
    Require: θ0: initial parameter vector
        m0 ← 0 (initialize 1st moment vector)
        v0 ← 0 (initialize 2nd moment vector)
        t  ← 0 (initialize timestep)
        while θt not converged do
            t ← t + 1
            gt ← ∇θ ft(θt−1)            (gradients w.r.t. stochastic objective at timestep t)
            mt ← β1·mt−1 + (1−β1)·gt    (update biased first moment estimate)
            vt ← β2·vt−1 + (1−β2)·gt^2  (update biased second raw moment estimate)
            bmt ← mt/(1−β1^t)           (compute bias-corrected first moment estimate)
            bvt ← vt/(1−β2^t)           (compute bias-corrected second raw moment estimate)
            θt ← θt−1 − α·bmt/(√bvt + e) (update parameters)
        end while
        return θt (resulting parameters)
    We use B1 and B2 in place of β1, β2. Gradients come from back_prop.
    """
    t += 1
    for layer in network[1:]:
        for neuron in layer:
            if t == 1:
                neuron.alpha.append((1-B1)*np.array(neuron.grad))
                neuron.alpha.append((1-B2)*(np.array(neuron.grad)**2))
            else:
                neuron.alpha[0] = B1*neuron.alpha[0] + (1-B1)*np.array(neuron.grad)
                # vt ← β2·vt−1 + (1−β2)·gt^2
                neuron.alpha[1] = B2*neuron.alpha[1] + (1-B2)*(np.array(neuron.grad)**2)
            bmt = neuron.alpha[0]/(1 - B1**t)
            bvt = neuron.alpha[1]/(1 - B2**t)
            neuron.theta = np.array(neuron.theta)
            neuron.theta = neuron.theta - alpha*bmt/(bvt**0.5 + e)
            neuron.theta = list(neuron.theta)
    return (network, t)
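# A minimal usage sketch of the timestep threading; the hyperparameter
# values are the Adam paper's suggested defaults, not something this
# gist prescribes, and x, factor, Dkernel stand for the caller's data:
#
#   t = 0
#   for step in range(num_steps):
#       network = forward_prop(network, x)
#       network = back_prop(network, factor, Dkernel=Dkernel)
#       network, t = adam_algo(0.001, 0.9, 0.999, 1e-8, t, network)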
#Fixed-learning-rate update: theta ← theta − alpha·grad
def flr_algo(alpha, network):
    for layer in network[1:]:
        for neuron in layer:
            neuron.theta = np.array(neuron.theta)
            neuron.theta = neuron.theta - alpha*np.array(neuron.grad)
            neuron.theta = list(neuron.theta)
    return network
#def train(trainData, network, epoch, batch, costFunction,
#          method, *args, kernel=lambda x:x, Dkernel=lambda x:1):
#    batches = np.array_split(trainData, batch)
#    for epoc in range(epoch):
#        for bat in batches:
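# A sketch of what one training pass could look like with the pieces
# above (illustrative only: train is unfinished, this is not its
# intended implementation, and bat is assumed to yield (x, y) pairs):
#
#   for epoc in range(epoch):
#       for bat in batches:
#           for x, y in bat:
#               network = forward_prop(network, x)
#               out = np.array([n.a_value for n in network[-1]])
#               factor = out - np.array(y)  # squared-error dJ/da
#               network = back_prop(network, factor, Dkernel=Dkernel)
#               network = flr_algo(alpha, network)  # or adam_algo(...)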
File 2: gradient check script (imports the file above as nn_s)
import numpy as np
from math import exp
from nn_s import *

# A tiny fully connected 2-2-2-2 network: every neuron in layers 1-3
# reads both neurons of the previous layer.
structure = [[],
             [[[0,0],[0,1]], [[0,0],[0,1]]],
             [[[1,0],[1,1]], [[1,0],[1,1]]],
             [[[2,0],[2,1]], [[2,0],[2,1]]]
             ]
A0 = np.array([1, 0])  # target output
kernel = lambda x: 1/(1 + exp(-x))             # sigmoid
Dkernel = lambda x: kernel(x)*(1 - kernel(x))  # its derivative
network = init_structure(structure, kernel=kernel)
network = forward_prop(network, [0.34, 0.56])
T = np.array([network[3][0].a_value, network[3][1].a_value])
T = T - A0  # dJ/da for the squared-error cost
print(Dkernel(network[3][1].z_value)*T[1])  # debug: raw delta of output neuron 1
network = back_prop(network, Dkernel=Dkernel, factor=T)
J = np.matmul(T, T)/2
# Numerical gradient check: perturb each weight by h and compare the
# finite-difference slope with the analytic gradient from back_prop.
h = (0.1)**8
thets = []
for layer_i in range(1, 4):
    for neuron_i in range(2):
        for grad_i in range(3):
            network[layer_i][neuron_i].theta[grad_i] += h
            network = forward_prop(network, [0.34, 0.56])
            T = np.array([network[3][0].a_value, network[3][1].a_value])
            T = T - A0
            Jt = np.matmul(T, T)/2
            thets.append((Jt - J)/h)
            network[layer_i][neuron_i].theta[grad_i] -= h
network = forward_prop(network, [0.34, 0.56])
print('Analytic \t \t Numerical \t \t Ratio')
for layer_i in range(1, 4):
    for neuron_i in range(2):
        for grad_i in range(3):
            # Each neuron contributes 3 gradients (bias + 2 weights),
            # so the flat index into thets is (layer-1)*6 + neuron*3 + grad.
            flat_i = (layer_i - 1)*6 + neuron_i*3 + grad_i
            print(str(network[layer_i][neuron_i].grad[grad_i]) + ' \t '
                  + str(thets[flat_i]) + '\t'
                  + str(network[layer_i][neuron_i].grad[grad_i]/thets[flat_i]))
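# If back_prop is correct, the Analytic and Numerical columns should
# agree to several decimal places and the Ratio column should be close
# to 1 for every weight.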