Neural Network - Under Construction
File 1: nn_s.py
import json
import numpy as np
# Note: scipy.misc.imread/imresize were removed in SciPy >= 1.2;
# this gist targets the older SciPy where they still exist.
from scipy.misc import imread, imresize
import os
import random
from math import exp
#Takes the path of an image directory and the width and height
#to which each image should be resized.
#Returns a list of resized images as numpy arrays; images are
#converted to grayscale when grey=True.
def image_data(path="", width=150, height=150, grey=True):
    files = os.listdir(path)
    if grey:
        images = [imread(os.path.join(path, f), mode='L') for f in files]
    else:
        images = [imread(os.path.join(path, f)) for f in files]
    resized = [imresize(i, (width, height)) for i in images]
    return resized
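# A minimal usage sketch (the folder name "./faces/" is illustrative,
# not part of the gist):
#
#   images = image_data(path="./faces/", width=64, height=64)
#   X = [mat2vec(img) for img in images]  # flatten with mat2vec (defined below)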
#Class neuron is the basic structure of a single neuron.
class neuron:
    #default gives the initial value of the neuron output;
    #kernel is the activation function associated with the neuron.
    def __init__(self, default=0, kernel=lambda x: x):
        self.z_value = default
        self.kernel = kernel
        self.a_value = self.kernel(default)
        self.inputs = []
        self.outputs = []
        self.theta = [random.random()]  # theta[0] is the bias weight
        self.grad = [0]
        self.alpha = []

    #Updates the pre- and post-activation outputs. Adds the bias
    #unit by itself. Assumes theta and inputs act as row and
    #column vectors.
    def update_value(self, network):
        inputs = [1]
        for i in self.inputs:
            if i[0] == 0:
                # Layer 0 holds raw input values, not neuron objects.
                inputs.append(network[i[0]][i[1]])
            else:
                inputs.append(network[i[0]][i[1]].a_value)
        self.z_value = np.matmul(self.theta, inputs)
        self.a_value = self.kernel(self.z_value)
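# Worked example of update_value (values illustrative): a neuron with
# theta = [b, w1, w2] reading activations a1, a2 computes
#   z = b*1 + w1*a1 + w2*a2   and   a = kernel(z)
# i.e. np.matmul(theta, [1, a1, a2]) followed by the activation.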
#Converts the given structure data into a network of neurons.
#Theta is generated with random values between negative and
#positive epsilon. Outputs a nested list for the network structure.
def init_structure(structure, default=0, epsilon=1, kernel=lambda x: x):
    network = []
    network.append(structure[0])
    for layer_i in structure[1:]:
        layer = []
        for neuron_i in layer_i:
            neur = neuron(default=default, kernel=kernel)
            neur.inputs = neuron_i
            # Redraw the bias weight so that it also lies in
            # [-epsilon, epsilon], as documented above.
            neur.theta[0] = random.random()*2*epsilon - epsilon
            for k in neuron_i:
                neur.theta.append(random.random()*2*epsilon - epsilon)
                neur.grad.append(0)
            layer.append(neur)
        network.append(layer)
    for layer_i in range(1, len(network)):
        for neuron_i in range(len(network[layer_i])):
            for con in network[layer_i][neuron_i].inputs:
                # Store addresses as lists so that this membership test
                # and back_prop's inputs.index() lookups both match.
                if (con[0] != 0) and ([layer_i, neuron_i] not in network[con[0]][con[1]].outputs):
                    network[con[0]][con[1]].outputs.append([layer_i, neuron_i])
    return network
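# The structure format, sketched for a tiny 2-2-1 network: entry 0 is a
# placeholder for the raw inputs, and every other entry lists, per
# neuron, the [layer, index] addresses it reads from (the same format
# the gradient-check script below uses):
#
#   structure = [[],                              # layer 0: raw inputs
#                [[[0,0],[0,1]], [[0,0],[0,1]]],  # layer 1: two neurons
#                [[[1,0],[1,1]]]                  # layer 2: one neuron
#                ]
#   net = init_structure(structure, kernel=lambda x: x)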
#Takes in the network and the input layer and updates all neurons.
def forward_prop(network, input_layer):
    network[0] = input_layer
    for layer in network[1:]:
        for neuron in layer:
            neuron.update_value(network)
    return network
#Converts a matrix (list of rows) to a flat vector.
def mat2vec(matrix):
    vec = []
    for i in matrix:
        for j in i:
            vec.append(j)
    return vec
#Converts a layer's 2-D neuron addresses ([layer, [row, col]]) into
#flat vector addresses ([layer, row*column + col]), given the number
#of columns.
def layer_conv_vec(layer, column):
    for neuron in layer:
        for address_i in range(len(neuron)):
            neuron[address_i][1] = neuron[address_i][1][0]*column + neuron[address_i][1][1]
    return layer
#Converts a 1-indexed label i to a one-hot vector of length l.
def hot1(i, l):
    s = (i-1)*'0' + '1' + (l-i)*'0'
    s = [int(j) for j in s]
    return s

#Converts a 1-indexed label i to a "cold-one" (complement) vector.
def cold1(i, l):
    s = (i-1)*'1' + '0' + (l-i)*'1'
    s = [int(j) for j in s]
    return s
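# Quick examples (labels are 1-indexed):
#   hot1(2, 5)  -> [0, 1, 0, 0, 0]
#   cold1(2, 5) -> [1, 0, 1, 1, 1]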
#The function takes in the network, the output-layer factor (dJ/da
#for each output neuron), and Dkernel, the derivative of the kernel.
#It only gives the gradient value for one test case; rerun it for
#each test case. This can also be used with feedback-type networks.
def back_prop(network, factor, Dkernel=lambda x: 1):
    #Excludes the first and last layers of the network
    layer_range = range(len(network)-2, 0, -1)
    #Calculation for the last layer
    for neuron_i in range(len(network[-1])):
        neuron = network[-1][neuron_i]
        input_list = [1]
        for inp in neuron.inputs:
            if inp[0] == 0:
                input_list.append(network[0][inp[1]])
            else:
                input_list.append(network[inp[0]][inp[1]].a_value)
        neuron.grad = factor[neuron_i]*Dkernel(neuron.z_value)*np.array(input_list)
    #Calculation for the other layers
    for layer_i in layer_range:
        neuron_range = range(len(network[layer_i]))
        for neuron_i in neuron_range:
            neuron = network[layer_i][neuron_i]
            #Accumulates dJ/da for this neuron from every downstream
            #neuron it feeds into.
            gradient_sum = 0
            for output in neuron.outputs:
                out_neuron = network[output[0]][output[1]]
                index = out_neuron.inputs.index([layer_i, neuron_i])
                # grad[0] is the downstream neuron's bias gradient, which
                # equals its raw delta; theta[index+1] skips the bias
                # weight to pick the weight on this neuron's activation.
                gradient_sum += out_neuron.grad[0]*out_neuron.theta[index+1]
            input_list = [1]
            for inp in neuron.inputs:
                if inp[0] == 0:
                    input_list.append(network[0][inp[1]])
                else:
                    input_list.append(network[inp[0]][inp[1]].a_value)
            neuron.grad = gradient_sum*Dkernel(neuron.z_value)*np.array(input_list)
    return network
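# For a squared-error cost J = ||a - y||^2 / 2 over the output layer,
# the factor argument is dJ/da = a - y per output neuron, which is
# exactly how the gradient-check script below calls back_prop.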
#Adam algorithm for obtaining learning rates for individual
#weight parameters.
#The caller must create a timestep variable and thread it through
#manually; it is returned updated.
def adam_algo(alpha, B1, B2, e, t, network):
    """
    Pseudocode as published in the arXiv paper:
    Require: α: stepsize
    Require: β1, β2 ∈ [0,1): exponential decay rates for the moment estimates
    Require: f(θ): stochastic objective function with parameters θ
    Require: θ0: initial parameter vector
        m0 ← 0 (initialize 1st moment vector)
        v0 ← 0 (initialize 2nd moment vector)
        t  ← 0 (initialize timestep)
        while θt not converged do
            t ← t + 1
            gt ← ∇θ ft(θt−1)            (gradients w.r.t. stochastic objective at timestep t)
            mt ← β1·mt−1 + (1−β1)·gt    (update biased first moment estimate)
            vt ← β2·vt−1 + (1−β2)·gt^2  (update biased second raw moment estimate)
            bmt ← mt/(1−β1^t)           (compute bias-corrected first moment estimate)
            bvt ← vt/(1−β2^t)           (compute bias-corrected second raw moment estimate)
            θt ← θt−1 − α·bmt/(√bvt + e) (update parameters)
        end while
        return θt (resulting parameters)
    We use B1 and B2 in place of β1, β2. Gradients come from back_prop.
    """
    t += 1
    for layer in network[1:]:
        for neuron in layer:
            if t == 1:
                neuron.alpha.append((1-B1)*np.array(neuron.grad))
                neuron.alpha.append((1-B2)*(np.array(neuron.grad)**2))
            else:
                neuron.alpha[0] = B1*neuron.alpha[0] + (1-B1)*np.array(neuron.grad)
                # vt ← β2·vt−1 + (1−β2)·gt^2
                neuron.alpha[1] = B2*neuron.alpha[1] + (1-B2)*(np.array(neuron.grad)**2)
            bmt = neuron.alpha[0]/(1 - B1**t)
            bvt = neuron.alpha[1]/(1 - B2**t)
            neuron.theta = np.array(neuron.theta)
            neuron.theta = neuron.theta - alpha*bmt/(bvt**0.5 + e)
            neuron.theta = list(neuron.theta)
    return (network, t)
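# A minimal usage sketch of the timestep threading; the hyperparameter
# values are the Adam paper's suggested defaults, not something this
# gist prescribes, and x, factor, Dkernel stand for the caller's data:
#
#   t = 0
#   for step in range(num_steps):
#       network = forward_prop(network, x)
#       network = back_prop(network, factor, Dkernel=Dkernel)
#       network, t = adam_algo(0.001, 0.9, 0.999, 1e-8, t, network)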
#Fixed-learning-rate update: theta ← theta − alpha·grad
def flr_algo(alpha, network):
    for layer in network[1:]:
        for neuron in layer:
            neuron.theta = np.array(neuron.theta)
            neuron.theta = neuron.theta - alpha*np.array(neuron.grad)
            neuron.theta = list(neuron.theta)
    return network
#def train(trainData, network, epoch, batch, costFunction,
#          method, *args, kernel=lambda x:x, Dkernel=lambda x:1):
#    batches = np.array_split(trainData, batch)
#    for epoc in range(epoch):
#        for bat in batches:
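# A sketch of what one training pass could look like with the pieces
# above (illustrative only: train is unfinished, this is not its
# intended implementation, and bat is assumed to yield (x, y) pairs):
#
#   for epoc in range(epoch):
#       for bat in batches:
#           for x, y in bat:
#               network = forward_prop(network, x)
#               out = np.array([n.a_value for n in network[-1]])
#               factor = out - np.array(y)  # squared-error dJ/da
#               network = back_prop(network, factor, Dkernel=Dkernel)
#               network = flr_algo(alpha, network)  # or adam_algo(...)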
File 2: gradient check script (imports the file above as nn_s)
import numpy as np
from math import exp
from nn_s import *

# A tiny fully connected 2-2-2-2 network: every neuron in layers 1-3
# reads both neurons of the previous layer.
structure = [[],
             [[[0,0],[0,1]], [[0,0],[0,1]]],
             [[[1,0],[1,1]], [[1,0],[1,1]]],
             [[[2,0],[2,1]], [[2,0],[2,1]]]
             ]
A0 = np.array([1, 0])  # target output
kernel = lambda x: 1/(1 + exp(-x))             # sigmoid
Dkernel = lambda x: kernel(x)*(1 - kernel(x))  # its derivative
network = init_structure(structure, kernel=kernel)
network = forward_prop(network, [0.34, 0.56])
T = np.array([network[3][0].a_value, network[3][1].a_value])
T = T - A0  # dJ/da for the squared-error cost
print(Dkernel(network[3][1].z_value)*T[1])  # debug: raw delta of output neuron 1
network = back_prop(network, Dkernel=Dkernel, factor=T)
J = np.matmul(T, T)/2
# Numerical gradient check: perturb each weight by h and compare the
# finite-difference slope with the analytic gradient from back_prop.
h = (0.1)**8
thets = []
for layer_i in range(1, 4):
    for neuron_i in range(2):
        for grad_i in range(3):
            network[layer_i][neuron_i].theta[grad_i] += h
            network = forward_prop(network, [0.34, 0.56])
            T = np.array([network[3][0].a_value, network[3][1].a_value])
            T = T - A0
            Jt = np.matmul(T, T)/2
            thets.append((Jt - J)/h)
            network[layer_i][neuron_i].theta[grad_i] -= h
network = forward_prop(network, [0.34, 0.56])
print('Analytic \t \t Numerical \t \t Ratio')
for layer_i in range(1, 4):
    for neuron_i in range(2):
        for grad_i in range(3):
            # Each neuron contributes 3 gradients (bias + 2 weights),
            # so the flat index into thets is (layer-1)*6 + neuron*3 + grad.
            flat_i = (layer_i - 1)*6 + neuron_i*3 + grad_i
            print(str(network[layer_i][neuron_i].grad[grad_i]) + ' \t '
                  + str(thets[flat_i]) + '\t'
                  + str(network[layer_i][neuron_i].grad[grad_i]/thets[flat_i]))
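# If back_prop is correct, the Analytic and Numerical columns should
# agree to several decimal places and the Ratio column should be close
# to 1 for every weight.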