@suvojit-0x55aa
Created September 24, 2017 13:30
A Simple Neural Net in Python
import numpy as np
import csv
import matplotlib.pyplot as plt
#fix random seed for reproducibility
np.random.seed(1)
#Read dataset; assumes iris.csv holds 4 numeric features plus an integer
#class label (0, 1 or 2) per row
with open('iris.csv', 'r') as irisFile:
    iris = np.array(list(csv.reader(irisFile, delimiter=','))).astype(np.float64)
#shuffle dataset
np.random.shuffle(iris)
#all columns except the last are features; the last column is the label
features = iris[:,:-1]
#standardize each feature column to zero mean, unit variance
features = (features - features.mean(axis=0))/features.std(axis=0)
#make a column of ones, one per sample
biasPad = np.ones((features.shape[0],1), dtype=features.dtype)
#pad a 1 onto the right side of each feature vector to act as the bias input
features = np.concatenate((features,biasPad), axis=1)
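#e.g. a standardized sample [x1, x2, x3, x4] becomes [x1, x2, x3, x4, 1.0],
#so the bias weight is learned inside the same dot product as the others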
#create a one-hot matrix representation of the labels
label = np.array(iris[:,-1],dtype=int).reshape(-1)
label = np.eye(3)[label]
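#e.g. a label of 2 selects row 2 of the 3x3 identity matrix,
#so np.eye(3)[2] yields the one-hot vector [0., 0., 1.]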
#split into training and testing sets, 80/20
M = features.shape[0]
splitIdx = int(0.8*M)
XTest = features[splitIdx:,:]
XTrain = features[:splitIdx,:]
YTest = label[splitIdx:,:]
YTrain = label[:splitIdx,:]
#neurons
#number of input neurons equals the size of a feature vector (4 features + bias)
inputCount = features.shape[1]
#basic case: hidden layer as wide as the input layer
hiddenCount = inputCount
#3-class classification, thus 3 output neurons
outputCount = 3
#activations for each layer of neurons
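#(placeholder shapes; feedFwd rebinds these globals to (batch, n) matrices)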
ai = np.ones((inputCount,1))
ah = np.ones((hiddenCount,1))
ao = np.ones((outputCount,1))
#neuron weights, scaled by sqrt(2/fan-in)
#wih[i][j]: weight from input neuron i to hidden neuron j
wih = np.random.rand(inputCount, hiddenCount)*np.sqrt(2./inputCount)
#who[j][k]: weight from hidden neuron j to output neuron k
who = np.random.rand(hiddenCount, outputCount)*np.sqrt(2./hiddenCount)
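#note: the sqrt(2/fan-in) factor matches He-style initialization, which is
#usually paired with zero-centered np.random.randn; np.random.rand keeps every
#weight positive, which can slow early tanh training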
#previous-update arrays, kept for the momentum term
cih = np.zeros((inputCount, hiddenCount))
cho = np.zeros((hiddenCount, outputCount))
#function for the feedforward pass
def feedFwd(featureMat):
    global ai,ah,ao,wih,who
    #input activations
    ai = featureMat
    '''hidden activations
    vectorized matrix multiply
    ai . wih'''
    ah = np.dot(ai,wih)
    #vectorized tanh activation
    ah = np.tanh(ah)
    '''output activations
    ah . who'''
    ao = np.dot(ah, who)
    #vectorized tanh activation
    ao = np.tanh(ao)
    #alternative sigmoid: ao = 1.0/(1.0 + np.exp(-ao))
    return ao
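#shape check for the setup above: featureMat is (batch, 5), wih is (5, 5),
#so ah is (batch, 5); who is (5, 3), so ao is (batch, 3) -- one row of
#3 class scores per sample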
#function for backpropagation
def backProp(X,label,output,N,batchSize=1,beta=0.0009):
    '''N: learning rate, beta: momentum coefficient'''
    global ai,ah,ao,wih,who,cih,cho
    #output delta, passed back through the tanh derivative (1 - ao^2)
    delOut = (output - label)*(1.0 - output**2)
    dwho = np.dot(ah.T,delOut)/batchSize
    #propagate the delta back through the hidden layer
    delHidden = np.dot(delOut,who.T)*(1.0 - ah**2)
    dwih = np.dot(X.T,delHidden)/batchSize
    '''weight updates: gradient step plus momentum from the previous update'''
    who -= N*dwho + beta*cho
    cho[:] = dwho
    wih -= N*dwih + beta*cih
    cih[:] = dwih
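#gradient sketch for the mean squared error used in train():
#  E         = 0.5*sum((Y - ao)^2)
#  delOut    = dE/dz_out = (ao - Y)*(1 - ao^2)   since d tanh(z)/dz = 1 - tanh(z)^2
#  dwho      = ah.T . delOut                     since z_out = ah . who
#  delHidden and dwih repeat the same pattern one layer down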
def train(X,Y,iteration=1000,learningRate=0.001,batchSize=1,beta=0.099,decayRate=0.0005):
    errorTimeline = []
    epochList = []
    #train for `iteration` epochs
    for epoch in range(iteration):
        #for each mini-batch
        for i in range(0,X.shape[0],batchSize):
            #slice the dataset into mini-batches
            batchSplit = min(i+batchSize,X.shape[0])
            XminiBatch = X[i:batchSplit,:]
            YminiBatch = Y[i:batchSplit,:]
            #calculate a forward pass through the network
            output = feedFwd(XminiBatch)
            #calculate mean squared error
            error = 0.5*np.sum((YminiBatch-output)**2)/batchSize
            #backprop and update weights
            backProp(XminiBatch,YminiBatch,output,learningRate,batchSize,beta)
        #every 50 epochs decrease momentum and learning rate;
        #decreasing momentum reduces the chance of overshooting a convergence point
        if epoch%50 == 0 and epoch > 0:
            learningRate *= 1./(1. + (decayRate * epoch))
            beta *= 1./(1. + (decayRate * epoch))
            #store error for plotting the loss graph
            errorTimeline.append(error)
            epochList.append(epoch)
            print('Epoch :',epoch,', Error :',error,', alpha :',learningRate)
    return errorTimeline,epochList
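#the factor 1/(1 + decayRate*epoch) shrinks alpha a little more each time the
#schedule fires; e.g. with decayRate=0.0005, epoch 50 scales alpha by
#1/(1 + 0.025) ~ 0.976, so steps get steadily smaller as training settles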
#Work it, make it, do it,
#Makes us harder, better, faster, stronger!
learningRate = 0.0001
beta = 0.099
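#batchSize is set to M, the full dataset size; min() inside train() clamps it
#to the training-set size, so every epoch runs as a single full batch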
errorTimeline,epochList = train(XTrain,YTrain,2000,learningRate,M,beta)
#How tough are ya?
#get network outputs for the test features
predOutput = feedFwd(XTest)
#vectorized count: compare predicted and true class indices along rows
#and count the rows where they match
count = np.sum(np.argmax(predOutput,axis=1) == np.argmax(YTest,axis=1))
#print accuracy
print('Accuracy : ',(float(count)/float(YTest.shape[0])))
#plot graph
plt.plot(epochList,errorTimeline)
plt.xlabel('Number of epochs')
plt.ylabel('Training Error')
plt.savefig('loss-function.png')
plt.show()
#mow the lawn, take out the garbage, have a good night's sleep