Created
August 22, 2018 10:08
-
-
Save varun19299/d2047e37d6e45d7b21b3091f23acb9f1 to your computer and use it in GitHub Desktop.
Forward_prop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^-x), applied elementwise."""
    return 1.0 / (1.0 + np.exp(-x))
def sigmoid_grad(x):
    """Sigmoid derivative expressed in terms of the activated output x.

    For a = sigmoid(z), d sigmoid/dz = a * (1 - a); callers pass the
    already-activated value, not the pre-activation.
    """
    return x * (1 - x)
def relu(x):
    """Rectified linear unit: elementwise max(x, 0)."""
    return np.maximum(x, 0)
def tanh(x):
    """Hyperbolic tangent activation, applied elementwise.

    Uses np.tanh directly instead of the original 2*sigmoid(2*x) - 1
    identity: mathematically equivalent, but np.tanh avoids the
    np.exp overflow warnings of the sigmoid route for large |x| and
    removes the dependency on the sibling sigmoid helper.
    """
    return np.tanh(x)
def tanh_grad(x):
    """Tanh derivative expressed in terms of the activated output x.

    Bug fix: the original computed 1 - tanh(x)**2, i.e. it expected the
    pre-activation, but every call site in this file passes the already
    tanh-activated hidden layer (mirroring sigmoid_grad, which also takes
    the activated output), so tanh was being applied twice.  For
    a = tanh(z), d tanh/dz = 1 - a**2.
    """
    return 1 - x ** 2
# function to train a three layer neural net with RELU, sigmoid or tanh
# nonlinearity via vanilla gradient descent
def three_layer_net(NONLINEARITY, X, y, model, step_size, reg):
    """Train a 3-layer (two hidden layers + softmax) net with vanilla GD.

    Parameters
    ----------
    NONLINEARITY : str
        Hidden-layer activation: 'RELU', 'SIGM' or 'TANH'.
    X : ndarray, shape (N, D)
        Training inputs, one example per row.
    y : ndarray, shape (N,)
        Integer class labels indexing the columns of the score matrix.
    model : dict
        Initial parameters 'W1', 'W2', 'W3', 'b1', 'b2', 'b3' and hidden
        sizes 'h', 'h2'.  The weight/bias arrays are updated in place.
    step_size : float
        Learning rate for the gradient-descent updates.
    reg : float
        L2 regularization strength.

    Returns
    -------
    tuple
        (plot_array_1, plot_array_2, W1, W2, W3, b1, b2, b3): the
        per-iteration mean |dW1| and |dW2| magnitudes (used to visualize
        gradient flow) followed by the trained parameters.

    Raises
    ------
    ValueError
        If NONLINEARITY is not one of the supported activations.
        (The original silently left `scores` undefined in that case.)
    """
    if NONLINEARITY not in ('RELU', 'SIGM', 'TANH'):
        raise ValueError("NONLINEARITY must be 'RELU', 'SIGM' or 'TANH'")

    # parameter initialization
    h = model['h']
    h2 = model['h2']
    W1 = model['W1']
    W2 = model['W2']
    W3 = model['W3']
    b1 = model['b1']
    b2 = model['b2']
    b3 = model['b3']

    num_examples = X.shape[0]
    plot_array_1 = []  # mean |dW1| per iteration
    plot_array_2 = []  # mean |dW2| per iteration

    # gradient descent loop
    for i in range(50000):
        # ---- forward pass ----
        if NONLINEARITY == 'RELU':
            hidden_layer = relu(np.dot(X, W1) + b1)
            hidden_layer2 = relu(np.dot(hidden_layer, W2) + b2)
        elif NONLINEARITY == 'SIGM':
            hidden_layer = sigmoid(np.dot(X, W1) + b1)
            hidden_layer2 = sigmoid(np.dot(hidden_layer, W2) + b2)
        else:  # 'TANH'
            hidden_layer = tanh(np.dot(X, W1) + b1)
            hidden_layer2 = tanh(np.dot(hidden_layer, W2) + b2)
        scores = np.dot(hidden_layer2, W3) + b3

        # softmax probabilities
        exp_scores = np.exp(scores)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)  # [N x K]

        # loss: average cross-entropy plus L2 regularization
        correct_logprobs = -np.log(probs[range(num_examples), y])
        data_loss = np.sum(correct_logprobs) / num_examples
        reg_loss = 0.5 * reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2) + 0.5 * reg * np.sum(W3 * W3)
        loss = data_loss + reg_loss
        if i % 1000 == 0:
            print("iteration : " + str(i) + " loss : " + str(loss))

        # gradient on the scores (softmax + cross-entropy combined)
        dscores = probs
        dscores[range(num_examples), y] -= 1
        dscores /= num_examples

        # ---- backward pass ----
        dW3 = np.dot(hidden_layer2.T, dscores)
        db3 = np.sum(dscores, axis=0, keepdims=True)
        if NONLINEARITY == 'RELU':
            # backprop ReLU nonlinearity: kill gradient where activation was 0
            dhidden2 = np.dot(dscores, W3.T)
            dhidden2[hidden_layer2 <= 0] = 0
            dW2 = np.dot(hidden_layer.T, dhidden2)
            plot_array_2.append(np.sum(np.abs(dW2)) / np.sum(np.abs(dW2.shape)))
            db2 = np.sum(dhidden2, axis=0)
            dhidden = np.dot(dhidden2, W2.T)
            dhidden[hidden_layer <= 0] = 0
        elif NONLINEARITY == 'SIGM':
            # backprop sigmoid nonlinearity (grad helpers take the
            # activated output, not the pre-activation)
            dhidden2 = np.dot(dscores, W3.T) * sigmoid_grad(hidden_layer2)
            dW2 = np.dot(hidden_layer.T, dhidden2)
            plot_array_2.append(np.sum(np.abs(dW2)) / np.sum(np.abs(dW2.shape)))
            db2 = np.sum(dhidden2, axis=0)
            dhidden = np.dot(dhidden2, W2.T) * sigmoid_grad(hidden_layer)
        else:  # 'TANH'
            # backprop tanh nonlinearity (tanh_grad is fed the activated output)
            dhidden2 = np.dot(dscores, W3.T) * tanh_grad(hidden_layer2)
            dW2 = np.dot(hidden_layer.T, dhidden2)
            plot_array_2.append(np.sum(np.abs(dW2)) / np.sum(np.abs(dW2.shape)))
            db2 = np.sum(dhidden2, axis=0)
            dhidden = np.dot(dhidden2, W2.T) * tanh_grad(hidden_layer)
        dW1 = np.dot(X.T, dhidden)
        plot_array_1.append(np.sum(np.abs(dW1)) / np.sum(np.abs(dW1.shape)))
        db1 = np.sum(dhidden, axis=0)

        # add regularization gradient
        dW3 += reg * W3
        dW2 += reg * W2
        dW1 += reg * W1

        # option to return loss, grads -- uncomment next comment
        grads = {}
        grads['W1'] = dW1
        grads['W2'] = dW2
        grads['W3'] = dW3
        grads['b1'] = db1
        grads['b2'] = db2
        grads['b3'] = db3
        # return loss, grads

        # parameter update
        W1 += -step_size * dW1
        b1 += -step_size * db1
        W2 += -step_size * dW2
        b2 += -step_size * db2
        W3 += -step_size * dW3
        b3 += -step_size * db3

    # evaluate training set accuracy with the FINAL parameters.
    # Bug fix: the original had no 'TANH' branch here, so for tanh it
    # silently reused activations computed before the last weight update.
    if NONLINEARITY == 'RELU':
        hidden_layer = relu(np.dot(X, W1) + b1)
        hidden_layer2 = relu(np.dot(hidden_layer, W2) + b2)
    elif NONLINEARITY == 'SIGM':
        hidden_layer = sigmoid(np.dot(X, W1) + b1)
        hidden_layer2 = sigmoid(np.dot(hidden_layer, W2) + b2)
    else:  # 'TANH'
        hidden_layer = tanh(np.dot(X, W1) + b1)
        hidden_layer2 = tanh(np.dot(hidden_layer, W2) + b2)
    scores = np.dot(hidden_layer2, W3) + b3
    predicted_class = np.argmax(scores, axis=1)
    print("training accuracy:" + str(np.mean(predicted_class == y)))
    # return cost, grads
    return plot_array_1, plot_array_2, W1, W2, W3, b1, b2, b3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment