# Package imports
import numpy as np
import matplotlib.pyplot as plt
def sigmoid(x):
    """
    Compute the sigmoid of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x)
    """
    return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
    """
    Compute the derivative of the sigmoid at x: sigmoid(x) * (1 - sigmoid(x)).
    """
    s = sigmoid(x)
    return s * (1.0 - s)
def softmax(x):
    """
    Compute the softmax of x column-wise (one column per example).
    """
    s = np.divide(np.exp(x), np.sum(np.exp(x), axis=0, keepdims=True))
    return s
def softmax_derivative(x):
    '''
    Compute the element-wise (diagonal) derivative of the softmax function:
    s * (1 - s), where s = softmax(x).
    '''
    p1 = np.exp(x)
    p2 = np.sum(np.exp(x), axis=0, keepdims=True)
    ds = np.divide(np.multiply(p1, (p2 - p1)), np.power(p2, 2))
    return ds
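# Example (for illustration only, not part of the original gist): for a single
# column x = [[1.], [2.], [3.]], softmax(x) is roughly [[0.090], [0.245], [0.665]],
# which sums to 1, and softmax_derivative(x) gives the diagonal Jacobian terms
# s * (1 - s) for that same column.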
def initialize_parameters(n_x, n_h, n_y):
    """
    Initialize the weight matrices with small random values.
    Note: this model has no bias terms.
    """
    W1 = np.random.randn(n_h, n_x) * 0.01
    W2 = np.random.randn(n_y, n_h) * 0.01
    parameters = {"W1": W1,
                  "W2": W2}
    return parameters
def forward_prop(X, parameters):
    # retrieving parameters
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    # forward propagation to calculate A2
    Z1 = np.dot(W1, X)
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2, A1)
    # with a single output unit and a binary cross-entropy cost, the output
    # activation is a sigmoid (softmax over a single unit would always return 1)
    A2 = sigmoid(Z2)
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return cache
def compute_cost(cache, parameters, Y):
    """
    Binary cross-entropy cost:
    J = -(1/m) * sum( Y*log(A2) + (1-Y)*log(1-A2) )
    """
    m = Y.shape[1]  # number of examples
    A2 = cache["A2"]
    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
    cost = -1 / m * np.sum(logprobs)
    cost = np.squeeze(cost)  # makes sure cost is the dimension we expect,
                             # e.g. turns [[17]] into 17
    return cost
def back_prop(parameters, cache, X, Y):
    """
    Backward propagation: gradients of the cost with respect to W1 and W2.
    """
    m = X.shape[1]
    # retrieving parameters and cached activations
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    # gradient for W2 #
    # for a sigmoid output with cross-entropy cost, dZ2 simplifies to A2 - Y
    dZ2 = A2 - Y                                # shape (n_y, m)
    dL2 = (1 / m) * np.dot(dZ2, A1.T)           # shape (n_y, n_h), same as W2
    # gradient for W1 #
    dZ1 = np.dot(W2.T, dZ2) * A1 * (1 - A1)     # shape (n_h, m), sigmoid derivative at the hidden layer
    dL1 = (1 / m) * np.dot(dZ1, X.T)            # shape (n_h, n_x), same as W1
    grads = {"dL1": dL1,
             "dL2": dL2}
    return grads
def update_parameters(grads, parameters, learning_rate=1):
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    dL1 = grads["dL1"]
    dL2 = grads["dL2"]
    # update rule for each parameter
    W1 = W1 - learning_rate * dL1
    W2 = W2 - learning_rate * dL2
    parameters = {"W1": W1,
                  "W2": W2}
    return parameters
def nn_model(X, Y, num_iteration=1000, print_cost=False):
    n_x = X.shape[0]  # input layer size
    n_y = Y.shape[0]  # output layer size
    n_h = 4           # hidden layer units
    # initializing parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    for i in range(0, num_iteration):
        # forward propagation
        cache = forward_prop(X, parameters)
        # cost
        cost = compute_cost(cache, parameters, Y)
        # back propagation
        grads = back_prop(parameters, cache, X, Y)
        # gradient descent parameter update
        parameters = update_parameters(grads, parameters)
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
    return parameters
if __name__ == "__main__":
    # each row is one example; the network expects data as (features, examples),
    # so X and y are transposed when passed to nn_model
    X = np.array([[0, 0, 0],
                  [0, 0, 1],
                  [0, 1, 0],
                  [0, 1, 1],
                  [1, 0, 0],
                  [1, 0, 1],
                  [1, 1, 0],
                  [1, 1, 1]])
    y = np.array([[0], [1], [1], [0], [1], [0], [0], [1]])
    parameter = nn_model(X.T, y.T, print_cost=True)
    print(parameter)
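    # Quick sanity check (an added sketch, not part of the original gist):
    # run a forward pass with the trained parameters and compare the
    # thresholded outputs against the labels.
    cache = forward_prop(X.T, parameter)
    predictions = (cache["A2"] > 0.5).astype(int)
    print("predictions:", predictions)
    print("labels:     ", y.T)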