# Gists by muhammadgaffar (mgaffar): NumPy building blocks for an L-layer neural network.

import numpy as np

def backward_propagation_with_regularization(X, Y, cache, lambd):
    # Backward pass of a 3-layer net; L2 regularization adds (lambd/m)*W to each weight gradient.
    m = X.shape[1]
    Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3 = cache
    dZ3 = A3 - Y
    dW3 = 1./m * np.dot(dZ3, A2.T) + (lambd/m) * W3
    db3 = 1./m * np.sum(dZ3, axis=1, keepdims=True)
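    # The snippet is truncated here; a minimal sketch of the remaining layers, assuming the
    # usual ReLU -> ReLU -> sigmoid stack that this cache layout comes from.
    dZ2 = np.dot(W3.T, dZ3) * (A2 > 0)                      # ReLU derivative: gradient passes where A2 > 0
    dW2 = 1./m * np.dot(dZ2, A1.T) + (lambd/m) * W2
    db2 = 1./m * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.dot(W2.T, dZ2) * (A1 > 0)
    dW1 = 1./m * np.dot(dZ1, X.T) + (lambd/m) * W1
    db1 = 1./m * np.sum(dZ1, axis=1, keepdims=True)
    return {"dW3": dW3, "db3": db3, "dW2": dW2, "db2": db2, "dW1": dW1, "db1": db1}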

def compute_cost_L2norm(A3, Y, parameters, lambd):
    # Cross-entropy cost plus the L2 penalty (lambd / 2m) * sum of squared weights.
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]
    cross_entropy_cost = compute_cost(A3, Y)
    L2_regularization_cost = (lambd / (2 * m)) * (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3)))
    return cross_entropy_cost + L2_regularization_cost

def NN_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    costs = []                                              # keep track of the cost
    parameters = initialize_parameters_deep(layers_dims)
    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
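        # The gist preview stops here; a sketch of the rest of the loop under the usual
        # forward -> cost -> backward -> update cycle, wired to the helpers shown below.
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
    return parameters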

def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2                                # number of layers in the neural network
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]
    return parameters

def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)                                         # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)                                 # after this line, Y is the same shape as AL
    # Initializing backward propagation with the gradient of the cost w.r.t. AL
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
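    # The preview ends here; a sketch of the standard continuation: a sigmoid step for the
    # output layer L, then ReLU steps back through layers L-1 .. 1 via linear_activation_backward.
    current_cache = caches[L - 1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, activation="sigmoid")
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev, dW, db = linear_activation_backward(grads["dA" + str(l + 1)], current_cache,
                                                     activation="relu")
        grads["dA" + str(l)] = dA_prev
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db
    return grads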

def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

def linear_backward(dZ, cache):
    A_prev, W, b = cache                                    # cache comes from the forward pass
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m                           # dW = (dZ . A_prev^T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    return dA_prev, dW, db

def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -(1./m) * (np.dot(Y, np.log(AL).T) + np.dot(1 - Y, np.log(1 - AL).T))
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    return cost
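
# Toy sanity check (illustrative values, not from the gist):
# compute_cost(np.array([[0.8, 0.9, 0.4]]), np.array([[1, 1, 0]]))
# = -(np.log(0.8) + np.log(0.9) + np.log(0.6)) / 3, roughly 0.280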

def L_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2                                # number of layers in the neural network
    # From layer 1 to L-1 we use ReLU
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev,
                                             parameters['W' + str(l)],
                                             parameters['b' + str(l)],
                                             activation="relu")
        caches.append(cache)
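    # Preview cut off above; in the usual pattern the output layer L uses a sigmoid.
    AL, cache = linear_activation_forward(A,
                                          parameters['W' + str(L)],
                                          parameters['b' + str(L)],
                                          activation="sigmoid")
    caches.append(cache)
    return AL, caches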

def linear_activation_forward(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
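
# The snippets call linear_forward, sigmoid, relu, sigmoid_backward, relu_backward and
# initialize_parameters_deep without showing them. Minimal sketches of what such helpers
# typically look like (assumed implementations, not part of the original gists):

def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    return Z, (A, W, b)                                     # cache inputs for the backward pass

def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    return A, Z                                             # cache the pre-activation Z

def relu(Z):
    return np.maximum(0, Z), Z

def sigmoid_backward(dA, Z):
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def initialize_parameters_deep(layers_dims):
    # Small random weights, zero biases, one (W, b) pair per layer.
    np.random.seed(1)
    parameters = {}
    for l in range(1, len(layers_dims)):
        parameters["W" + str(l)] = np.random.randn(layers_dims[l], layers_dims[l-1]) * 0.01
        parameters["b" + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters

# Example end-to-end call on toy data (shapes and layer sizes are illustrative):
# X = np.random.randn(12, 200); Y = (np.random.rand(1, 200) > 0.5).astype(int)
# parameters = NN_model(X, Y, layers_dims=[12, 20, 7, 1], print_cost=True)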