@ipritom
Created June 29, 2019 10:25
# Package imports
import numpy as np
import matplotlib.pyplot as plt


def sigmoid(x):
    """
    Compute the sigmoid of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x)
    """
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(a):
    """
    Derivative of the sigmoid, expressed in terms of the activation a = sigmoid(z):
    sigmoid'(z) = a * (1 - a)
    """
    return a * (1.0 - a)

def softmax(x):
    """
    Column-wise softmax (each column is one example).
    """
    e = np.exp(x - np.max(x, axis=0, keepdims=True))  # shift for numerical stability
    s = np.divide(e, np.sum(e, axis=0, keepdims=True))
    return s

def softmax_derivative(x):
    """
    Diagonal of the softmax Jacobian: ds_i/dx_i = s_i * (1 - s_i).
    Off-diagonal terms are ignored; the corrected back_prop below does not use
    this helper because the softmax derivative is folded into dZ2 = A2 - Y.
    """
    s = softmax(x)
    ds = np.multiply(s, (1 - s))
    return ds

def initialize_parameters(n_x, n_h, n_y):
    # small random initial weights; note that this network has no bias terms
    W1 = np.random.randn(n_h, n_x) * 0.01
    W2 = np.random.randn(n_y, n_h) * 0.01
    parameters = {"W1": W1,
                  "W2": W2}
    return parameters

def forward_prop(X, parameters):
    # retrieving parameters
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    # forward propagation: sigmoid hidden layer, softmax output layer
    Z1 = np.dot(W1, X)
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2, A1)
    A2 = softmax(Z2)
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return cache

def compute_cost(cache, parameters, Y):
    m = Y.shape[1]  # number of examples
    A2 = cache["A2"]
    # cross-entropy cost for the softmax output with one-hot labels Y
    logprobs = np.multiply(np.log(A2), Y)
    cost = -1 / m * np.sum(logprobs)
    cost = np.squeeze(cost)  # makes sure cost is the dimension we expect, e.g. turns [[17]] into 17
    return cost

def back_prop(parameters, cache, X, Y):
    m = X.shape[1]
    # retrieving parameters and cached activations
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    # back propagation: gradients of the cost with respect to W2 and W1
    # for a softmax output with cross-entropy cost, dZ2 = A2 - Y
    dZ2 = A2 - Y
    dL2 = (1 / m) * np.dot(dZ2, A1.T)                 # gradient w.r.t. W2
    # propagate through the sigmoid hidden layer
    dZ1 = np.dot(W2.T, dZ2) * sigmoid_derivative(A1)
    dL1 = (1 / m) * np.dot(dZ1, X.T)                  # gradient w.r.t. W1
    grads = {"dL1": dL1,
             "dL2": dL2}
    return grads

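# Optional sanity check (not part of the original gist): a minimal numerical
# gradient-check sketch, assuming the corrected back_prop above. The helper name
# numerical_grad_check is an illustrative addition; it perturbs a single entry of
# W1 and compares the finite-difference slope of the cost with grads["dL1"].
def numerical_grad_check(X, Y, parameters, eps=1e-6):
    cache = forward_prop(X, parameters)
    grads = back_prop(parameters, cache, X, Y)
    # perturb W1[0, 0] in both directions and recompute the cost
    p_plus = {"W1": parameters["W1"].copy(), "W2": parameters["W2"].copy()}
    p_minus = {"W1": parameters["W1"].copy(), "W2": parameters["W2"].copy()}
    p_plus["W1"][0, 0] += eps
    p_minus["W1"][0, 0] -= eps
    cost_plus = compute_cost(forward_prop(X, p_plus), p_plus, Y)
    cost_minus = compute_cost(forward_prop(X, p_minus), p_minus, Y)
    numeric = (cost_plus - cost_minus) / (2 * eps)
    analytic = grads["dL1"][0, 0]
    print("numerical grad:", numeric, "analytic grad:", analytic)
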
def update_parameters(grads, parameters, learning_rate=1):
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    dL1 = grads["dL1"]
    dL2 = grads["dL2"]
    # gradient descent update rule for each parameter
    W1 = W1 - learning_rate * dL1
    W2 = W2 - learning_rate * dL2
    parameters = {"W1": W1,
                  "W2": W2}
    return parameters

def nn_model(X, Y, num_iteration=1000, print_cost=False):
    n_x = X.shape[0]  # input layer size
    n_y = Y.shape[0]  # output layer size
    n_h = 4           # hidden layer units
    # initializing parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    for i in range(0, num_iteration):
        # forward propagation
        cache = forward_prop(X, parameters)
        # cost
        cost = compute_cost(cache, parameters, Y)
        # back propagation
        grads = back_prop(parameters, cache, X, Y)
        # gradient descent parameter update
        parameters = update_parameters(grads, parameters)
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
    return parameters

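# A small prediction helper (not part of the original gist): a sketch assuming the
# column-wise data layout and softmax output used above. The name predict is an
# illustrative addition; it returns the most likely class index for each column of X.
def predict(X, parameters):
    cache = forward_prop(X, parameters)
    return np.argmax(cache["A2"], axis=0)
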
if __name__ == "__main__":
    # 3-bit parity data: each column of X is one example, shape (n_x, m) = (3, 8)
    X = np.array([[0, 0, 0],
                  [0, 0, 1],
                  [0, 1, 0],
                  [0, 1, 1],
                  [1, 0, 0],
                  [1, 0, 1],
                  [1, 1, 0],
                  [1, 1, 1]]).T
    labels = np.array([0, 1, 1, 0, 1, 0, 0, 1])  # parity of each 3-bit input
    Y = np.eye(2)[labels].T                      # one-hot labels for the softmax output, shape (2, 8)
    parameters = nn_model(X, Y, print_cost=True)
    print(parameters)
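    # Hypothetical usage of the predict sketch above: report training accuracy.
    preds = predict(X, parameters)
    print("predictions:", preds)
    print("training accuracy:", np.mean(preds == labels))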