# Package imports
import numpy as np
import matplotlib.pyplot as plt
def sigmoid(x):
    """
    Compute the sigmoid of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x)
    """
    return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
    """
    Compute the derivative of the sigmoid at x: sigmoid(x) * (1 - sigmoid(x)).
    """
    s = sigmoid(x)
    return s * (1.0 - s)
def softmax(x):
    """
    Compute the softmax of x column-wise (one column per example).
    """
    s = np.divide(np.exp(x), np.sum(np.exp(x), axis=0, keepdims=True))
    return s
def softmax_derivative(x):
    '''
    Compute the element-wise (diagonal) derivative of the softmax function:
    s * (1 - s), where s = softmax(x).
    '''
    p1 = np.exp(x)
    p2 = np.sum(np.exp(x), axis=0, keepdims=True)
    ds = np.divide(np.multiply(p1, (p2 - p1)), np.power(p2, 2))
    return ds
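# Example (for illustration only, not part of the original gist): for a single
# column x = [[1.], [2.], [3.]], softmax(x) is roughly [[0.090], [0.245], [0.665]],
# which sums to 1, and softmax_derivative(x) gives the diagonal Jacobian terms
# s * (1 - s) for that same column.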
def initialize_parameters(n_x, n_h, n_y):
    """
    Initialize the weight matrices with small random values.
    Note: this model has no bias terms.
    """
    W1 = np.random.randn(n_h, n_x) * 0.01
    W2 = np.random.randn(n_y, n_h) * 0.01
    parameters = {"W1": W1,
                  "W2": W2}
    return parameters
def forward_prop(X, parameters):
    # retrieving parameters
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    # forward propagation to calculate A2
    Z1 = np.dot(W1, X)
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2, A1)
    # with a single output unit and a binary cross-entropy cost, the output
    # activation is a sigmoid (softmax over a single unit would always return 1)
    A2 = sigmoid(Z2)
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return cache
def compute_cost(cache, parameters, Y):
    """
    Binary cross-entropy cost:
    J = -(1/m) * sum( Y*log(A2) + (1-Y)*log(1-A2) )
    """
    m = Y.shape[1]  # number of examples
    A2 = cache["A2"]
    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
    cost = -1 / m * np.sum(logprobs)
    cost = np.squeeze(cost)  # makes sure cost is the dimension we expect,
                             # e.g. turns [[17]] into 17
    return cost
def back_prop(parameters, cache, X, Y):
    """
    Backward propagation: gradients of the cost with respect to W1 and W2.
    """
    m = X.shape[1]
    # retrieving parameters and cached activations
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    # gradient for W2 #
    # for a sigmoid output with cross-entropy cost, dZ2 simplifies to A2 - Y
    dZ2 = A2 - Y                                # shape (n_y, m)
    dL2 = (1 / m) * np.dot(dZ2, A1.T)           # shape (n_y, n_h), same as W2
    # gradient for W1 #
    dZ1 = np.dot(W2.T, dZ2) * A1 * (1 - A1)     # shape (n_h, m), sigmoid derivative at the hidden layer
    dL1 = (1 / m) * np.dot(dZ1, X.T)            # shape (n_h, n_x), same as W1
    grads = {"dL1": dL1,
             "dL2": dL2}
    return grads
def update_parameters(grads, parameters, learning_rate=1):
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    dL1 = grads["dL1"]
    dL2 = grads["dL2"]
    # update rule for each parameter
    W1 = W1 - learning_rate * dL1
    W2 = W2 - learning_rate * dL2
    parameters = {"W1": W1,
                  "W2": W2}
    return parameters
def nn_model(X, Y, num_iteration=1000, print_cost=False):
    n_x = X.shape[0]  # input layer size
    n_y = Y.shape[0]  # output layer size
    n_h = 4           # hidden layer units
    # initializing parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    for i in range(0, num_iteration):
        # forward propagation
        cache = forward_prop(X, parameters)
        # cost
        cost = compute_cost(cache, parameters, Y)
        # back propagation
        grads = back_prop(parameters, cache, X, Y)
        # gradient descent parameter update
        parameters = update_parameters(grads, parameters)
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
    return parameters
if __name__ == "__main__":
    # each row is one example; the network expects data as (features, examples),
    # so X and y are transposed when passed to nn_model
    X = np.array([[0, 0, 0],
                  [0, 0, 1],
                  [0, 1, 0],
                  [0, 1, 1],
                  [1, 0, 0],
                  [1, 0, 1],
                  [1, 1, 0],
                  [1, 1, 1]])
    y = np.array([[0], [1], [1], [0], [1], [0], [0], [1]])
    parameter = nn_model(X.T, y.T, print_cost=True)
    print(parameter)
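    # Quick sanity check (an added sketch, not part of the original gist):
    # run a forward pass with the trained parameters and compare the
    # thresholded outputs against the labels.
    cache = forward_prop(X.T, parameter)
    predictions = (cache["A2"] > 0.5).astype(int)
    print("predictions:", predictions)
    print("labels:     ", y.T)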