Skip to content

Instantly share code, notes, and snippets.

@FavioVazquez
Created May 28, 2018 00:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FavioVazquez/6b4f476c3258a52b6accd842d57a3281 to your computer and use it in GitHub Desktop.
Save FavioVazquez/6b4f476c3258a52b6accd842d57a3281 to your computer and use it in GitHub Desktop.
Digit Recognition with Shallow network by @akshaybahadur21
import numpy as np
import matplotlib.pyplot as plt
def softmax(z):
z -= np.max(z)
sm = (np.exp(z).T / np.sum(np.exp(z),axis=1))
return sm
def layers(X, Y):
"""
:param X:
:param Y:
:return:
"""
n_x = X.shape[0]
n_y = Y.shape[0]
return n_x, n_y
def initialize_nn(n_x, n_h, n_y):
"""
:param n_x:
:param n_h:
:param n_y:
:return:
"""
np.random.seed(2)
W1 = np.random.randn(n_h, n_x) * 0.01
b1 = np.random.rand(n_h, 1)
W2 = np.random.rand(n_y, n_h)
b2 = np.random.rand(n_y, 1)
parameters = {"W1": W1,
"b1": b1,
"W2": W2,
"b2": b2}
return parameters
def forward_prop(X, parameters):
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
Z1 = np.dot(W1, X) + b1
A1 = np.tanh(Z1)
Z2 = np.dot(W2, A1) + b2
A2 = softmax(Z2.T)
cache = {"Z1": Z1,
"A1": A1,
"Z2": Z2,
"A2": A2}
return A2, cache
def compute_cost(A2, Y, parameters):
m = Y.shape[1]
W1 = parameters['W1']
W2 = parameters['W2']
logprobs = np.multiply(np.log(A2), Y)
cost = - np.sum(logprobs) / m
cost = np.squeeze(cost)
return cost
def back_prop(parameters, cache, X, Y):
m = Y.shape[1]
W1 = parameters['W1']
W2 = parameters['W2']
A1 = cache['A1']
A2 = cache['A2']
dZ2 = A2 - Y
dW2 = (1 / m) * np.dot(dZ2, A1.T)
db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.square(A1))
dW1 = (1 / m) * np.dot(dZ1, X.T)
db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)
grads = {"dW1": dW1,
"db1": db1,
"dW2": dW2,
"db2": db2}
return grads
def update_params(parameters, grads, alpha):
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
dW1 = grads['dW1']
db1 = grads['db1']
dW2 = grads['dW2']
db2 = grads['db2']
W1 = W1 - alpha * dW1
b1 = b1 - alpha * db1
W2 = W2 - alpha * dW2
b2 = b2 - alpha * db2
parameters = {"W1": W1,
"b1": b1,
"W2": W2,
"b2": b2}
return parameters
def model_nn(X, Y,Y_real,test_x,test_y, n_h, num_iters, alpha, print_cost):
np.random.seed(3)
n_x,n_y = layers(X, Y)
parameters = initialize_nn(n_x, n_h, n_y)
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
costs = []
for i in range(0, num_iters):
A2, cache = forward_prop(X, parameters)
cost = compute_cost(A2, Y, parameters)
grads = back_prop(parameters, cache, X, Y)
if (i > 1500):
alpha1 = 0.95*alpha
parameters = update_params(parameters, grads, alpha1)
else:
parameters = update_params(parameters, grads, alpha)
if i % 100 == 0:
costs.append(cost)
if print_cost and i % 100 == 0:
print("Cost after iteration for %i: %f" % (i, cost))
predictions = predict_nn(parameters, X)
print("Train accuracy: {} %", sum(predictions == Y_real) / (float(len(Y_real))) * 100)
predictions=predict_nn(parameters,test_x)
print("Train accuracy: {} %", sum(predictions == test_y) / (float(len(test_y))) * 100)
#plt.plot(costs)
#plt.ylabel('cost')
#plt.xlabel('iterations (per hundreds)')
#plt.title("Learning rate =" + str(alpha))
#plt.show()
return parameters
def predict_nn(parameters, X):
A2, cache = forward_prop(X, parameters)
predictions = np.argmax(A2, axis=0)
return predictions
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment