nn_numpy.py
import numpy as np

nn_architecture = [
    {"input_dim": 2, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 1, "activation": "sigmoid"},
]
def init_layers(nn_architecture, seed=99):
    # Randomly initialize small weights and biases for every layer.
    np.random.seed(seed)
    number_of_layers = len(nn_architecture)
    params_values = {}
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1
    return params_values
def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def relu(Z):
    return np.maximum(0, Z)

def sigmoid_backward(dA, Z):
    # dZ = dA * sigmoid'(Z)
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def relu_backward(dA, Z):
    # The gradient passes through only where the pre-activation was positive.
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    # Linear step followed by the chosen non-linearity.
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    if activation == "relu":
        activation_func = relu
    elif activation == "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation function')
    return activation_func(Z_curr), Z_curr
def full_forward_propagation(X, params_values, nn_architecture):
    # Run the input through every layer, caching activations and pre-activations.
    memory = {}
    A_curr = X
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        A_prev = A_curr
        activ_function_curr = layer["activation"]
        W_curr = params_values["W" + str(layer_idx)]
        b_curr = params_values["b" + str(layer_idx)]
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr
    return A_curr, memory
def get_cost_value(Y_hat, Y):
    # Binary cross-entropy cost averaged over the m examples.
    m = Y_hat.shape[1]
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)

def convert_prob_into_class(probs):
    probs_ = np.copy(probs)
    probs_[probs_ > 0.5] = 1
    probs_[probs_ <= 0.5] = 0
    return probs_

def get_accuracy_value(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    # Given dL/dA of this layer, compute gradients w.r.t. W, b and the previous activation.
    m = A_prev.shape[1]
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise Exception('Non-supported activation function')
    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)
    return dA_prev, dW_curr, db_curr
def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    grads_values = {}
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
    # Gradient of the binary cross-entropy cost with respect to the network output.
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]
        dA_curr = dA_prev
        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]
        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)
        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr
    return grads_values
def update(params_values, grads_values, nn_architecture, learning_rate):
    # Vanilla gradient-descent step on every layer's parameters.
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]
    return params_values
def train(X, Y, nn_architecture, epochs, learning_rate):
    params_values = init_layers(nn_architecture, 2)
    cost_history = []
    accuracy_history = []
    for i in range(epochs):
        # Forward pass, metric bookkeeping, backward pass, parameter update.
        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)
    return params_values, cost_history, accuracy_history
import os
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

sns.set_style("whitegrid")

# number of samples in the data set
N_SAMPLES = 10000
# ratio between training and test sets
TEST_SIZE = 0.1

X, y = make_moons(n_samples=N_SAMPLES, noise=0.2, random_state=100)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
# helper that plots a dataset and, optionally, a decision boundary
def make_plot(X, y, plot_name, file_name=None, XX=None, YY=None, preds=None, dark=False):
    if dark:
        plt.style.use('dark_background')
    else:
        sns.set_style("whitegrid")
    plt.figure(figsize=(16, 12))
    axes = plt.gca()
    axes.set(xlabel="$X_1$", ylabel="$X_2$")
    plt.title(plot_name, fontsize=30)
    plt.subplots_adjust(left=0.20)
    plt.subplots_adjust(right=0.80)
    if XX is not None and YY is not None and preds is not None:
        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=1, cmap=cm.Spectral)
        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
    plt.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=40, cmap=plt.cm.Spectral, edgecolors='black')
    if file_name:
        plt.savefig(file_name)
        plt.close()
# make_plot(X, y, "Dataset")

params_values, cost_history, accuracy_history = train(
    np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))),
    nn_architecture, 10000, 0.01)
Y_test_hat, _ = full_forward_propagation(np.transpose(X_test), params_values, nn_architecture)
print(cost_history)
print(accuracy_history)
acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))
print("Test set accuracy: {:.2f} - David".format(acc_test))