A Neural Network with N hidden Nodes in < 100 LOC
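The script below builds a tiny feed-forward network by hand: two input features, one hidden layer of N ReLU nodes (N = 5 here), and a single linear output. It trains by stochastic gradient descent on a perceptron-style hinge objective, updating weights only when an example falls inside the margin, and prints classification accuracy on scikit-learn's two-moons dataset every 100 iterations.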
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import numpy as np
import random

def get_data(plot_data=False):
  # data = np array with shape (100, 2) containing 100 2D observations
  # labels = array of class labels, one for each row of data
  data, labels = make_moons(noise=0.05)
  # center each feature on its mean, then divide by its standard deviation
  data = data - np.mean(data, axis=0)
  data /= np.std(data, axis=0)
  # map labels from {0,1} to {-1,1}
  labels = (labels * 2) - 1
  # plot the data
  if plot_data:
    plt.scatter(data[:,0], data[:,1], c=labels)
    plt.show()
  return data, labels
class Model:
  def __init__(self):
    self.n_nodes = 5 # number of nodes in hidden layer
    # initialize the weights and bias for each hidden node
    self.w1 = np.random.normal(loc=0, scale=0.5, size=self.n_nodes)
    self.w2 = np.random.normal(loc=0, scale=0.5, size=self.n_nodes)
    self.b1 = np.random.normal(loc=0, scale=0.5, size=self.n_nodes)
    # node output values generated during the forward pass
    self.nodes = np.zeros(self.n_nodes)
    # initialize the output layer: 1 weight per hidden node + 1 bias
    self.z = np.random.rand(self.n_nodes)
    self.b2 = 0
    # initialize the step size (learning rate) for gradient descent
    self.step = 0.01

  def forward(self, x, y):
    # compute each hidden node's output value (ReLU activation)
    for i in range(self.n_nodes):
      self.nodes[i] = max(0, self.w1[i]*x + self.w2[i]*y + self.b1[i])
    return np.dot(self.nodes, self.z) + self.b2
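
  # In symbols, the forward pass computes
  #   output(x, y) = sum_i z_i * max(0, w1_i*x + w2_i*y + b1_i) + b2
  # i.e. a weighted sum of the N hidden ReLU units plus an output bias.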
  def backward(self, output, x, y, label):
    # determine the direction of the gradient: + if we need to increase the output, else -
    # (nonzero only when the example falls inside the margin, i.e. label * output < 1)
    gradient = 0
    if (label == 1 and output < 1): gradient = 1
    if (label == -1 and output > -1): gradient = -1
    # use the chain rule to get the derivative of the output wrt each node's value
    dn = gradient * self.z
    # derivatives of the final layer weights and bias
    dz = gradient * self.nodes
    db2 = gradient * 1
    # backprop through the ReLU non-linearity: zero out nodes that did not fire
    for i in range(self.n_nodes):
      if self.nodes[i] == 0: dn[i] = 0
    # backprop through each node's pre-activation to get weight and bias derivatives
    dx = x * dn # w1 derivatives
    dy = y * dn # w2 derivatives
    db1 = 1 * dn # b1 derivatives
    # update the node weights and biases given the derivative calculations
    self.w1 += dx * self.step
    self.w2 += dy * self.step
    self.b1 += db1 * self.step
    # update the output layer weights and bias
    self.z += dz * self.step
    self.b2 += db2 * self.step
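
  # Taken together, backward() runs one step of stochastic gradient descent
  # on a hinge-style objective: when an example already clears the margin
  # (label * output >= 1) nothing changes; otherwise every parameter moves
  # one `step` in the direction that increases label * output.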
  def evaluate(self, data, labels):
    correct = 0
    for i in range(len(data)):
      output = self.forward(data[i][0], data[i][1])
      if labels[i] == 1 and output >= 1: correct += 1
      if labels[i] == -1 and output <= -1: correct += 1
    return correct / len(data)
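
  # Note: evaluate() only counts a point as correct when its output clears
  # the margin (>= 1 or <= -1), a stricter test than taking sign(output).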
data, labels = get_data()
model = Model()
for i in range(20000):
  # train on one randomly drawn observation at a time
  j = random.randint(0, len(data)-1)
  x, y = data[j]
  label = labels[j]
  output = model.forward(x, y)
  model.backward(output, x, y, label)
  if i % 100 == 0:
    print(i, model.evaluate(data, labels))
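
As a quick sanity check after training, one way to see what the network learned is to score a grid of points and plot the decision regions. This is a minimal sketch, not part of the original gist: it assumes the `model`, `data`, and `labels` variables from the script above, and `forward_batch` is a hypothetical vectorized helper introduced here for illustration.

# hypothetical helper: vectorize the forward pass over many points at once
def forward_batch(model, pts):
  # pts: array with shape (n, 2); hidden: (n, n_nodes) after broadcasting
  hidden = np.maximum(0, pts[:, :1] * model.w1.ravel() +
                         pts[:, 1:] * model.w2.ravel() + model.b1.ravel())
  return hidden.dot(model.z) + model.b2

# build a grid spanning the (standardized) data, score it, and shade the
# plane by the sign of the model's raw output
xs, ys = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))
grid = np.column_stack([xs.ravel(), ys.ravel()])
scores = forward_batch(model, grid).reshape(xs.shape)
plt.contourf(xs, ys, scores, levels=[-1e9, 0, 1e9], alpha=0.3)
plt.scatter(data[:,0], data[:,1], c=labels)
plt.show()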