A Neural Network with N hidden Nodes in < 100 LOC
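# A tiny two-layer network: two inputs (x, y), n_nodes ReLU hidden units, and a
# single linear output, trained on sklearn's make_moons data with a
# perceptron-style margin update (push the output above +1 for one class and
# below -1 for the other).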
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import numpy as np
import random

def get_data(plot_data=False):
  # data = np array with shape (100, 2) containing 100 2D observations
  # labels = array of class labels, one for each member of data
  data, labels = make_moons(noise=0.05)
  # center each feature on its mean, then divide by its standard deviation
  data = data - np.mean(data, axis=0)
  data /= np.std(data, axis=0)
  # map the labels from {0, 1} to {-1, 1}
  labels = (labels * 2) - 1
  # optionally plot the data
  if plot_data:
    plt.scatter(data[:,0], data[:,1], c=labels)
    plt.show()
  return data, labels

class Model:
  def __init__(self):
    self.n_nodes = 5  # number of nodes in the hidden layer
    # initialize weights and bias for the hidden nodes
    self.w1 = np.random.normal(loc=0, scale=0.5, size=(self.n_nodes, 1))
    self.w2 = np.random.normal(loc=0, scale=0.5, size=(self.n_nodes, 1))
    self.b1 = np.random.normal(loc=0, scale=0.5, size=(self.n_nodes, 1))
    # node output values generated during the forward pass
    self.nodes = np.zeros(self.n_nodes)
    # output layer weights (one per hidden node) and bias
    self.z = np.random.rand(self.n_nodes)
    self.b2 = 0
    # step size (learning rate) for gradient descent
    self.step = 0.01

  def forward(self, x, y):
    # compute each node's output value
    for i in range(self.n_nodes):
      self.nodes[i] = max(0, self.w1[i]*x + self.w2[i]*y + self.b1[i])
    return np.dot(self.nodes, self.z) + self.b2

  def backward(self, output, x, y, label):
    # direction of the update: +1 if the output should increase, -1 if it
    # should decrease, 0 if this observation already sits outside the margin
    gradient = 0
    if (label == 1 and output < 1): gradient = 1
    if (label == -1 and output > -1): gradient = -1
    # derivative of the output w.r.t. each node's value is that node's output weight
    dn = gradient * self.z
    # derivatives of the output w.r.t. the output-layer weights and bias
    dz = gradient * self.nodes
    db2 = gradient * 1
    # apply the ReLU derivative: nodes that did not fire pass no gradient
    for i in range(self.n_nodes):
      if self.nodes[i] == 0: dn[i] = 0
    # backprop each node's gradient to its weights and bias
    dx = x * dn   # derivative w.r.t. w1
    dy = y * dn   # derivative w.r.t. w2
    db1 = 1 * dn  # derivative w.r.t. b1
    # update the node weights and biases given the derivative calculations
    self.w1 += dx.reshape(self.n_nodes, 1) * self.step
    self.w2 += dy.reshape(self.n_nodes, 1) * self.step
    self.b1 += db1.reshape(self.n_nodes, 1) * self.step
    # update the output layer weights and bias
    self.z += dz * self.step
    self.b2 += db2 * self.step

  def evaluate(self):
    # measure accuracy over the full dataset (reads the module-level data and labels)
    correct = 0
    for i in range(len(data)):
      output = self.forward(data[i][0], data[i][1])
      if labels[i] == 1 and output >= 1: correct += 1
      if labels[i] == -1 and output <= -1: correct += 1
    return correct / len(data)

data, labels = get_data()
model = Model()
# train with stochastic updates: pick a random observation, run the forward
# pass, then nudge the weights in the direction that improves its margin
for i in range(20000):
  j = random.randint(0, len(data)-1)
  x, y = data[j]
  label = labels[j]
  output = model.forward(x, y)
  model.backward(output, x, y, label)
  if i % 100 == 0:
    print(i, model.evaluate())
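
# Optional follow-up (not part of the original gist): visualize what the trained
# network learned by plotting the sign of model.forward over a grid of points.
# The grid resolution and the contourf/scatter styling below are illustrative choices.
xs = np.linspace(data[:,0].min() - 0.5, data[:,0].max() + 0.5, 200)
ys = np.linspace(data[:,1].min() - 0.5, data[:,1].max() + 0.5, 200)
outputs = np.array([[model.forward(x, y) for x in xs] for y in ys])
plt.contourf(xs, ys, np.sign(outputs), alpha=0.3)  # predicted class regions
plt.scatter(data[:,0], data[:,1], c=labels)
plt.show()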