A Neural Network with N hidden Nodes in < 100 LOC
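The script below builds a tiny feed-forward network by hand: two input features, one hidden layer of N ReLU nodes (N = 5 here), and a single linear output. It trains by stochastic gradient descent on a perceptron-style hinge objective, updating weights only when an example falls inside the margin, and prints classification accuracy on scikit-learn's two-moons dataset every 100 iterations.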
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import numpy as np
import random

def get_data(plot_data=False):
  # data = np array with shape (100, 2) containing 100 2D observations
  # labels = array of class labels, one for each row of data
  data, labels = make_moons(noise=0.05)
  # center each feature on its mean, then divide by its standard deviation
  data = data - np.mean(data, axis=0)
  data /= np.std(data, axis=0)
  # map labels from {0,1} to {-1,1}
  labels = (labels * 2) - 1
  # plot the data
  if plot_data:
    plt.scatter(data[:,0], data[:,1], c=labels)
    plt.show()
  return data, labels
class Model:
  def __init__(self):
    self.n_nodes = 5 # number of nodes in hidden layer
    # initialize the weights and bias for each hidden node
    self.w1 = np.random.normal(loc=0, scale=0.5, size=self.n_nodes)
    self.w2 = np.random.normal(loc=0, scale=0.5, size=self.n_nodes)
    self.b1 = np.random.normal(loc=0, scale=0.5, size=self.n_nodes)
    # node output values generated during the forward pass
    self.nodes = np.zeros(self.n_nodes)
    # initialize the output layer: 1 weight per hidden node + 1 bias
    self.z = np.random.rand(self.n_nodes)
    self.b2 = 0
    # initialize the step size (learning rate) for gradient descent
    self.step = 0.01

  def forward(self, x, y):
    # compute each hidden node's output value (ReLU activation)
    for i in range(self.n_nodes):
      self.nodes[i] = max(0, self.w1[i]*x + self.w2[i]*y + self.b1[i])
    return np.dot(self.nodes, self.z) + self.b2
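
  # In symbols, the forward pass computes
  #   output(x, y) = sum_i z_i * max(0, w1_i*x + w2_i*y + b1_i) + b2
  # i.e. a weighted sum of the N hidden ReLU units plus an output bias.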
  def backward(self, output, x, y, label):
    # determine the direction of the gradient: + if we need to increase the output, else -
    # (nonzero only when the example falls inside the margin, i.e. label * output < 1)
    gradient = 0
    if (label == 1 and output < 1): gradient = 1
    if (label == -1 and output > -1): gradient = -1
    # use the chain rule to get the derivative of the output wrt each node's value
    dn = gradient * self.z
    # derivatives of the final layer weights and bias
    dz = gradient * self.nodes
    db2 = gradient * 1
    # backprop through the ReLU non-linearity: zero out nodes that did not fire
    for i in range(self.n_nodes):
      if self.nodes[i] == 0: dn[i] = 0
    # backprop through each node's pre-activation to get weight and bias derivatives
    dx = x * dn # w1 derivatives
    dy = y * dn # w2 derivatives
    db1 = 1 * dn # b1 derivatives
    # update the node weights and biases given the derivative calculations
    self.w1 += dx * self.step
    self.w2 += dy * self.step
    self.b1 += db1 * self.step
    # update the output layer weights and bias
    self.z += dz * self.step
    self.b2 += db2 * self.step
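
  # Taken together, backward() runs one step of stochastic gradient descent
  # on a hinge-style objective: when an example already clears the margin
  # (label * output >= 1) nothing changes; otherwise every parameter moves
  # one `step` in the direction that increases label * output.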
  def evaluate(self, data, labels):
    correct = 0
    for i in range(len(data)):
      output = self.forward(data[i][0], data[i][1])
      if labels[i] == 1 and output >= 1: correct += 1
      if labels[i] == -1 and output <= -1: correct += 1
    return correct / len(data)
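
  # Note: evaluate() only counts a point as correct when its output clears
  # the margin (>= 1 or <= -1), a stricter test than taking sign(output).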
data, labels = get_data()
model = Model()
for i in range(20000):
  # train on one randomly drawn observation at a time
  j = random.randint(0, len(data)-1)
  x, y = data[j]
  label = labels[j]
  output = model.forward(x, y)
  model.backward(output, x, y, label)
  if i % 100 == 0:
    print(i, model.evaluate(data, labels))
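
As a quick sanity check after training, one way to see what the network learned is to score a grid of points and plot the decision regions. This is a minimal sketch, not part of the original gist: it assumes the `model`, `data`, and `labels` variables from the script above, and `forward_batch` is a hypothetical vectorized helper introduced here for illustration.

# hypothetical helper: vectorize the forward pass over many points at once
def forward_batch(model, pts):
  # pts: array with shape (n, 2); hidden: (n, n_nodes) after broadcasting
  hidden = np.maximum(0, pts[:, :1] * model.w1.ravel() +
                         pts[:, 1:] * model.w2.ravel() + model.b1.ravel())
  return hidden.dot(model.z) + model.b2

# build a grid spanning the (standardized) data, score it, and shade the
# plane by the sign of the model's raw output
xs, ys = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))
grid = np.column_stack([xs.ravel(), ys.ravel()])
scores = forward_batch(model, grid).reshape(xs.shape)
plt.contourf(xs, ys, scores, levels=[-1e9, 0, 1e9], alpha=0.3)
plt.scatter(data[:,0], data[:,1], c=labels)
plt.show()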