Three-layer neural network (I-H-H-O) with tanh activation functions in the hidden layers and softmax cross-entropy in the output layer.

Implementing a multi-layer perceptron to solve the two-spiral problem

This utilizes a three-layer neural network (2 hidden layers with tanh and 1 output layer with softmax) to solve the two-spiral problem. Included in this gist is data_utils.py, which provides the function load_twin_spiral() to generate the data. All of the computations in the neural network (feedforward and backpropagation) are done with the numpy package.

Usage

If you wish to use the classes in this gist, simply import the network module and instantiate the class:

from network import *
from data_utils import *

# Load the data
X, y = load_twin_spiral()

# Build the network and train
model = MLP()
stats = model.train(X, y)
y_pred = model.predict(X)
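
The train method returns a dictionary with the loss and training-accuracy histories, so you can inspect convergence afterwards. A minimal sketch of this (the plotting part assumes matplotlib is installed, which this gist does not require):

# Check the accuracy on the training data
print('train accuracy: %f' % (y_pred == y).mean())

# Optionally, plot the loss curve (assumes matplotlib)
import matplotlib.pyplot as plt
plt.plot(stats['loss_history'])
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()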

License

All public gists: https://gist.github.com/ljvmiranda921
Copyright 2017, Lester James V. Miranda
MIT License, http://www.opensource.org/licenses/mit-license.php

# -*- coding: utf-8 -*-
"""Data Utils
Simple module to load the twin-spiral data
__author__ = "Lester James V. Miranda"
__email__ = "lester.miranda@toki.waseda.jp"
"""
import numpy as np


def load_twin_spiral(n_instances=200, degrees=780, start=0, noise=0.2,
                     one_hot=False, randomize=False, random_state=None):
    """Generates the twin-spiral dataset.

    Inputs:
        - n_instances: number of instances per spiral
        - degrees: the length of the spirals
        - start: offset of the spiral from the origin
        - noise: amount of noise introduced into the spiral
        - one_hot: perform one-hot encoding on the labels
        - randomize: add stochasticity to the generated values
        - random_state: seeds the random number generator

    Note that for the noise parameter, a value of 0 means no noise and 1
    means a high amount of noise (the two spirals may overlap).

    Returns:
        - X, y: numpy ndarrays containing the coordinates and labels, of
          shape (2 * n_instances, 2) for X and (2 * n_instances,) for y
          (or (2 * n_instances, 2) for y when one_hot is True).
    """
    if random_state is not None:
        np.random.seed(random_state)
    rads = (2 * np.pi) / 360
    start = start * rads
    if randomize:
        # Sample points along the spiral and add uniform noise
        n = start + np.sqrt(np.random.rand(n_instances, 1)) * degrees * rads
        d1x = -np.cos(n) * n + np.random.rand(n_instances, 1) * noise
        d1y = np.sin(n) * n + np.random.rand(n_instances, 1) * noise
    else:
        # Deterministic spiral following the classic two-spiral benchmark
        i = np.arange(n_instances).reshape(n_instances, 1)
        r = (6.5 * (104 - i)) / 104
        phi = (i * np.pi) / 16
        d1x = -r * np.cos(phi)
        d1y = -r * np.sin(phi)
    # Define class 0 and class 1
    if one_hot:
        class_zero = np.array([[1, 0]] * n_instances)
        class_one = np.array([[0, 1]] * n_instances)
        labels = np.vstack((class_zero, class_one))
    else:
        class_zero = np.zeros(n_instances).astype(int)
        class_one = np.ones(n_instances).astype(int)
        labels = np.hstack((class_zero, class_one))
    # The second spiral is the point-reflection of the first
    return (np.vstack((np.hstack((d1x, d1y)), np.hstack((-d1x, -d1y)))),
            labels)
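

if __name__ == "__main__":
    # Quick sanity check for this module (a minimal sketch, not part of the
    # original assignment): generate the deterministic spirals and verify
    # that the output shapes match the documented ones.
    X, y = load_twin_spiral(n_instances=200)
    assert X.shape == (400, 2)
    assert y.shape == (400,)
    # With one-hot labels, y becomes a (2 * n_instances, 2) indicator matrix.
    X_oh, y_oh = load_twin_spiral(n_instances=200, one_hot=True)
    assert y_oh.shape == (400, 2)
    print("load_twin_spiral OK:", X.shape, y.shape)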
# -*- coding: utf-8 -*-
"""Neural Network class for Assignment 3
This module implements the class MLP, a three-layer neural network
with tanh activation functions for solving the two-spiral problem.
__author__ = "Lester James V. Miranda"
__email__ = "lester.miranda@toki.waseda.jp"
"""
import numpy as np


class MLP(object):
    """A three-layer neural network for solving the two-spiral problem
    for the Neural Networks Class, Spring 2017. The network has two hidden
    layers with a tanh activation after each fully-connected layer, and a
    softmax output layer. Thus,

        input_layer ---- hidden_layer x 2 ---- output_layer
                           [tanh]                [softmax]

    To use this class, simply initialize the model and train it:

        model = MLP()  # Assuming you are using the default parameters
        stats = model.train(X, y)
        pred = model.predict(X)
    """
    def __init__(self, n_inputs=2, n_hidden=20, n_classes=2, std=1e-4):
        """Initializes the parameters of the neural network.

        The weights are initialized to small random values (scaled by std),
        while the biases are drawn from a standard normal distribution.

        Inputs:
            - n_inputs: dimensions of the input
            - n_hidden: nb. of nodes in each hidden layer
            - n_classes: nb. of classes in the output layer
            - std: controls the spread of the weight initialization
        """
        # Initialize the parameters
        self.params = {}
        # First layer weights and biases
        self.params['W1'] = std * np.random.randn(n_inputs, n_hidden)
        self.params['b1'] = np.random.randn(n_hidden)
        # Second layer weights and biases
        self.params['W2'] = std * np.random.randn(n_hidden, n_hidden)
        self.params['b2'] = np.random.randn(n_hidden)
        # Output layer weights and biases
        self.params['W3'] = std * np.random.randn(n_hidden, n_classes)
        self.params['b3'] = np.random.randn(n_classes)
        # Initialize the velocities (used by the momentum update in train)
        self.velocity = {}
        # First layer velocities
        self.velocity['W1'] = np.zeros((n_inputs, n_hidden))
        self.velocity['b1'] = np.zeros(n_hidden)
        # Second layer velocities
        self.velocity['W2'] = np.zeros((n_hidden, n_hidden))
        self.velocity['b2'] = np.zeros(n_hidden)
        # Output layer velocities
        self.velocity['W3'] = np.zeros((n_hidden, n_classes))
        self.velocity['b3'] = np.zeros(n_classes)
    def loss(self, X, y=None, reg=0.0):
        """Computes the loss and gradients for the three-layer fully-connected
        neural network.

        Inputs:
            - X: input data of shape (n_examples, n_features)
            - y: vector of training labels
            - reg: regularization strength

        Returns:
            - If y is None, returns the logits (score matrix).
            - If y is not None, returns
                loss: computed loss (both data loss and regularization loss)
                grads: dictionary containing all the gradients
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']
        N, D = X.shape

        # ----------------- Forward propagation ------------------
        z1 = X.dot(W1) + b1   # First layer pre-activation
        a1 = np.tanh(z1)      # First layer activation (using tanh)
        z2 = a1.dot(W2) + b2  # Second layer pre-activation
        a2 = np.tanh(z2)      # Second layer activation (using tanh)
        z3 = a2.dot(W3) + b3  # Third layer pre-activation
        logits = z3           # Keep logits
        if y is None:
            return logits

        # ------------ Perform softmax cross-entropy -------------
        # Compute the softmax (shift the logits for numerical stability)
        exp_scores = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        # Compute the cross-entropy loss
        correct_logprobs = -np.log(probs[range(N), y])
        data_loss = np.sum(correct_logprobs) / N
        reg_loss = 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2) + np.sum(W3 * W3))
        loss = data_loss + reg_loss

        # ------------------- Backpropagation ---------------------
        grads = {}
        # Compute the gradient of the loss w.r.t. the logits
        dlogits = probs
        dlogits[range(N), y] -= 1
        dlogits /= N
        # Propagate the loss back to the output layer
        grads['W3'] = np.dot(a2.T, dlogits)
        grads['b3'] = np.sum(dlogits, axis=0)
        # Compute the hidden layer 2 gradient
        dhidden_2 = np.multiply(self._tanh_deriv(z2), np.dot(dlogits, W3.T))
        # Propagate the loss back to hidden layer 2
        grads['W2'] = np.dot(a1.T, dhidden_2)
        grads['b2'] = np.sum(dhidden_2, axis=0)
        # Compute the hidden layer 1 gradient
        dhidden_1 = np.multiply(self._tanh_deriv(z1), np.dot(dhidden_2, W2.T))
        # Propagate the loss back to hidden layer 1
        grads['W1'] = np.dot(X.T, dhidden_1)
        grads['b1'] = np.sum(dhidden_1, axis=0)
        # Accumulate the gradients from the regularization term
        grads['W3'] += reg * W3
        grads['W2'] += reg * W2
        grads['W1'] += reg * W1
        return loss, grads
    def train(self, X, y, learning_rate=0.5, mu=0.05, num_iters=50000,
              print_step=1000, reg_param=0.0, verbose=1):
        """Trains the neural network using gradient descent with momentum.

        Inputs:
            - X: input data of shape (N, D). Each X[i] is a training sample.
            - y: vector of training labels of shape (N,)
            - learning_rate: learning rate used in gradient descent
            - mu: momentum coefficient
            - num_iters: number of iterations
            - print_step: iteration interval between progress reports
            - reg_param: regularization strength
            - verbose: prints progress during optimization (1: final summary
              only, 2: report every print_step iterations)

        Returns:
            - a dictionary with the loss and training-accuracy histories
        """
        # Define the history lists
        loss_history = []
        train_acc_history = []
        for i in range(num_iters):
            # Perform forward propagation and compute the loss and gradients
            loss, grads = self.loss(X, y, reg_param)
            # Append the loss to the history list
            loss_history.append(loss)
            # Momentum update: decay the velocities and add the gradient step
            self.velocity['W1'] = (self.velocity['W1'] * mu) - learning_rate * grads['W1']
            self.velocity['b1'] = (self.velocity['b1'] * mu) - learning_rate * grads['b1']
            self.velocity['W2'] = (self.velocity['W2'] * mu) - learning_rate * grads['W2']
            self.velocity['b2'] = (self.velocity['b2'] * mu) - learning_rate * grads['b2']
            self.velocity['W3'] = (self.velocity['W3'] * mu) - learning_rate * grads['W3']
            self.velocity['b3'] = (self.velocity['b3'] * mu) - learning_rate * grads['b3']
            # Adjust the neural network parameters
            self.params['W1'] += self.velocity['W1']
            self.params['b1'] += self.velocity['b1']
            self.params['W2'] += self.velocity['W2']
            self.params['b2'] += self.velocity['b2']
            self.params['W3'] += self.velocity['W3']
            self.params['b3'] += self.velocity['b3']
            # Check the training accuracy
            train_acc = (self.predict(X) == y).mean()
            train_acc_history.append(train_acc)
            if (verbose >= 2) and (i % print_step == 0):
                print('Iteration %d / %d: loss %f, acc %f' % (i + 1, num_iters, loss, train_acc))
        if verbose >= 1:
            print('Done! loss: %f, acc: %f' % (loss, train_acc))
        return {'loss_history': loss_history,
                'acc_history': train_acc_history}
    def predict(self, X):
        """Uses the trained weights of the neural network to determine the class.

        This performs a feedforward pass to compute the logits, then takes
        the argmax over the classes.

        Inputs:
            - X: numpy ndarray of shape (N, D) giving N D-dimensional data
              points to classify.

        Returns:
            - y_pred: numpy ndarray of predictions of shape (N,).
        """
        z1 = X.dot(self.params['W1']) + self.params['b1']
        a1 = np.tanh(z1)
        z2 = a1.dot(self.params['W2']) + self.params['b2']
        a2 = np.tanh(z2)
        z3 = a2.dot(self.params['W3']) + self.params['b3']
        logits = z3
        # The predicted class is the index of the largest logit
        y_pred = np.argmax(logits, axis=1)
        return y_pred
    def _tanh_deriv(self, x):
        """Helper function to compute the first derivative of tanh.

        Input:
            - x: argument to compute the derivative at.

        Returns:
            - 1 - tanh(x)^2, the derivative of tanh evaluated at x.
        """
        return 1.0 - np.tanh(x) ** 2
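

if __name__ == "__main__":
    # Minimal end-to-end sketch (assumes data_utils.py sits alongside this
    # file, as in this gist): generate the twin spirals, train the MLP, and
    # report the final training accuracy. num_iters is set lower than the
    # default here just to keep this sanity check short.
    from data_utils import load_twin_spiral

    X, y = load_twin_spiral()
    model = MLP()
    stats = model.train(X, y, num_iters=10000, verbose=1)
    y_pred = model.predict(X)
    print("final train accuracy: %f" % (y_pred == y).mean())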