code for NN from scratch simplified
# importing required libraries
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# versions of numpy and matplotlib libraries
print("Version of numpy:", np.__version__)
print("Version of matplotlib:", matplotlib.__version__)

# set random seed for reproducibility
np.random.seed(42)
# creating the input array
X = np.array([[1, 0, 0, 0], [1, 0, 1, 1], [0, 1, 0, 1]])
print("Input:\n", X)
# shape of input array
print("\nShape of Input:", X.shape)

# converting the input to matrix form (features x samples)
X = X.T
print("Input in matrix form:\n", X)
# shape of input matrix
print("\nShape of Input Matrix:", X.shape)

# creating the output array
y = np.array([[1], [1], [0]])
print("Actual Output:\n", y)
# output in matrix form
y = y.T
print("\nOutput in matrix form:\n", y)
# shape of output array
print("\nShape of Output:", y.shape)

# defining the model architecture
inputLayer_neurons = X.shape[0]  # number of features in the data set
hiddenLayer_neurons = 3  # number of neurons in the hidden layer
outputLayer_neurons = 1  # number of neurons in the output layer

# initializing the weights
# shape of weights_input_hidden should be (number of input neurons x number of hidden neurons)
weights_input_hidden = np.random.uniform(size=(inputLayer_neurons, hiddenLayer_neurons))
# shape of weights_hidden_output should be (number of hidden neurons x number of output neurons)
weights_hidden_output = np.random.uniform(
    size=(hiddenLayer_neurons, outputLayer_neurons)
)
# shape of the weight matrices
weights_input_hidden.shape, weights_hidden_output.shape

# we are using sigmoid as the activation function for both layers, so defining it here
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
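# quick sanity check (an illustrative addition, not part of the original
# notebook): sigmoid squashes any real number into (0, 1), and its derivative
# can be written in terms of its own output as s * (1 - s) -- the identity
# behind the `output * (1 - output)` terms in the backward pass below
s = sigmoid(np.array([-2.0, 0.0, 2.0]))
print(s)            # ~[0.119, 0.5, 0.881]
print(s * (1 - s))  # sigmoid derivative at the same points: ~[0.105, 0.25, 0.105]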
# hidden layer activations
hiddenLayer_linearTransform = np.dot(weights_input_hidden.T, X)
hiddenLayer_activations = sigmoid(hiddenLayer_linearTransform)
# calculating the output
outputLayer_linearTransform = np.dot(weights_hidden_output.T, hiddenLayer_activations)
output = sigmoid(outputLayer_linearTransform)
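# shape check (an illustrative addition): with X of shape (4, 3),
# weights_input_hidden of shape (4, 3) and weights_hidden_output of shape
# (3, 1), the hidden activations are (3, 3) and the output is (1, 3) --
# one prediction per training sample
print(hiddenLayer_activations.shape, output.shape)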
# calculating error
error = np.square(y - output) / 2
error
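# numerical check of the derivative (an illustrative addition): for
# E = (y - output)^2 / 2, nudging the output by eps should change E by
# roughly -(y - output) * eps, matching the analytic derivative computed next
eps = 1e-6
numerical = (np.square(y - (output + eps)) / 2 - np.square(y - output) / 2) / eps
print(numerical)      # elementwise finite-difference slope
print(-(y - output))  # analytic derivative; the two should closely agree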
# rate of change of error w.r.t. output
error_wrt_output = -(y - output)
# rate of change of output w.r.t. Z2
output_wrt_outputLayer_LinearTransform = np.multiply(output, (1 - output))
# rate of change of Z2 w.r.t. weights between hidden and output layer
outputLayer_LinearTransform_wrt_weights_hidden_output = hiddenLayer_activations
# checking the shapes of partial derivatives
error_wrt_output.shape, output_wrt_outputLayer_LinearTransform.shape, outputLayer_LinearTransform_wrt_weights_hidden_output.shape
# shape of weights of output layer
weights_hidden_output.shape
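# the point of checking these shapes: the gradient we assemble next must end
# up with the same (3, 1) shape as weights_hidden_output so that it can be
# subtracted from it during the update step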
# rate of change of error w.r.t. weights between hidden and output layer
error_wrt_weights_hidden_output = np.dot(
    outputLayer_LinearTransform_wrt_weights_hidden_output,
    (error_wrt_output * output_wrt_outputLayer_LinearTransform).T,
)
error_wrt_weights_hidden_output.shape
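# optional numerical gradient check (an illustrative addition, not part of
# the original notebook): perturb one output-layer weight by eps and compare
# the finite-difference slope of the total error against the analytic gradient
eps = 1e-5
W_plus = weights_hidden_output.copy()
W_plus[0, 0] += eps
output_plus = sigmoid(np.dot(W_plus.T, hiddenLayer_activations))
E_base = np.sum(np.square(y - output) / 2)
E_plus = np.sum(np.square(y - output_plus) / 2)
print((E_plus - E_base) / eps)                # numerical estimate
print(error_wrt_weights_hidden_output[0, 0])  # analytic value; should agree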
# rate of change of error w.r.t. output
error_wrt_output = -(y - output)
# rate of change of output w.r.t. Z2
output_wrt_outputLayer_LinearTransform = np.multiply(output, (1 - output))
# rate of change of Z2 w.r.t. hidden layer activations
outputLayer_LinearTransform_wrt_hiddenLayer_activations = weights_hidden_output
# rate of change of hidden layer activations w.r.t. Z1
hiddenLayer_activations_wrt_hiddenLayer_linearTransform = np.multiply(
    hiddenLayer_activations, (1 - hiddenLayer_activations)
)
# rate of change of Z1 w.r.t. weights between input and hidden layer
hiddenLayer_linearTransform_wrt_weights_input_hidden = X
# checking the shapes of partial derivatives
print(
    error_wrt_output.shape,
    output_wrt_outputLayer_LinearTransform.shape,
    outputLayer_LinearTransform_wrt_hiddenLayer_activations.shape,
    hiddenLayer_activations_wrt_hiddenLayer_linearTransform.shape,
    hiddenLayer_linearTransform_wrt_weights_input_hidden.shape,
)
# shape of weights of hidden layer
weights_input_hidden.shape
# rate of change of error w.r.t. weights between input and hidden layer
error_wrt_weights_input_hidden = np.dot(
    hiddenLayer_linearTransform_wrt_weights_input_hidden,
    (
        hiddenLayer_activations_wrt_hiddenLayer_linearTransform
        * np.dot(
            outputLayer_LinearTransform_wrt_hiddenLayer_activations,
            (output_wrt_outputLayer_LinearTransform * error_wrt_output),
        )
    ).T,
)
error_wrt_weights_input_hidden.shape
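# reading the nested expression inside-out: the inner np.dot sends the output
# layer's delta back through weights_hidden_output, the elementwise product
# applies the hidden layer's sigmoid derivative, and the outer np.dot with X
# sums the per-sample contributions into a (4, 3) gradient matching
# weights_input_hidden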
# defining the learning rate
lr = 0.01
# initial weights_hidden_output
weights_hidden_output
# initial weights_input_hidden
weights_input_hidden
# updating the weights of output layer
weights_hidden_output = weights_hidden_output - lr * error_wrt_weights_hidden_output
# updating the weights of hidden layer
weights_input_hidden = weights_input_hidden - lr * error_wrt_weights_input_hidden
# updated weights_hidden_output
weights_hidden_output
# updated weights_input_hidden
weights_input_hidden
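# each update is one step of gradient descent, w <- w - lr * dE/dw; with
# lr = 0.01 the weights move only slightly, and the training loop below
# repeats this forward-backward-update cycle for many epochs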
# defining the model architecture
inputLayer_neurons = X.shape[0]  # number of features in the data set
hiddenLayer_neurons = 3  # number of neurons in the hidden layer
outputLayer_neurons = 1  # number of neurons in the output layer
# re-initializing the weights for a fresh training run
weights_input_hidden = np.random.uniform(size=(inputLayer_neurons, hiddenLayer_neurons))
weights_hidden_output = np.random.uniform(
    size=(hiddenLayer_neurons, outputLayer_neurons)
)
# defining the parameters
lr = 0.1
epochs = 1000
losses = []
for epoch in range(epochs):
    ## Forward Propagation
    # calculating hidden layer activations
    hiddenLayer_linearTransform = np.dot(weights_input_hidden.T, X)
    hiddenLayer_activations = sigmoid(hiddenLayer_linearTransform)
    # calculating the output
    outputLayer_linearTransform = np.dot(
        weights_hidden_output.T, hiddenLayer_activations
    )
    output = sigmoid(outputLayer_linearTransform)

    ## Backward Propagation
    # calculating error
    error = np.square(y - output) / 2
    # calculating rate of change of error w.r.t. weights between hidden and output layer
    error_wrt_output = -(y - output)
    output_wrt_outputLayer_LinearTransform = np.multiply(output, (1 - output))
    outputLayer_LinearTransform_wrt_weights_hidden_output = hiddenLayer_activations
    error_wrt_weights_hidden_output = np.dot(
        outputLayer_LinearTransform_wrt_weights_hidden_output,
        (error_wrt_output * output_wrt_outputLayer_LinearTransform).T,
    )
    # calculating rate of change of error w.r.t. weights between input and hidden layer
    outputLayer_LinearTransform_wrt_hiddenLayer_activations = weights_hidden_output
    hiddenLayer_activations_wrt_hiddenLayer_linearTransform = np.multiply(
        hiddenLayer_activations, (1 - hiddenLayer_activations)
    )
    hiddenLayer_linearTransform_wrt_weights_input_hidden = X
    error_wrt_weights_input_hidden = np.dot(
        hiddenLayer_linearTransform_wrt_weights_input_hidden,
        (
            hiddenLayer_activations_wrt_hiddenLayer_linearTransform
            * np.dot(
                outputLayer_LinearTransform_wrt_hiddenLayer_activations,
                (output_wrt_outputLayer_LinearTransform * error_wrt_output),
            )
        ).T,
    )

    # updating the weights
    weights_hidden_output = weights_hidden_output - lr * error_wrt_weights_hidden_output
    weights_input_hidden = weights_input_hidden - lr * error_wrt_weights_input_hidden

    # print error at every 100th epoch
    epoch_loss = np.average(error)
    if epoch % 100 == 0:
        print(f"Error at epoch {epoch} is {epoch_loss:.5f}")
    # appending the error of each epoch
    losses.append(epoch_loss)
# updated weights_input_hidden
weights_input_hidden
# updated weights_hidden_output
weights_hidden_output
# visualizing the error after each epoch
plt.plot(np.arange(1, epochs + 1), np.array(losses))
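# labelling the axes (an illustrative addition, not in the original cell)
plt.xlabel("epoch")
plt.ylabel("average error")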
# final output from the model
output
# actual target
y
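# the outputs are sigmoid activations in (0, 1); thresholding at 0.5 turns
# them into hard class labels (an illustrative addition -- the same rule is
# used for the decision boundary in the make_moons section below)
print((output > 0.5).astype(int))
print(y)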
# second example: binary classification on the make_moons data set
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=1000, random_state=42, noise=0.1)
plt.scatter(X[:, 0], X[:, 1], s=10, c=y)
# min-max scaling of the features to the range [0, 1]
X -= X.min()
X /= X.max()
# converting to matrix form: X becomes (features x samples), y becomes (1 x samples)
X = X.T
y = y.reshape(1, -1)
# defining the model architecture
inputLayer_neurons = X.shape[0]  # number of features in the data set
hiddenLayer_neurons = 10  # number of neurons in the hidden layer
outputLayer_neurons = 1  # number of neurons in the output layer
# initializing the weights
weights_input_hidden = np.random.uniform(size=(inputLayer_neurons, hiddenLayer_neurons))
weights_hidden_output = np.random.uniform(
    size=(hiddenLayer_neurons, outputLayer_neurons)
)
# defining the parameters
lr = 0.1
epochs = 10000
losses = []
for epoch in range(epochs):
    ## Forward Propagation
    # calculating hidden layer activations
    hiddenLayer_linearTransform = np.dot(weights_input_hidden.T, X)
    hiddenLayer_activations = sigmoid(hiddenLayer_linearTransform)
    # calculating the output
    outputLayer_linearTransform = np.dot(
        weights_hidden_output.T, hiddenLayer_activations
    )
    output = sigmoid(outputLayer_linearTransform)

    ## Backward Propagation
    # calculating error
    error = np.square(y - output) / 2
    # calculating rate of change of error w.r.t. weights between hidden and output layer
    error_wrt_output = -(y - output)
    output_wrt_outputLayer_LinearTransform = np.multiply(output, (1 - output))
    outputLayer_LinearTransform_wrt_weights_hidden_output = hiddenLayer_activations
    error_wrt_weights_hidden_output = np.dot(
        outputLayer_LinearTransform_wrt_weights_hidden_output,
        (error_wrt_output * output_wrt_outputLayer_LinearTransform).T,
    )
    # calculating rate of change of error w.r.t. weights between input and hidden layer
    outputLayer_LinearTransform_wrt_hiddenLayer_activations = weights_hidden_output
    hiddenLayer_activations_wrt_hiddenLayer_linearTransform = np.multiply(
        hiddenLayer_activations, (1 - hiddenLayer_activations)
    )
    hiddenLayer_linearTransform_wrt_weights_input_hidden = X
    error_wrt_weights_input_hidden = np.dot(
        hiddenLayer_linearTransform_wrt_weights_input_hidden,
        (
            hiddenLayer_activations_wrt_hiddenLayer_linearTransform
            * np.dot(
                outputLayer_LinearTransform_wrt_hiddenLayer_activations,
                (output_wrt_outputLayer_LinearTransform * error_wrt_output),
            )
        ).T,
    )

    # updating the weights
    weights_hidden_output = weights_hidden_output - lr * error_wrt_weights_hidden_output
    weights_input_hidden = weights_input_hidden - lr * error_wrt_weights_input_hidden

    # print error at every 1000th epoch
    epoch_loss = np.average(error)
    if epoch % 1000 == 0:
        print(f"Error at epoch {epoch} is {epoch_loss:.5f}")
    # appending the error of each epoch
    losses.append(epoch_loss)
# visualizing the error after each epoch
plt.plot(np.arange(1, epochs + 1), np.array(losses))
# final output from the model (first five samples)
output[:, :5]
# Define region of interest by data limits
steps = 1000
x_span = np.linspace(X[0, :].min(), X[0, :].max(), steps)
y_span = np.linspace(X[1, :].min(), X[1, :].max(), steps)
xx, yy = np.meshgrid(x_span, y_span)
# forward pass for region of interest
hiddenLayer_linearTransform = np.dot(
    weights_input_hidden.T, np.c_[xx.ravel(), yy.ravel()].T
)
hiddenLayer_activations = sigmoid(hiddenLayer_linearTransform)
outputLayer_linearTransform = np.dot(weights_hidden_output.T, hiddenLayer_activations)
output_span = sigmoid(outputLayer_linearTransform)
# Make predictions across region of interest
labels = (output_span > 0.5).astype(int)
# Plot decision boundary in region of interest
z = labels.reshape(xx.shape)
fig, ax = plt.subplots()
ax.contourf(xx, yy, z, alpha=0.2)
# Get predicted labels on training data and plot
train_labels = (output > 0.5).astype(int)
# create scatter plot
ax.scatter(X[0, :], X[1, :], s=10, c=y.squeeze())
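# a simple accuracy check (an illustrative addition): compare the thresholded
# predictions against the true labels
print("Training accuracy:", np.mean(train_labels == y))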