@AbhinavMadahar
Created February 28, 2017 20:55
from NeuralNetwork import NeuralNetwork
from random import shuffle, choice
k = 6 # length of the word
test_data_training_size = 100 # number of real words held back for testing
hidden_layers_sizes = [10, 10]
epochs = 15
eta = 0.25
letters = [chr(ordinal) for ordinal in xrange(ord("a"), ord("z") + 1)]
random_letter = lambda: choice(letters)
random_letters = lambda k: [random_letter() for n in xrange(k)]
ordinals = lambda word: [ord(letter) - ord('a') for letter in word]
deordinalize = lambda o: "".join([chr(ordinal + ord('a')) for ordinal in o])
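# worked example of the encoding:
#   ordinals("cab")          -> [2, 0, 1]  ('a' maps to 0, 'b' to 1, ..., 'z' to 25)
#   deordinalize([2, 0, 1])  -> "cab"      (the inverse of ordinals)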
with open("words.txt", "r") as words_file:
    # each line ends with "\r\n", so a word that appears to be k characters long is
    # actually k+2 characters long; slice the line ending off here
    words = [word[0:k] for word in words_file if len(word) == k+2]
shuffle(words)
net = NeuralNetwork([k] + hidden_layers_sizes + [1])
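# with the settings above this builds a [6, 10, 10, 1] network: k input neurons
# (one per letter ordinal), two hidden layers of 10 sigmoid neurons, and a single
# sigmoid output that is rounded to classify the input as real (1) or fake (0)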
# train the neural network,
# holding back the last test_data_training_size real words for testing
n_real_words = len(words) - test_data_training_size
training_data = []
training_data += [(ordinals(word), 1) for word in words[0:-test_data_training_size]] # real words
training_data += [(ordinals(random_letters(k)), 0) for n in xrange(n_real_words)] # fake words
shuffle(training_data)
net.train(training_data, epochs, eta)
testing_data = []
testing_data += [(ordinals(word), 1) for word in words[-test_data_training_size:]]
testing_data += [(ordinals(random_letters(k)), 0) for n in xrange(test_data_training_size)]
shuffle(testing_data)
total_correct = 0
for word, exists in testing_data:
    if exists == round(net.feedforward(word)[0]):
        total_correct += 1
print total_correct, "correct out of", 2 * test_data_training_size
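# a quick spot check once training finishes; "planet" is an arbitrary six-letter
# example (it may or may not appear in words.txt), so treat this as a sketch of
# how to query the classifier rather than a guaranteed result
score = net.feedforward(ordinals("planet"))[0]
print "score for 'planet':", score, "->", "real" if round(score) == 1 else "fake"

# NeuralNetwork.py -- the module imported at the top of the script above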
import numpy as np
from random import shuffle
from math import sqrt
sigmoid = lambda z: 1 / (1 + np.exp(-z))
sigmoidprime = lambda z: sigmoid(z) * (1 - sigmoid(z)) # derivative of the sigmoid function
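# sanity check: sigmoid(0) = 0.5 and sigmoidprime(0) = 0.25, which is also the
# maximum of sigmoidprime, so gradients through a saturated sigmoid are small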
def chunk(array, n):
    chunks = []
    for i in xrange(0, len(array), n):
        chunks.append(array[i:i+n])
    return chunks
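# example: chunk([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]]
# (presumably intended for mini-batching; the train method below works on the
#  whole data set at once, so chunk goes unused here)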
class NeuralNetwork(object):
    # sizes lists the number of neurons in each layer of the network
    def __init__(self, sizes):
        self.sizes = sizes
        self.biases = [np.random.randn(y) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
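        # for NeuralNetwork([6, 10, 10, 1]) as built above, the biases have shapes
        # (10,), (10,), (1,) and the weights have shapes (10, 6), (10, 10), (1, 10),
        # i.e. weights[l][j][k] connects neuron k in layer l to neuron j in layer l+1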
    # letters is the list of letter ordinals that form a word
    def feedforward(self, letters):
        a = letters
        for b, w in zip(self.biases, self.weights):
            # w is a matrix, so np.dot(w, a) is a matrix-vector multiplication
            a = sigmoid(np.dot(w, a) + b)
        return a
    # applies gradient descent on the entire data set at once because it's small
    # epochs is the number of full passes of gradient descent to apply
    # words is a list of tuples (x, y) where x is the (real or fake) word as ordinals
    # and y is 0 if it is fake and 1 if it is real
    # eta is the learning rate; the per-epoch update is written out below
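    # each epoch applies one full-batch gradient descent step:
    #   w -> w - (eta / n) * sum_x dC_x/dw
    #   b -> b - (eta / n) * sum_x dC_x/db
    # where n = len(words) and dC_x/dw, dC_x/db come from backprop on example x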
    def train(self, words, epochs, eta):
        eta = float(eta) # make sure eta / len(words) below is not integer division
        for n in xrange(epochs):
            print "Epoch", n, "starting..."
            nabla_b = [np.zeros(b.shape) for b in self.biases]  # gradient of cost w.r.t. the biases
            nabla_w = [np.zeros(w.shape) for w in self.weights] # gradient of cost w.r.t. the weights
            # accumulate each word's contribution to the bias and weight gradients
            for word, exists in words:
                delta_nabla_b, delta_nabla_w = self.backprop(word, exists)
                nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
                nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            self.weights = [w - (eta / len(words)) * nw for w, nw in zip(self.weights, nabla_w)]
            self.biases = [b - (eta / len(words)) * nb for b, nb in zip(self.biases, nabla_b)]
            print "Epoch completed"
    def backprop(self, x, y):
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        activation = x # the activation of the input layer is the input itself
        activations = [x] # store the activations layer by layer
        zs = [] # store the weighted inputs layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b # the weighted input of the current layer
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # now go back
        # let's do this literately
        # backpropagation is all about delta, the partial derivative of C with respect to z[l][j]
        # z[l][j] is the weighted input (w[l][j] dot a + b) of the jth neuron of the lth layer
        # a[l][j] = sigmoid(w[l][j] dot a + b)
        # a[l][j] = sigmoid(z[l][j])
        # we will now use (BP1) to find delta
        # we will refer to the partial derivative of C with respect to a[l][j] as dC/da
        # dC/dz = dC/da * da/dz
        # delta = dC/da * d/dz(sigmoid(z))
        # delta = dC/da * sigmoid'(z)
        # we can calculate dC/da with self.dCda and we already have the sigmoidprime function
        # now we can calculate delta
        dCda = self.dCda(activations[-1], y)
        dadz = sigmoidprime(zs[-1])
        delta = dCda * dadz
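        # in vector form this is (BP1): delta = dC/da * sigmoid'(z), element-wise over the
        # output layer; with the quadratic cost used here (see dCda below), dC/da = a - y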
        # if b is the bias of a given neuron with weighted input z,
        # dz/db = 1
        # dC/db = dC/dz * dz/db
        # dC/db = dC/dz * 1
        # dC/db = dC/dz
        # dC/db = delta (i.e. BP3)
        # apply this for all the neurons in a given layer to get nabla_b, the gradient of cost with
        # respect to the biases of the neurons in the current layer, the output layer
        nabla_b[-1] = delta
        # we can now calculate the gradient of cost with respect to the weights of the output layer
        # self.weights[l][j][k] is the weight of an input from k to j, but error (delta) propagates
        # backwards through the neural network, so the loop below will want the weight from j to k,
        # which we can find by transposing self.weights[l] (i.e. np.transpose(self.weights[l]))
        # because we're trying to find the gradient of the cost with respect to the weights of the
        # connections going into the output layer, -1, we need to find nabla_w[-1]
        # (BP4): nabla_w[l][j][k] = a[l-1][k] * delta[l][j]
        # where a[l-1][k] is the activation of the kth neuron in the previous layer,
        # i.e. nabla_w[-1] is the outer product of delta and the previous layer's activations
        nabla_w[-1] = np.outer(delta, activations[-2])
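        # shape check for the [6, 10, 10, 1] network above: delta has shape (1,),
        # activations[-2] has shape (10,), so nabla_w[-1] has shape (1, 10),
        # matching self.weights[-1]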
        # propagate the error back through the remaining layers (BP2)
        for l in xrange(2, len(self.sizes)):
            z = zs[-l]
            zp = sigmoidprime(z) # sigmoid'(z)
            delta = np.dot(np.transpose(self.weights[-l+1]), delta) * zp
            nabla_b[-l] = delta
            nabla_w[-l] = np.outer(delta, activations[-l-1])
        return (nabla_b, nabla_w)
    # mean squared error of the network over test_data
    def evaluate(self, test_data):
        return sum((y - self.feedforward(x)) ** 2 for x, y in test_data) / (len(test_data) * 2)
    # derivative of the quadratic cost C = (a - y)^2 / 2 with respect to the output activation a
    def dCda(self, output_activations, y):
        return output_activations - y