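# Train a small feedforward neural network to tell real k-letter English words
# (read from words.txt) apart from strings of random letters.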
from NeuralNetwork import NeuralNetwork
from random import shuffle, choice

k = 6  # length of the word
test_data_training_size = 100
hidden_layers_sizes = [10, 10]
epochs = 15
eta = 0.25

letters = [chr(ordinal) for ordinal in xrange(ord("a"), ord("z") + 1)]
random_letter = lambda: choice(letters)
random_letters = lambda k: [random_letter() for n in xrange(k)]
ordinals = lambda word: [ord(letter) - ord('a') for letter in word]
deordinalize = lambda o: "".join([chr(ordinal + ord('a')) for ordinal in o])
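# round-trip example: ordinals("cab") == [2, 0, 1] and deordinalize([2, 0, 1]) == "cab"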
with open("words.txt", "r") as words_file: | |
# each word has a carriage return "\r\n" at the end, so a word that appears to be k-long is | |
# actually k+2 long, so the code will take care of that for us | |
words = [word[0:6] for word in words_file if len(word) == k+2] | |
shuffle(words) | |
net = NeuralNetwork([k] + hidden_layers_sizes + [1])

# train the neural network
# don't train with the last test_data_training_size words
n_real_words = len(words) - test_data_training_size
training_data = []
training_data += [(ordinals(word), 1) for word in words[0:-test_data_training_size]]  # real words
training_data += [(ordinals(random_letters(k)), 0) for n in xrange(n_real_words)]
shuffle(training_data)
net.train(training_data, epochs, eta)

testing_data = []
testing_data += [(ordinals(word), 1) for word in words[-test_data_training_size:]]
testing_data += [(ordinals(random_letters(k)), 0) for n in xrange(test_data_training_size)]
shuffle(testing_data)

total_correct = 0
for word, exists in testing_data:
    if exists == round(net.feedforward(word)[0]):
        total_correct += 1
print total_correct, "correct out of", 2 * test_data_training_size
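
# A small usage sketch (not part of the original gist): score one extra candidate
# string with the trained network; "planet" is just an illustrative k-letter example.
candidate = "planet"
score = net.feedforward(ordinals(candidate))[0]
print candidate, "classified as", "real" if round(score) == 1 else "fake"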
# NeuralNetwork.py
import numpy as np
from random import shuffle
from math import sqrt

sigmoid = lambda z: 1 / (1 + np.exp(-z))
sigmoidprime = lambda z: sigmoid(z) * (1 - sigmoid(z))  # derivative of the sigmoid function
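# e.g. sigmoid(0.0) == 0.5 and sigmoidprime(0.0) == 0.25; both apply elementwise to numpy arrays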
# split array into consecutive chunks of length n
def chunk(array, n):
    chunks = []
    for i in xrange(0, len(array), n):
        chunks.append(array[i:i+n])
    return chunks
class NeuralNetwork(object):
    # sizes is the size of each layer in the network
    def __init__(self, sizes):
        self.sizes = sizes
        self.biases = [np.random.randn(y) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    # letters is a list of letters that form a word
    def feedforward(self, letters):
        a = letters
        for b, w in zip(self.biases, self.weights):
            # because w is a matrix, np.dot(w, a) is equivalent to matrix multiplication
            a = sigmoid(np.dot(w, a) + b)
        return a
    # applies gradient descent on the entire data set at once because it's small
    # epochs is the number of times to apply gradient descent
    # words is a list of tuples of the form (x, y) where x is the word or fake word and y is
    # 0 if it is fake and 1 if it is real
    # eta is the learning rate
    def train(self, words, epochs, eta):
        eta = float(eta)  # make sure the updates below use float division
        for n in xrange(epochs):
            print "Epoch", n, "starting..."
            nabla_b = [np.zeros(b.shape) for b in self.biases]   # grad. of cost w/ respect to bias
            nabla_w = [np.zeros(w.shape) for w in self.weights]  # grad. of cost w/ respect to weight
            # calculate and add the necessary changes to bias and weight for each individual word
            for word, exists in words:
                delta_nabla_b, delta_nabla_w = self.backprop(word, exists)
                nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
                nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            self.weights = [w - (eta / len(words)) * nw for w, nw in zip(self.weights, nabla_w)]
            self.biases = [b - (eta / len(words)) * nb for b, nb in zip(self.biases, nabla_b)]
            print "Epoch completed"
    def backprop(self, x, y):
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        activation = x     # the activation of the input layer is the input itself
        activations = [x]  # store the activations layer by layer
        zs = []            # store the z (weighted input) vectors layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b  # find the new weighted input
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # now go back
        # let's walk through this step by step
        # backpropagation is all about delta, the partial derivative of C with respect to z[l][j]
        # z[l][j] is the weighted input (w[l][j] dot a + b) of the jth neuron of the lth layer
        # a[l][j] = sigmoid(w[l][j] dot a + b)
        # a[l][j] = sigmoid(z[l][j])
        # we will now use (BP1) to find delta
        # we will refer to the partial derivative of C with respect to a[l][j] as dC/da
        # dC/dz = dC/da * da/dz
        # delta = dC/da * d/dz(sigmoid(z))
        # delta = dC/da * sigmoid'(z)
        # we can calculate dC/da with self.dCda and we already have the sigmoidprime function
        # now we can calculate delta for the output layer
        dCda = self.dCda(activations[-1], y)
        dadz = sigmoidprime(zs[-1])
        delta = dCda * dadz
        # if b is the bias of a given neuron with weighted input z, then
        # dz/db = 1
        # dC/db = dC/dz * dz/db
        # dC/db = dC/dz * 1
        # dC/db = dC/dz
        # dC/db = delta (i.e. BP3)
        # apply this for all the neurons in a given layer to get nabla_b, the gradient of cost with
        # respect to the biases of the neurons in the current layer, the output layer
        nabla_b[-1] = delta
        # we can now calculate the gradient of cost with respect to the weights of the output layer
        # self.weights[l][j][k] is the weight of the connection from neuron k to neuron j, but error
        # (delta) propagates backwards through the network, so the backward pass below uses the
        # weight from j to k, found by transposing self.weights[l] (i.e. np.transpose(self.weights[l]))
        # because we're finding the gradient of the cost with respect to the weights of the
        # connections going into the output layer, -1, we need to fill in nabla_w[-1]
        # nabla_w[l][j][k] = a[l-1][k] * delta[l][j]  (i.e. BP4)
        # where a[l-1][k] is the activation of the kth neuron in the previous layer
        # this is just the outer product of delta with the previous layer's activations
        nabla_w[-1] = np.outer(delta, activations[-2])
        # propagate the error back through the remaining layers (BP2)
        for l in xrange(2, len(self.sizes)):
            z = zs[-l]
            zp = sigmoidprime(z)  # sigmoid'(z)
            delta = np.dot(np.transpose(self.weights[-l+1]), delta) * zp
            nabla_b[-l] = delta
            nabla_w[-l] = np.outer(delta, activations[-l-1])
        return (nabla_b, nabla_w)
    # mean quadratic cost over a list of (x, y) test pairs
    def evaluate(self, test_data):
        return sum((y - self.feedforward(x)) ** 2 for x, y in test_data) / (len(test_data) * 2)

    # derivative of the quadratic cost C = (a - y)^2 / 2 with respect to the output activations
    def dCda(self, output_activations, y):
        return output_activations - y
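
if __name__ == "__main__":
    # Quick illustrative self-test, not part of the original gist: fit the AND
    # function with a tiny 2-3-1 network. The layer sizes, epoch count, and
    # learning rate here are arbitrary choices for the sketch (note that train()
    # prints one line per epoch).
    net = NeuralNetwork([2, 3, 1])
    data = [([0, 0], 0), ([0, 1], 0), ([1, 0], 0), ([1, 1], 1)]
    net.train(data, 200, 3.0)
    for x, y in data:
        print x, "->", net.feedforward(x)[0], "(expected", y, ")"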