Created
January 20, 2016 08:30
-
-
Save claymcleod/91f753d6cbf48f7ec3ba to your computer and use it in GitHub Desktop.
Simple NN Theano
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2016 Clay L. McLeod <clay.l.mcleod@gmail.com>
#
# Distributed under terms of the MIT license.
from __future__ import print_function | |
import theano | |
import numpy as np | |
import theano.tensor as T | |
import theano.d3viz as V | |
from sklearn.datasets import make_moons | |
def generate_dataset(n_samples=5000, noise=0.20, seed=0):
    """Build the two-moons training set and publish it as module globals.

    Seeds NumPy's global random generator, draws a ``make_moons`` sample,
    one-hot encodes the labels, casts both arrays to float32 and wraps them
    in Theano shared variables.

    Args:
        n_samples: Number of points requested from ``make_moons``.
        noise: Noise level forwarded to ``make_moons``.
        seed: Value handed to ``np.random.seed`` before sampling.

    Side effects:
        Rebinds the module globals ``train_X`` and ``train_y_onehot``
        (float32 NumPy arrays) and ``X`` and ``y`` (Theano shared variables
        wrapping those arrays).
    """
    global X, y, train_X, train_y_onehot

    # make_moons is called without an explicit random_state, so it presumably
    # draws from the global generator seeded here -- confirm against sklearn.
    np.random.seed(seed)
    features, labels = make_moons(n_samples, noise=noise)

    # Row i of the 2x2 identity matrix is the one-hot encoding of class i.
    one_hot = np.eye(2)[labels]

    train_X = features.astype('float32')
    train_y_onehot = one_hot.astype('float32')

    X = theano.shared(train_X)
    y = theano.shared(train_y_onehot)
def build_nn(input_dim, hidden_dim, hidden_layers, output_dim,
             learning_rate=np.float32(0.01)):
    """Compile the step, loss and predict functions of a ReLU network.

    The network is input -> hidden, followed by ``hidden_layers``
    hidden -> hidden layers, followed by hidden -> output with a softmax.
    It therefore contains ``hidden_layers + 1`` ReLU layers in total.

    The training graph reads the module globals ``X`` and ``y``, which must
    already be bound to Theano shared variables when this is called. The
    prediction graph uses its own float32 matrix input instead.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of every hidden layer.
        hidden_layers: Number of hidden -> hidden weight matrices.
        output_dim: Number of output classes.
        learning_rate: Step size multiplied into each gradient.

    Returns:
        A tuple ``(step, calc_loss, predict)`` of compiled Theano functions:
        ``step()`` applies one gradient-descent update to every weight and
        bias using the loss on ``X``/``y``; ``calc_loss()`` returns the mean
        categorical cross-entropy on ``X``/``y``; ``predict(X_prime)``
        returns the argmax class for each row of ``X_prime``.
    """
    X_prime = T.matrix('X_prime', 'float32')

    # One (rows, cols) entry per layer, in the order the parameters are
    # created; the order matters because weights come from np.random.randn.
    layer_shapes = ([(input_dim, hidden_dim)]
                    + [(hidden_dim, hidden_dim)] * hidden_layers
                    + [(hidden_dim, output_dim)])

    _w = []
    _b = []
    for number, (rows, cols) in enumerate(layer_shapes, start=1):
        _w.append(theano.shared(np.random.randn(rows, cols),
                                name='W{}'.format(number)))
        _b.append(theano.shared(np.zeros(cols), name='B{}'.format(number)))

    def _forward(inputs):
        """Return the symbolic softmax output of the network for *inputs*."""
        activation = inputs
        for weight, bias in zip(_w[:-1], _b[:-1]):
            activation = T.nnet.relu(activation.dot(weight) + bias)
        return T.nnet.softmax(activation.dot(_w[-1]) + _b[-1])

    # Training graph: built on the shared dataset held in the globals X and y.
    y_hat = _forward(X)
    loss = T.nnet.categorical_crossentropy(y_hat, y).mean()

    # Inference graph: same parameters, fed by the X_prime input.
    test_prediction = T.argmax(_forward(X_prime), axis=1)

    # Plain gradient descent; all gradients are requested in one T.grad call.
    params = _w + _b
    grads = T.grad(loss, params)
    updates = [(param, param - grad * learning_rate)
               for param, grad in zip(params, grads)]

    predict = theano.function([X_prime], test_prediction)
    calc_loss = theano.function([], loss)
    step = theano.function([], updates=updates)
    return step, calc_loss, predict
def train(nn_step_fn, nn_loss_fn, log_every=100, iters=20000):
    """Run the training step repeatedly, printing the loss periodically.

    Args:
        nn_step_fn: Zero-argument callable invoked once per iteration.
        nn_loss_fn: Zero-argument callable whose return value is printed as
            the current loss; only invoked on logging iterations.
        log_every: Print the loss on every iteration whose index is a
            multiple of this value (iteration 0 included). A falsy value
            (``0`` or ``None``) disables logging.
        iters: Number of iterations to run.
    """
    for iteration in range(iters):
        nn_step_fn()
        # Guard against log_every == 0, which would otherwise raise
        # ZeroDivisionError in the modulo after the first step already ran.
        if log_every and iteration % log_every == 0:
            print("Loss at iteration {}: {}".format(iteration, nn_loss_fn()))
def output_viz(fn, name):
    """Hand *fn* and the output name *name* to ``theano.d3viz``'s ``d3viz``."""
    render = V.d3viz
    render(fn, name)
def main():
    """Create the dataset, build and train the network, then export a graph.

    The compiled step function is passed to ``output_viz`` with the output
    name ``step.html``.
    """
    generate_dataset()

    # The predict function is returned by build_nn but not used by this script.
    step_fn, loss_fn, _predict_fn = build_nn(
        input_dim=2, hidden_dim=3, hidden_layers=100, output_dim=2)

    train(step_fn, loss_fn)
    output_viz(step_fn, 'step.html')
# Run the full pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment