Extended version of Hinton's dropout code from http://iamtrask.github.io/2015/07/28/dropout/
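Before the full script, here is a minimal illustrative check (not part of the original gist; variable names are made up for the demo) of why the mask in the script is rescaled by 1/(1 - dropout_percent). With this "inverted" dropout, each unit is kept with probability 1 - p and the survivors are scaled up, so the expected activation is unchanged and no extra scaling is needed at test time:

```python
import numpy as np

p = 0.05  # matches dropout_percent in the script below
activations = np.ones(1_000_000)
# keep each unit with probability 1 - p, rescale survivors by 1/(1 - p)
mask = np.random.binomial(1, 1 - p, size=activations.shape) * (1.0 / (1 - p))
print(activations.mean(), (activations * mask).mean())  # both approximately 1.0
```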
import numpy as np

alpha = 0.1
hidden_dim = 4
dropout_percent = 0.05
do_dropout = True

# compute sigmoid nonlinearity
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    return output * (1 - output)

# XOR-style training data: 4 examples, 3 input features (last column is a bias)
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

y = np.array([[0],
              [1],
              [1],
              [0]])

print("Training With Alpha:" + str(alpha))
np.random.seed(1)

# randomly initialize our weights with mean 0
synapse_0 = 2 * np.random.random((3, 4)) - 1
synapse_1 = 2 * np.random.random((4, 1)) - 1

for j in range(60001):

    # feed forward through layers 0 and 1
    layer_0 = X
    layer_1 = sigmoid(np.dot(layer_0, synapse_0))

    # dropout: zero each hidden unit with probability dropout_percent and
    # rescale the survivors by 1/(1 - dropout_percent) ("inverted" dropout),
    # so the expected activation is unchanged
    if do_dropout:
        layer_1 *= np.random.binomial([np.ones((len(X), hidden_dim))],
                                      1 - dropout_percent)[0] * (1.0 / (1 - dropout_percent))

    # feed forward through layer 2
    layer_2 = sigmoid(np.dot(layer_1, synapse_1))

    # how much did we miss the target value?
    layer_2_error = layer_2 - y

    if (j % 10000) == 0:
        print("Error after " + str(j) + " iterations:" + str(np.mean(np.abs(layer_2_error))))

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)

    # how much did each l1 value contribute to the l2 error (according to the weights)?
    layer_1_error = layer_2_delta.dot(synapse_1.T)

    # in what direction is the target l1?
    # were we really sure? if so, don't change too much.
    layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)

    # gradient descent weight updates
    synapse_1 -= alpha * layer_1.T.dot(layer_2_delta)
    synapse_0 -= alpha * layer_0.T.dot(layer_1_delta)
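A minimal inference sketch, appended after the training loop above (an assumption for illustration, not part of the original gist): because the dropout mask is already rescaled during training, test-time prediction is just a plain forward pass with the learned weights, no dropout and no extra scaling.

```python
# assumes this runs after the training loop, so sigmoid, synapse_0 and
# synapse_1 are in scope; the probe input below is hypothetical
test_input = np.array([[1, 0, 1]])               # an example with target 1
hidden = sigmoid(np.dot(test_input, synapse_0))  # no dropout at inference
prediction = sigmoid(np.dot(hidden, synapse_1))
print(prediction)                                # should be close to 1
```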