@yusugomori
Last active March 13, 2023 05:46
Dropout Neural Networks (with ReLU)
# -*- coding: utf-8 -*-
import sys
import numpy
numpy.seterr(all='ignore')
'''
activation function
'''
def sigmoid(x):
    return 1. / (1 + numpy.exp(-x))

def dsigmoid(x):
    return x * (1. - x)

def tanh(x):
    return numpy.tanh(x)

def dtanh(x):
    return 1. - x * x

def softmax(x):
    e = numpy.exp(x - numpy.max(x))  # prevent overflow
    if e.ndim == 1:
        return e / numpy.sum(e, axis=0)
    else:
        return e / numpy.array([numpy.sum(e, axis=1)]).T  # ndim = 2

def ReLU(x):
    return x * (x > 0)

def dReLU(x):
    return 1. * (x > 0)

'''
Dropout
'''
class Dropout(object):
    def __init__(self, input, label,
                 n_in, hidden_layer_sizes, n_out,
                 rng=None, activation=ReLU):

        self.x = input
        self.y = label

        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i-1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output()

            # construct hidden_layer
            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)

            self.hidden_layers.append(hidden_layer)
        # layer for output using Logistic Regression (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_out)

    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
        for epoch in xrange(epochs):
            dropout_masks = []  # create different masks in each training epoch

            # forward hidden_layers
            for i in xrange(self.n_layers):
                if i == 0:
                    layer_input = self.x

                layer_input = self.hidden_layers[i].forward(input=layer_input)

                if dropout == True:
                    mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng)
                    layer_input *= mask

                    dropout_masks.append(mask)

            # forward & backward log_layer
            self.log_layer.train(input=layer_input)

            # backward hidden_layers
            for i in reversed(xrange(0, self.n_layers)):
                if i == self.n_layers-1:
                    prev_layer = self.log_layer
                else:
                    prev_layer = self.hidden_layers[i+1]

                self.hidden_layers[i].backward(prev_layer=prev_layer)

                if dropout == True:
                    self.hidden_layers[i].d_y *= dropout_masks[i]  # also mask here

    def predict(self, x, dropout=True, p_dropout=0.5):
        layer_input = x

        for i in xrange(self.n_layers):
            if dropout == True:
                # scale by the keep probability (1 - p_dropout): units were kept
                # with probability 1 - p_dropout during training (the mask uses p=1-p)
                self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W
                self.hidden_layers[i].b = (1 - p_dropout) * self.hidden_layers[i].b

            layer_input = self.hidden_layers[i].output(input=layer_input)

        return self.log_layer.predict(layer_input)
'''
Hidden Layer
'''
class HiddenLayer(object):
    def __init__(self, input, n_in, n_out,
                 W=None, b=None, rng=None, activation=tanh):

        if rng is None:
            rng = numpy.random.RandomState(1234)

        if W is None:
            a = 1. / n_in
            W = numpy.array(rng.uniform(  # initialize W uniformly
                low=-a,
                high=a,
                size=(n_in, n_out)))

        if b is None:
            b = numpy.zeros(n_out)  # initialize bias 0

        self.rng = rng
        self.x = input

        self.W = W
        self.b = b

        if activation == tanh:
            self.dactivation = dtanh
        elif activation == sigmoid:
            self.dactivation = dsigmoid
        elif activation == ReLU:
            self.dactivation = dReLU
        else:
            raise ValueError('activation function not supported.')

        self.activation = activation

    def output(self, input=None):
        if input is not None:
            self.x = input

        linear_output = numpy.dot(self.x, self.W) + self.b
        return (linear_output if self.activation is None
                else self.activation(linear_output))

    def sample_h_given_v(self, input=None):
        if input is not None:
            self.x = input

        v_mean = self.output()
        h_sample = self.rng.binomial(size=v_mean.shape,
                                     n=1,
                                     p=v_mean)
        return h_sample

    def forward(self, input=None):
        return self.output(input=input)

    def backward(self, prev_layer, lr=0.1, input=None):
        if input is not None:
            self.x = input

        d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)

        self.W += lr * numpy.dot(self.x.T, d_y)
        self.b += lr * numpy.mean(d_y, axis=0)

        self.d_y = d_y

    def dropout(self, input, p, rng=None):
        if rng is None:
            rng = numpy.random.RandomState(123)

        mask = rng.binomial(size=input.shape,
                            n=1,
                            p=1-p)  # p is the prob of dropping

        return mask
'''
Logistic Regression
'''
class LogisticRegression(object):
    def __init__(self, input, label, n_in, n_out):
        self.x = input
        self.y = label
        self.W = numpy.zeros((n_in, n_out))  # initialize W 0
        self.b = numpy.zeros(n_out)          # initialize bias 0

    def train(self, lr=0.1, input=None, L2_reg=0.00):
        if input is not None:
            self.x = input

        p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b)
        d_y = self.y - p_y_given_x

        self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W
        self.b += lr * numpy.mean(d_y, axis=0)

        self.d_y = d_y

        # cost = self.negative_log_likelihood()
        # return cost

    def negative_log_likelihood(self):
        sigmoid_activation = softmax(numpy.dot(self.x, self.W) + self.b)

        cross_entropy = - numpy.mean(
            numpy.sum(self.y * numpy.log(sigmoid_activation) +
                      (1 - self.y) * numpy.log(1 - sigmoid_activation),
                      axis=1))

        return cross_entropy

    def predict(self, x):
        return softmax(numpy.dot(x, self.W) + self.b)

    def output(self, x):
        return self.predict(x)
def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5):
    # XOR
    x = numpy.array([[0, 0],
                     [0, 1],
                     [1, 0],
                     [1, 1]])

    y = numpy.array([[0, 1],
                     [1, 0],
                     [1, 0],
                     [0, 1]])

    rng = numpy.random.RandomState(123)

    # construct Dropout MLP
    classifier = Dropout(input=x, label=y,
                         n_in=2, hidden_layer_sizes=[10, 10], n_out=2,
                         rng=rng, activation=ReLU)

    # train
    classifier.train(epochs=n_epochs, dropout=dropout,
                     p_dropout=p_dropout, rng=rng)

    # test
    print classifier.predict(x)


if __name__ == "__main__":
    test_dropout()
@ilkarman commented Apr 6, 2017

Shouldn't it be def dsigmoid(x): return sigmoid(x) * (1. - sigmoid(x))?

@smakosh commented Aug 12, 2017

Yeah, it's def sigmoid_p(x): return sigmoid(x) * (1 - sigmoid(x)).

@mouryarishik

You're right in general, but look at how dsigmoid is used in the code: the values passed to it are the outputs of the hidden layers, which have already been passed through the sigmoid, so there is no need to apply the sigmoid again inside the derivative.

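A minimal sketch of that convention (nothing in the gist changes here): dsigmoid is meant to receive the already-activated output a = sigmoid(z), so a * (1 - a) equals sigmoid(z) * (1 - sigmoid(z)) evaluated at the same z.

import numpy

def sigmoid(x):
    return 1. / (1 + numpy.exp(-x))

def dsigmoid(y):
    # y is assumed to already be sigmoid(x), i.e. a layer's activated output
    return y * (1. - y)

z = numpy.array([-2., 0., 3.])
a = sigmoid(z)
# both expressions give the derivative of the sigmoid at z
assert numpy.allclose(dsigmoid(a), sigmoid(z) * (1. - sigmoid(z)))
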
@Kaiyoto commented Nov 20, 2018

Not sure why, but if I change the data I get nan as the prediction.

@umamakhalid

Which function should be used to take the derivative of the softmax?

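In the gist the softmax derivative is never taken on its own: LogisticRegression.train pairs the softmax with a cross-entropy loss, and that combination makes the gradient with respect to the pre-softmax scores simplify to p_y_given_x - y, which is why d_y = self.y - p_y_given_x is used directly (the sign is flipped because the update ascends the log-likelihood). A small numerical check of that simplification, as a sketch with made-up values:

import numpy

def softmax(z):
    e = numpy.exp(z - numpy.max(z))
    return e / numpy.sum(e)

def cross_entropy(z, y):
    return -numpy.sum(y * numpy.log(softmax(z)))

z = numpy.array([0.2, -1.0, 0.5])   # arbitrary pre-softmax scores
y = numpy.array([0., 1., 0.])       # one-hot target
analytic = softmax(z) - y           # the claimed simplified gradient

# central-difference numerical gradient of the loss with respect to z
h = 1e-5
numeric = numpy.array([
    (cross_entropy(z + h * e_i, y) - cross_entropy(z - h * e_i, y)) / (2 * h)
    for e_i in numpy.eye(3)
])
assert numpy.allclose(analytic, numeric, atol=1e-6)
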
@JohnNesbit

@Kaiyoto it is probably because of overflow.

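One possible way to confirm that guess (just a debugging sketch, not part of the gist): the script calls numpy.seterr(all='ignore') at the top, which silences the overflow before it turns into nan; switching it to 'raise' makes numpy point at the operation that overflows.

import numpy

# Assumption: the nan really does start as a floating-point overflow.
# Replacing numpy.seterr(all='ignore') with 'raise' surfaces it immediately.
numpy.seterr(all='raise')

try:
    numpy.exp(numpy.array([1000.]))   # deliberately overflow to show the effect
except FloatingPointError as err:
    print('caught: %s' % err)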