help interpreting output of MonitorMode
import numpy as np
import theano.tensor as T
from theano import shared, config
from theano.tensor.shared_randomstreams import RandomStreams

class AutoEncoder(object):
    def __init__(self, numpy_rng=None, theano_rng=None, input=None, n_visible=784, n_hidden=500,
                 W=None, bhid=None, bvis=None, W_name=None, bvis_name=None, bhid_name=None, sparse_init=-1):
        """ A de-noising AutoEncoder class from the Theano tutorials.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           generated based on a seed drawn from `numpy_rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input, or None for a
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared Theano variables connecting the visible and hidden layers.

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of bias values (for
                     hidden units).

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of bias values (for
                     visible units).

        :type W_name: string
        :param W_name: name to be assigned to the W matrix.

        :type bvis_name: string
        :param bvis_name: name to be assigned to the b vector for the visible units.

        :type bhid_name: string
        :param bhid_name: name to be assigned to the b vector for the hidden units.

        :type sparse_init: int
        :param sparse_init: Initialize the weight matrix using Martens sparse
                            initialization (Martens, ICML 2010). A value > 0 specifies the number
                            of units in the layer that have initial weights drawn from N(0,1).
                            Use -1 for Glorot & Bengio (i.e. dense) initialization.
        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        if numpy_rng is None:
            raise AssertionError("numpy_rng cannot be unspecified in AutoEncoder.__init__")

        # create a Theano random generator that gives symbolic random values
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.theano_rng = theano_rng

        if W_name is None:
            W_name = 'W'
        if bvis_name is None:
            bvis_name = 'bvis'
        if bhid_name is None:
            bhid_name = 'bhid'

        # test against None explicitly: truth-testing a shared variable raises an error
        if W is None:
            if sparse_init > 0:
                initial_W = self.sparse_w(n_visible, n_hidden, sparse_init)
            else:
                initial_W = self.dense_w(n_visible, n_hidden, numpy_rng)
            W = shared(value=initial_W, name=W_name)
        self.W = W

        # Tie the weights of the decoder to the encoder
        self.W_prime = self.W.T

        # Bias of the visible units
        if bvis is None:
            bvis = shared(value=np.zeros(n_visible, dtype=config.floatX), name=bvis_name)
        self.b_prime = bvis

        # Bias of the hidden units
        if bhid is None:
            bhid = shared(value=np.zeros(n_hidden, dtype=config.floatX), name=bhid_name)
        self.b = bhid

        if input is None:
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]

    def get_corrupted_input(self, input, corruption_level):
        """ Keep ``1 - corruption_level`` entries of the input unchanged, and
        zero out a randomly selected subset of size ``corruption_level``.

        Note: the first argument of theano_rng.binomial is the shape (size) of
        the random numbers it should produce, the second argument is the
        number of trials, and the third argument is the probability of success
        of any trial.  This produces an array of 0s and 1s, where 1 appears
        with probability ``1 - corruption_level`` and 0 with probability
        ``corruption_level``.
        """
        return T.cast(self.theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level), config.floatX) * input

    def get_hidden_values(self, input):
        """ Compute the values of the hidden layer """
        raise NotImplementedError(str(type(self)) + " does not implement get_hidden_values.")

    def get_reconstructed_input(self, hidden):
        """ Compute the reconstructed input given the hidden rep'n """
        raise NotImplementedError(str(type(self)) + " does not implement get_reconstructed_input.")

    def get_cost_updates(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input,
        taking into account a certain level of corruption of the input """
        raise NotImplementedError(str(type(self)) + " does not implement get_cost_updates.")

    def __getstate__(self):
        """ Return a tuple of all the important parameters that define this dA """
        return (self.W.get_value(), self.b.get_value(), self.b_prime.get_value(), self.n_visible, self.n_hidden)

    def __setstate__(self, state):
        """ Set the state of this dA from values returned from a deserialization process like unpickle. """
        W, b, b_prime, n_visible, n_hidden = state
        self.W = shared(value=W, name='W')
        # self.b holds the hidden-unit biases, self.b_prime the visible-unit
        # biases; name them to match __init__
        self.b = shared(value=b, name='bhid')
        self.b_prime = shared(value=b_prime, name='bvis')
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        numpy_rng = np.random.RandomState(123)
        self.theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.W_prime = self.W.T
        self.params = [self.W, self.b, self.b_prime]

    def get_params(self):
        """ Return the params of this dA. This is for pickling testing purposes """
        return self.params

    def set_input(self, input):
        """ Set the input for an unpickled dA """
        self.x = input

    def dropout_from_layer(self, layer, prob):
        """ Apply masking noise to the hidden (i.e. output) layer for this dA.

        :type layer: theano.shared
        :param layer: the layer whose units are to be masked

        :type prob: float
        :param prob: retain each unit in this layer with probability prob """
        return T.cast(self.theano_rng.binomial(size=layer.shape, n=1, p=prob), config.floatX) * layer

    def sparse_w(self, n_visible, n_hidden, sparsity):
        ''' Return a numpy array for a sparse W matrix, using the method of Martens (ICML 2010) '''
        initial_W = np.zeros((n_visible, n_hidden), dtype=config.floatX)
        idx = np.arange(n_hidden)
        # Give each visible unit only n_connections nonzero weights to the
        # hidden units, and never more connections than there are hidden units
        n_connections = min(sparsity, n_hidden)
        for j in xrange(n_visible):
            np.random.shuffle(idx)
            initial_W[j, idx[:n_connections]] = np.random.randn(n_connections)
        print "... returned sparse init matrix"
        return initial_W

    def dense_w(self, n_visible, n_hidden, numpy_rng):
        ''' Return a numpy array for a dense W matrix, using the method of Glorot and Bengio (AISTATS 2010) '''
        initial_W = np.asarray(numpy_rng.uniform(
            low=-4 * np.sqrt(6. / (n_hidden + n_visible)),
            high=4 * np.sqrt(6. / (n_hidden + n_visible)),
            size=(n_visible, n_hidden)), dtype=config.floatX)
        print "... returned dense init matrix"
        return initial_W


class BernoulliAutoEncoder(AutoEncoder):
    def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500,
                 W=None, bhid=None, bvis=None, W_name=None, bvis_name=None, bhid_name=None, sparse_init=-1):
        """ A de-noising AutoEncoder with [0,1] inputs and hidden values.

        Parameters are the same as for AutoEncoder.__init__; see that
        docstring for details.
        """
        super(BernoulliAutoEncoder, self).__init__(numpy_rng, theano_rng, input, n_visible, n_hidden,
                                                   W, bhid, bvis, W_name, bvis_name, bhid_name, sparse_init)
        # use self.x rather than input, since input may be None
        self.output = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)

    @classmethod
    def class_from_values(cls, *args, **kwargs):
        """ This constructor is intended for dynamically constructing a dA layer subclass.

        Args that always get specified in this constructor: numpy_rng, theano_rng, input, n_visible, n_hidden, W_name, bvis_name, bhid_name.
        Args that *sometimes* get specified in this constructor: W, bvis, bhid, sparse_init.
        """
        kwargs.setdefault('W', None)
        kwargs.setdefault('bhid', None)
        kwargs.setdefault('bvis', None)
        kwargs.setdefault('sparse_init', -1)
        return cls(numpy_rng=kwargs['numpy_rng'], theano_rng=kwargs['theano_rng'], input=kwargs['input'],
                   n_visible=kwargs['n_visible'], n_hidden=kwargs['n_hidden'], W=kwargs['W'],
                   bhid=kwargs['bhid'], bvis=kwargs['bvis'], W_name=kwargs['W_name'],
                   bvis_name=kwargs['bvis_name'], bhid_name=kwargs['bhid_name'], sparse_init=kwargs['sparse_init'])

    def get_hidden_values(self, input):
        """ Compute the values of the hidden layer """
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_reconstructed_input(self, hidden):
        """ Compute the reconstructed input given the hidden rep'n """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input,
        taking into account a certain level of corruption of the input """
        x_corrupted = super(BernoulliAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Use the cross-entropy loss
        L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of updates to each param
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))
        return (cost, updates)

    def get_cost_gparams(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input (with corruption).
        Instead of returning a list of tuples (updates) where the rval has the form of an update to
        a theano.tensor variable (param, update_value), return instead (param, gparam). """
        x_corrupted = super(BernoulliAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Take the sum over columns, using the squared error loss function
        L = T.sum((self.x - z) ** 2, axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of (parameter, gradient) tuples
        param_grads = []
        for param, gparam in zip(self.params, gparams):
            param_grads.append((param, gparam))
        return (cost, param_grads)


class GaussianAutoEncoder(AutoEncoder):
    def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500,
                 W=None, bhid=None, bvis=None, W_name=None, bvis_name=None, bhid_name=None, sparse_init=-1):
        """ A de-noising AutoEncoder with Gaussian visible units and a linear decoder.

        Parameters are the same as for AutoEncoder.__init__; see that
        docstring for details.
        """
        super(GaussianAutoEncoder, self).__init__(numpy_rng, theano_rng, input, n_visible, n_hidden,
                                                  W, bhid, bvis, W_name, bvis_name, bhid_name, sparse_init)
        # use self.x rather than input, since input may be None
        self.output = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)

    @classmethod
    def class_from_values(cls, *args, **kwargs):
        """ This constructor is intended for dynamically constructing a dA layer subclass.

        Args that always get specified through this constructor: numpy_rng, theano_rng, input, n_visible, n_hidden, W_name, bvis_name, bhid_name.
        Args that *might* be specified: W, bhid, bvis, sparse_init.
        """
        kwargs.setdefault('W', None)
        kwargs.setdefault('bhid', None)
        kwargs.setdefault('bvis', None)
        kwargs.setdefault('sparse_init', -1)
        return cls(numpy_rng=kwargs['numpy_rng'], theano_rng=kwargs['theano_rng'], input=kwargs['input'],
                   n_visible=kwargs['n_visible'], n_hidden=kwargs['n_hidden'], W=kwargs['W'],
                   bhid=kwargs['bhid'], bvis=kwargs['bvis'], W_name=kwargs['W_name'],
                   bvis_name=kwargs['bvis_name'], bhid_name=kwargs['bhid_name'], sparse_init=kwargs['sparse_init'])

    def get_hidden_values(self, input):
        """ Compute the values of the hidden layer """
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_reconstructed_input(self, hidden):
        """ Use a linear decoder to compute the reconstructed input given the hidden rep'n """
        return T.dot(hidden, self.W_prime) + self.b_prime

    def get_cost_updates(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input,
        taking into account a certain level of corruption of the input """
        x_corrupted = super(GaussianAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Take the sum over columns, using the squared error loss function
        L = T.sum((self.x - z) ** 2, axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of updates to each param
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))
        return (cost, updates)

    def get_cost_updates_debug(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input,
        taking into account a certain level of corruption of the input; return intermediate results """
        x_corrupted = super(GaussianAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Take the sum over columns, using the squared error loss function
        L = T.sum((self.x - z) ** 2, axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of updates to each param
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))
        return (cost, y, z, updates)

    def get_cost_gparams(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input (with corruption).
        Instead of returning a list of tuples (updates) where the rval has the form of an update to
        a theano.tensor variable (param, update_value), return instead (param, gparam). """
        x_corrupted = super(GaussianAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Take the sum over columns, using the squared error loss function
        L = T.sum((self.x - z) ** 2, axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of (parameter, gradient) tuples
        param_grads = []
        for param, gparam in zip(self.params, gparams):
            param_grads.append((param, gparam))
        return (cost, param_grads)


class ReluAutoEncoder(AutoEncoder):
    def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500,
                 W=None, bhid=None, bvis=None, W_name=None, bvis_name=None, bhid_name=None, sparse_init=-1):
        """ A de-noising AutoEncoder with ReLU hidden units and a linear decoder.

        Parameters are the same as for AutoEncoder.__init__; see that
        docstring for details.
        """
        # ReLU units may call for a different weight matrix initialization
        # scheme; for now, initialization is delegated to the base class.
        super(ReluAutoEncoder, self).__init__(numpy_rng, theano_rng, input, n_visible, n_hidden,
                                              W, bhid, bvis, W_name, bvis_name, bhid_name, sparse_init)
        # use self.x rather than input, since input may be None
        self.output = T.maximum(T.dot(self.x, self.W) + self.b, 0.0)

    @classmethod
    def class_from_values(cls, *args, **kwargs):
        """ This constructor is intended for dynamically constructing a dA layer subclass.

        Args that always get specified through this version of the constructor:
        numpy_rng, theano_rng, input, n_visible, n_hidden, W_name, bvis_name, bhid_name.
        Args that *might* be specified: W, bhid, bvis, sparse_init.
        """
        kwargs.setdefault('W', None)
        kwargs.setdefault('bhid', None)
        kwargs.setdefault('bvis', None)
        kwargs.setdefault('sparse_init', -1)
        return cls(numpy_rng=kwargs['numpy_rng'], theano_rng=kwargs['theano_rng'], input=kwargs['input'],
                   n_visible=kwargs['n_visible'], n_hidden=kwargs['n_hidden'], W=kwargs['W'],
                   bhid=kwargs['bhid'], bvis=kwargs['bvis'], W_name=kwargs['W_name'],
                   bvis_name=kwargs['bvis_name'], bhid_name=kwargs['bhid_name'],
                   sparse_init=kwargs['sparse_init'])

    def get_reconstructed_input(self, hidden):
        """ Use a linear decoder to compute the reconstructed input given the hidden rep'n """
        return T.dot(hidden, self.W_prime) + self.b_prime

    def get_hidden_values(self, input):
        """ Apply ReLU elementwise to the transformed input """
        return T.maximum(T.dot(input, self.W) + self.b, 0.0)

    def get_cost_updates(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input,
        taking into account a certain level of corruption of the input """
        x_corrupted = super(ReluAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Take the sum over columns, using the squared error loss function
        L = T.sum((self.x - z) ** 2, axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of updates to each param
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))
        return (cost, updates)

    def get_cost_updates_debug(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input,
        taking into account a certain level of corruption of the input; return intermediate results """
        x_corrupted = super(ReluAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Take the sum over columns, using the squared error loss function
        L = T.sum((self.x - z) ** 2, axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of updates to each param
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))
        return (cost, y, z, updates)

    def get_cost_gparams(self, corruption_level, learning_rate):
        """ Compute the reconstruction error over the mini-batched input (with corruption).
        Instead of returning a list of tuples (updates) where the rval has the form of an update to
        a theano.tensor variable (param, update_value), return instead (param, gparam). """
        x_corrupted = super(ReluAutoEncoder, self).get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(x_corrupted)
        z = self.get_reconstructed_input(y)
        # Take the sum over columns, using the squared error loss function
        L = T.sum((self.x - z) ** 2, axis=1)
        cost = T.mean(L)
        # compute the gradients of the cost of the dA w.r.t the params
        gparams = T.grad(cost, self.params)
        # populate the list of (parameter, gradient) tuples
        param_grads = []
        for param, gparam in zip(self.params, gparams):
            param_grads.append((param, gparam))
        return (cost, param_grads)
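
For orientation, here is a minimal sketch of how one of these classes is meant to be driven. The dimensions and hyperparameters are illustrative, chosen to mirror the training script further down:

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
rng = numpy.random.RandomState(123)
# sparse_init=15 gives each visible unit 15 nonzero weights via Martens
# sparse initialization; sparse_init=-1 selects the dense Glorot & Bengio init
da = GaussianAutoEncoder(numpy_rng=rng, input=x, n_visible=916, n_hidden=800,
                         sparse_init=15)
cost, updates = da.get_cost_updates(corruption_level=0.25, learning_rate=0.001)
train = theano.function([x], cost, updates=updates)  # call with a (batch, 916) matrix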
Traceback (most recent call last):
  File "ReLU_vs_GB_test_script.py", line 184, in <module>
    drive_dA()
  File "ReLU_vs_GB_test_script.py", line 169, in drive_dA
    c.append(train_da(batch_index))
  File "/home/z/zhaolei/lzamparo/lib/python2.7/site-packages/theano/compile/function_module.py", line 579, in __call__
    outputs = self.fn()
  File "/home/z/zhaolei/lzamparo/lib/python2.7/site-packages/theano/gof/link.py", line 706, in f
    raise_with_op(node, thunk)
  File "/home/z/zhaolei/lzamparo/lib/python2.7/site-packages/theano/gof/link.py", line 704, in f
    wrapper(i, node, *thunks)
  File "/home/z/zhaolei/lzamparo/lib/python2.7/site-packages/theano/gof/link.py", line 719, in wrapper
    f(*args)
  File "/home/z/zhaolei/lzamparo/lib/python2.7/site-packages/theano/compile/monitormode.py", line 64, in eval
    self.post_func(i, node, fn)
  File "ReLU_vs_GB_test_script.py", line 140, in detect_nan
    if numpy.isnan(output[0]).any():
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule 'safe'
Apply node that caused the error: RandomFunction{binomial}(<RandomStateType>, Elemwise{Cast{int32}}.0, TensorConstant{1}, TensorConstant{1.0})
Inputs shapes: [(911, 800), (1, 1), 'No shapes', 'No shapes']
Inputs strides: [(800, 1), (0, 0), 'No strides', 'No strides']
Inputs types: [<theano.tensor.raw_random.RandomStateType object at 0x5e23750>, TensorType(int32, vector), TensorType(int8, scalar), TensorType(float32, scalar)]
Debugprint of the apply node:
RandomFunction{binomial}.0 [@A] <RandomStateType> ''
 |<RandomStateType> [@B] <RandomStateType>
 |Elemwise{Cast{int32}} [@C] <TensorType(int32, vector)> ''
 | |MakeVector [@D] <TensorType(int64, vector)> ''
 | |Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Composite{[sub(i0, Switch(i1, i2, i0))]}(i0, LT(i1, i0), i1)]}(Composite{[Switch(LT(i0, i1), i1, i0)]}(i0, i1), Composite{[Switch(LT(i0, i1), i1, i0)]}(i2, i1))]}(Composite{[Switch(GE(i0, i1), i1, i0)]}(i0, i1), i2, Composite{[Switch(GE(i0, i1), i1, i0)]}(i3, i1))]}(Composite{[Switch(LT(i0, i1), i2, i0)]}(i0, i1, i2), i3, i1, Composite{[Switch(LT(i0, i1), i1, i0)]}(i4, i1))]}(Composite{[Switch(LT(i0, i1), add(i0, i2), i0)]}(i0, i1, i2), i1, i3, i2, Composite{[Switch(LT(i0, i1), add(i0, i2), i0)]}(i4, i1, i2))]}}[(0, 0)] [@E] <TensorType(int64, scalar)> ''
 | | |Elemwise{Composite{[mul(i0, add(i1, i2))]}} [@F] <TensorType(int64, scalar)> ''
 | | | |TensorConstant{20} [@G] <TensorType(int64, scalar)>
 | | | |TensorConstant{1} [@H] <TensorType(int64, scalar)>
 | | | |<TensorType(int64, scalar)> [@I] <TensorType(int64, scalar)>
 | | |TensorConstant{0} [@J] <TensorType(int8, scalar)>
 | | |Shape_i{0} [@K] <TensorType(int64, scalar)> ''
 | | | |<CudaNdarrayType(float32, matrix)> [@L] <CudaNdarrayType(float32, matrix)>
 | | |TensorConstant{-1} [@M] <TensorType(int8, scalar)>
 | | |Elemwise{mul,no_inplace} [@N] <TensorType(int64, scalar)> ''
 | | |TensorConstant{20} [@G] <TensorType(int64, scalar)>
 | | |<TensorType(int64, scalar)> [@I] <TensorType(int64, scalar)>
 | |Shape_i{1} [@O] <TensorType(int64, scalar)> ''
 | |<CudaNdarrayType(float32, matrix)> [@L] <CudaNdarrayType(float32, matrix)>
 |TensorConstant{1} [@P] <TensorType(int8, scalar)>
 |TensorConstant{1.0} [@Q] <TensorType(float32, scalar)>
RandomFunction{binomial}.1 [@A] <TensorType(int64, matrix)> ''
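
Reading the traceback: the TypeError is raised inside detect_nan itself, not by a NaN in the model. MonitorMode calls post_func after every Apply node, including the RandomFunction{binomial} node created by get_corrupted_input, and per the debugprint that node's first output (RandomFunction{binomial}.0) is the updated RandomState object. numpy.isnan cannot coerce a RandomState, hence the ufunc error. The guard used in the Theano documentation's detect_nan example skips such outputs before testing for NaN; the script below applies the same guard:

def detect_nan(i, node, fn):
    for output in fn.outputs:
        # skip the RandomState output of RandomFunction nodes, which
        # numpy.isnan cannot coerce to an array
        if (not isinstance(output[0], numpy.random.RandomState)
                and numpy.isnan(output[0]).any()):
            print '*** NaN detected ***'
            theano.printing.debugprint(node)
            print 'Inputs : %s' % [input[0] for input in fn.inputs]
            print 'Outputs: %s' % [output[0] for output in fn.outputs]
            break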
# Experiment script to train a ReLU autoencoder in MonitorMode
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from AutoEncoder import AutoEncoder
from AutoEncoder import ReluAutoEncoder
import os
import sys
import time
from datetime import datetime
from optparse import OptionParser


def drive_dA(learning_rate=0.001, training_epochs=50,
             batch_size=20, data_shape=(10000, 916)):
    """
    This dA is driven with foci data

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type batch_size: int
    :param batch_size: size of each minibatch
    """
    parser = OptionParser()
    parser.add_option("-d", "--dir", dest="dir", help="test output directory")
    parser.add_option("-c", "--corruption", dest="corruption", help="use this amount of corruption for the denoising AE", type="float")
    (options, args) = parser.parse_args()

    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    corruptn = str(options.corruption)

    # generate simulated data with a given shape; cast to floatX so the
    # shared variable can live on the GPU (theano.shared takes no dtype kwarg)
    data = numpy.random.randn(data_shape[0], data_shape[1])
    train_set_x = theano.shared(numpy.asarray(data, dtype=theano.config.floatX), borrow=True)

    # compute the number of minibatches for training
    n_train_batches = data_shape[0] / batch_size
    n_cols = data_shape[1]

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data matrix

    ##########
    # Build the ReLU dA
    ##########
    output_filename = "relu_da." + "corruption_" + corruptn + "_" + day + "." + hour
    output_file = open(os.path.join(options.dir, output_filename), 'w')
    print >> output_file, "Run on " + str(datetime.now())

    rng = numpy.random.RandomState(6789)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    da = ReluAutoEncoder(numpy_rng=rng, theano_rng=theano_rng, input=x,
                         n_visible=n_cols, n_hidden=800)
    cost, updates = da.get_cost_updates(corruption_level=float(options.corruption),
                                        learning_rate=learning_rate)

    # monitor for NaN here; skip RandomState outputs, which numpy.isnan
    # cannot handle (RandomFunction nodes return the updated RandomState
    # as their first output)
    def detect_nan(i, node, fn):
        for output in fn.outputs:
            if (not isinstance(output[0], numpy.random.RandomState)
                    and numpy.isnan(output[0]).any()):
                print '*** NaN detected ***'
                theano.printing.debugprint(node)
                print 'Inputs : %s' % [input[0] for input in fn.inputs]
                print 'Outputs: %s' % [output[0] for output in fn.outputs]
                break

    train_da = theano.function([index], cost, updates=updates,
                               givens={x: train_set_x[index * batch_size:
                                                      (index + 1) * batch_size]},
                               mode=theano.compile.MonitorMode(post_func=detect_nan))

    start_time = time.clock()

    ##########
    # Train the model
    ##########
    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        print >> output_file, 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()
    training_time = (end_time - start_time)
    print >> output_file, ('The ' + str(options.corruption) + ' corruption code for file ' +
                           os.path.split(__file__)[1] +
                           ' ran for %.2fm' % ((training_time) / 60.))
    output_file.close()


if __name__ == '__main__':
    drive_dA()
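
As a quick sanity check that the monitoring hook fires at all, MonitorMode can also be exercised in isolation. A minimal, self-contained sketch (with an abbreviated copy of detect_nan) that provokes a NaN deliberately:

import numpy
import theano
import theano.tensor as T

def detect_nan(i, node, fn):
    for output in fn.outputs:
        if (not isinstance(output[0], numpy.random.RandomState)
                and numpy.isnan(output[0]).any()):
            print '*** NaN detected ***'
            theano.printing.debugprint(node)
            break

x = T.dvector('x')
f = theano.function([x], T.log(x),
                    mode=theano.compile.MonitorMode(post_func=detect_nan))
f(numpy.array([-1.0, 1.0]))  # log of a negative number is NaN, so the hook fires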