# Example for my blog post at:
# http://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
import functools | |
import sets | |
import tensorflow as tf | |
def lazy_property(function):
    """Expose ``function`` as a read-only property that is evaluated once.

    On first access the decorated method is called with the instance and
    its result is stored on that instance under ``'_' + function.__name__``.
    Every later access returns the stored value without calling
    ``function`` again.
    """
    cache_name = '_' + function.__name__

    @functools.wraps(function)
    def compute_once(self):
        # Guard clause: the value already lives on the instance.
        if hasattr(self, cache_name):
            return getattr(self, cache_name)
        setattr(self, cache_name, function(self))
        return getattr(self, cache_name)

    return property(compute_once)
class SequenceLabelling:
    """Multi-layer GRU network that predicts a class for every time step.

    Args:
        data: Input tensor handed to ``tf.nn.dynamic_rnn``.
        target: Tensor of per-step class indicators; its static shape must
            define dimension 1 (sequence length) and dimension 2 (number of
            classes), because both are read when building the softmax layer.
        dropout: Keep probability applied to the output of every recurrent
            layer (passed as ``output_keep_prob``).
        num_hidden: Number of units in each GRU layer.
        num_layers: Number of stacked GRU layers.

    The graph pieces are exposed as lazily built properties:
    ``prediction``, ``cost``, ``optimize`` and ``error``.
    """

    def __init__(self, data, target, dropout, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self.dropout = dropout
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        # Touch the lazy properties so the graph is built during
        # construction rather than on first use.
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def prediction(self):
        """Per-step class probabilities, reshaped to (-1, length, classes)."""
        # Recurrent network. Every layer needs its own cell object: putting
        # the same object in the list several times makes the layers share
        # one cell, which newer TensorFlow versions reject with a dimension
        # mismatch whenever the input size differs from the hidden size.
        cells = []
        for _ in range(self._num_layers):
            cell = tf.nn.rnn_cell.GRUCell(self._num_hidden)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=self.dropout)
            cells.append(cell)
        network = tf.nn.rnn_cell.MultiRNNCell(cells)
        # Use the tensor given to the constructor, not a module-level name.
        output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
        # Softmax layer.
        max_length = int(self.target.get_shape()[1])
        num_classes = int(self.target.get_shape()[2])
        weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
        # Flatten to apply same weights to all time steps.
        output = tf.reshape(output, [-1, self._num_hidden])
        prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
        prediction = tf.reshape(prediction, [-1, max_length, num_classes])
        return prediction

    @lazy_property
    def cost(self):
        """Cross entropy summed over steps and classes, averaged over axis 0."""
        # Keep probabilities strictly positive: log(0) is -inf and
        # 0 * -inf is NaN, which would poison the loss and the gradients.
        safe_prediction = tf.clip_by_value(self.prediction, 1e-10, 1.0)
        cross_entropy = -tf.reduce_sum(
            self.target * tf.log(safe_prediction), [1, 2])
        cross_entropy = tf.reduce_mean(cross_entropy)
        return cross_entropy

    @lazy_property
    def optimize(self):
        """Training op: one RMSProp step that minimizes ``cost``."""
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        """Fraction of positions whose arg-max class differs from the target."""
        mistakes = tf.not_equal(
            tf.argmax(self.target, 2), tf.argmax(self.prediction, 2))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        """Create the variables of a dense layer.

        Returns:
            A ``(weight, bias)`` pair of ``tf.Variable`` objects: the weight
            has shape ``[in_size, out_size]`` and is initialised from a
            truncated normal with stddev 0.01; the bias has shape
            ``[out_size]`` and is initialised to 0.1.
        """
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)
def read_dataset():
    """Load the OCR dataset and return it as a ``(train, test)`` pair.

    The ``target`` column is one-hot encoded, the last two axes of ``data``
    are collapsed into a single axis and cast to float, and the result is
    split with ``sets.Split(0.66)``.
    """
    ocr = sets.Ocr()
    # NOTE(review): the meaning of depth=2 is defined by sets.OneHot and is
    # not visible here - confirm against the sets documentation.
    encode_target = sets.OneHot(ocr.target, depth=2)
    ocr = encode_target(ocr, columns=['target'])
    # Keep the leading axes and merge the final two into one.
    flat_shape = ocr.data.shape[:-2] + (-1,)
    ocr['data'] = ocr.data.reshape(flat_shape).astype(float)
    # Presumably 0.66 is the share of examples that goes to the first
    # (training) part - verify against sets.Split.
    split = sets.Split(0.66)
    train, test = split(ocr)
    return train, test
if __name__ == '__main__':
    # Train the sequence labeller on the OCR data and report the test
    # error after every epoch.
    train, test = read_dataset()
    _, length, image_size = train.data.shape
    num_classes = train.target.shape[2]
    data = tf.placeholder(tf.float32, [None, length, image_size])
    target = tf.placeholder(tf.float32, [None, length, num_classes])
    dropout = tf.placeholder(tf.float32)
    model = SequenceLabelling(data, target, dropout)
    # tf.initialize_all_variables is deprecated; prefer its replacement
    # and fall back to the old name only where the new one is missing.
    make_init_op = getattr(tf, 'global_variables_initializer', None)
    if make_init_op is None:
        make_init_op = tf.initialize_all_variables
    # The context manager closes the session (and frees its resources)
    # even if training is interrupted by an exception.
    with tf.Session() as sess:
        sess.run(make_init_op())
        for epoch in range(10):
            for _ in range(100):
                batch = train.sample(10)
                sess.run(model.optimize, {
                    data: batch.data, target: batch.target, dropout: 0.5})
            # Evaluate with a keep probability of 1, i.e. dropout disabled.
            error = sess.run(model.error, {
                data: test.data, target: test.target, dropout: 1})
            print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
@YiruS: I find this a bit confusing, as the description of the softmax_cross_entropy_with_logits function states "This op expects unscaled logits, since it performs a softmax on logits internally for efficiency. Do not call this op with the output of softmax, as it will produce incorrect results." and here self.prediction is the output of a softmax, so according to the documentation we shouldn't use this function here.
Hi, Thanks very much for the example. It helps me a lot for creating my own training model.
I don't fully understand the setting of weight and bias. Are only one weight and one bias needed even for a multi-layer network?
@adelsalehali1982 You may want to install [sets] like this:
$ pip install -e git+https://github.com/danijar/sets/#egg=sets
Now, [AttributeError: 'module' object has no attribute 'Ocr'] problem should disappear.
Thank you for the post.
I am learning RNN but I have a ValueError when I run this code:
I am having a hard time debugging this. Could you please help with it?
ValueError: Dimensions must be equal, but are 400 and 328 for 'rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/gru_cell/MatMul_2' (op: 'MatMul') with input shapes: [?,400], [328,400].
@zdarktknight try the below. I refactored the original implementation and commented it heavily for easy understanding. Danijar's sets library makes the job super easy, as it allows us to focus purely on understanding the algorithm. Good luck.
https://github.com/surfertas/deep_learning/blob/master/experiments/1-char_sequence_labeling_lstm.ipynb
@QoT, @adelsalehali1982 I suspect it is due to the name shadowing issue with the standard Python library. I'm facing the same problem here. I have no idea how everyone else runs the code successfully... I'm using Python 2.7.13 and virtualenv, in a virtual environment dedicated to TensorFlow. Running the following command tries to build danijar's sets library from source, am I guessing right?
$ pip install -e git+https://github.com/danijar/sets/#egg=sets
Frustratingly, it gives me the following error:
...
File "/Users/jx/ProgramData/tensorflow/src/sets/setup.py", line 94, in finalize_options
super().finalize_options()
TypeError: super() takes at least 1 argument (0 given)
----------------------------------------
Rolling back uninstall of sets
Command "/Users/jx/ProgramData/tensorflow/bin/python2.7 -c "import setuptools,
tokenize;__file__='/Users/jx/ProgramData/tensorflow/src/sets/setup.py';f=getattr(tokenize, 'open', open)
(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" develop --no-deps" failed with error code 1 in
/Users/jx/ProgramData/tensorflow/src/sets/
@surfertas, what you did is awesome. Thank you!!
Excuse me, I'm new to TensorFlow; thanks for your blog, which helps me a lot. I just want to know how I can save the model I trained and use it in another job. Looking forward to your reply!