# Instantly share code, notes, and snippets.
#
# Embed
# What would you like to do?
# A TensorFlow implementation of MinimalRNN.
# -*- coding: utf-8 -*-
# Copyright (C) 2017 by Akira TAMAMORI
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import RNNCell
class MinimalRNNCell(RNNCell):
    """MinimalRNN cell.

    This implementation is based on:
        Minmin Chen,
        "MinimalRNN: Toward More Interpretable and
        Trainable Recurrent Neural Networks,"
        https://arxiv.org/abs/1711.06788

    One step computes, for input ``x`` and previous state ``h_prev``:

        z = activation(W_x * x + b_z)                  # "Inputs" scope
        u = sigmoid(W_u * [z, h_prev] + b_u)           # "Gate" scope
        h = u * h_prev + (1 - u) * z

    The gate bias ``b_u`` is initialized to 1.0. The new state ``h`` is used
    as both the cell output and the next state.

    Args:
        num_units: int, size of the latent representation ``z`` and of the
            hidden state (also the output size).
        activation: callable applied to the linear map of the inputs;
            defaults to ``tf.tanh``.
        reuse: value forwarded to the ``reuse`` argument of the cell's outer
            ``tf.variable_scope``. ``None`` (the default) inherits the
            behavior of the enclosing scope.
    """

    def __init__(self, num_units, activation=tf.tanh, reuse=None):
        super(MinimalRNNCell, self).__init__()
        self._num_units = num_units
        self._activation = activation
        # Kept so that __call__ can honor it; previously this argument was
        # accepted but had no effect.
        self._reuse = reuse

    @property
    def output_size(self):
        """Size of the output produced by one step (``num_units``)."""
        return self._num_units

    @property
    def state_size(self):
        """Size of the recurrent state (``num_units``)."""
        return self._num_units

    def __call__(self, inputs, h_prev, scope=None):
        """Run one step of MinimalRNN.

        Args:
            inputs: 2D Tensor, batch x input_size (``linear`` requires a
                statically known second dimension).
            h_prev: 2D Tensor, batch x num_units, the previous state.
            scope: optional variable scope name; defaults to the class name.

        Returns:
            A pair ``(h, h)``: the new hidden state, returned both as the
            output and as the next state.
        """
        with tf.variable_scope(scope or type(self).__name__,
                               reuse=self._reuse):
            # Map the raw input into the latent space.
            with tf.variable_scope("Inputs"):
                z = self._activation(
                    linear([inputs], self._num_units, use_bias=True))
            # Update gate computed from the latent input and previous state.
            with tf.variable_scope("Gate"):
                u = tf.sigmoid(linear([z, h_prev],
                                      self._num_units, use_bias=True,
                                      bias_start=1.0))
            # Convex combination of the previous state and the latent input.
            h = u * h_prev + (1 - u) * z
        return h, h
def linear(args, output_size, use_bias=False, bias_start=0.0, scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    The inputs are concatenated along dimension 1 and multiplied by a single
    variable "Matrix" of shape [total input width, output_size]; this is
    equivalent to summing the per-input products.

    Args:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of W[i].
        use_bias: boolean, whether to add a bias term or not.
        bias_start: starting value to initialize the bias; 0 by default.
        scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
        A 2D Tensor with shape [batch x output_size] equal to
        sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
        ValueError: if `args` is None or empty, or if some of the arguments
            has unspecified or wrong shape.
    """
    if args is None or (isinstance(args, (list, tuple)) and not args):
        raise ValueError("`args` must be specified")
    if not isinstance(args, (list, tuple)):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError(
                "Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            # The weight matrix needs a statically known input width.
            raise ValueError(
                "Linear expects shape[1] of arguments: %s" % str(shapes))
        total_arg_size += shape[1]

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [total_arg_size, output_size])
        if len(args) == 1:
            res = tf.matmul(args[0], matrix)
        else:
            # TensorFlow >= 1.0 signature is tf.concat(values, axis); the
            # pre-1.0 order (concat_dim, values) fails on those versions.
            res = tf.matmul(tf.concat(args, axis=1), matrix)
        if not use_bias:
            return res
        bias_term = tf.get_variable(
            "Bias", [output_size],
            initializer=tf.constant_initializer(bias_start))
    return res + bias_term
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment