#!/usr/bin/env python
# coding: utf-8
# Python 2 script targeting an early TensorFlow release that still shipped
# the tensorflow.models.rnn package (cPickle and the builtin reduce are
# Python 2 only).
from cPickle import load

import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell, seq2seq
from scipy.stats import entropy


def load_data(data_filename):
    """Load pickled (train, validation, test) splits and flatten each one."""
    with open(data_filename, 'rb') as f:  # binary mode: pickle protocols > 0 require it
        train_data, validation_data, test_data = load(f)

    def flat(ary):
        return reduce(lambda t, x: t + x, ary, [])

    return flat(train_data), flat(validation_data), flat(test_data)


def JSD(P, Q):
    """Jensen-Shannon divergence between two (unnormalized) distributions."""
    _P = P / np.linalg.norm(P, ord=1)
    _Q = Q / np.linalg.norm(Q, ord=1)
    _M = 0.5 * (_P + _Q)
    return 0.5 * (entropy(_P, _M) + entropy(_Q, _M))
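
# Example (not in the original gist): with scipy's natural-log entropy,
# JSD is 0 for identical distributions and at most ln(2) ~= 0.6931 for
# distributions with disjoint support:
#   JSD(np.array([0.5, 0.5]), np.array([0.5, 0.5]))  # -> 0.0
#   JSD(np.array([1.0, 0.0]), np.array([0.0, 1.0]))  # -> ~0.6931 (= ln 2)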


def ptb_iterator(raw_data, batch_size, num_steps):
    """Iterate on the raw PTB data.

    This generates batch_size pointers into the raw PTB data, and allows
    minibatch iteration along these pointers.

    Args:
        raw_data: one of the raw data outputs from ptb_raw_data.
        batch_size: int, the batch size.
        num_steps: int, the number of unrolls.

    Yields:
        Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
        The second element of the tuple is the same data time-shifted to the
        right by one.

    Raises:
        ValueError: if batch_size or num_steps are too high.
    """
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

    epoch_size = (batch_len - 1) // num_steps
    if epoch_size == 0:
        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")

    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield (x, y)
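
# Example (not in the original gist): with raw_data = range(13),
# batch_size = 2 and num_steps = 3, the data is reshaped into two rows
# [0..5] and [6..11], epoch_size = (6 - 1) // 3 = 1, and one pair is yielded:
#   x = [[0, 1, 2], [6, 7, 8]]
#   y = [[1, 2, 3], [7, 8, 9]]  # x shifted right by one time step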


# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/ptb_word_lm.py
class PTBModel(object):
    """The PTB model."""

    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)
            # self.inputs_shape = tf.shape(inputs)  # [batch_size, num_steps, size]

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)
        # The live code below uses rnn.rnn() (imported at the top with the
        # other tensorflow.models.rnn modules); the explicit unrolled loop
        # from the PTB tutorial is kept commented out below for reference.
        inputs = [tf.squeeze(input_, [1])
                  for input_ in tf.split(1, num_steps, inputs)]
        outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        # outputs = []
        # states = []
        # state = self._initial_state
        # with tf.variable_scope("RNN"):
        #     for time_step in range(num_steps):
        #         if time_step > 0: tf.get_variable_scope().reuse_variables()
        #         (cell_output, state) = cell(inputs[:, time_step, :], state)
        #         outputs.append(cell_output)
        #         states.append(state)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        self._logits = logits = tf.nn.xw_plus_b(
            output,
            tf.get_variable("softmax_w", [size, vocab_size]),
            tf.get_variable("softmax_b", [vocab_size]))
        self._prob = tf.nn.softmax(logits, name="prob")
        loss = seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])],
            vocab_size)
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    def assign_lr(self, session, lr_value):
        session.run(tf.assign(self.lr, lr_value))

    @property
    def input_data(self):
        return self._input_data

    @property
    def targets(self):
        return self._targets

    @property
    def initial_state(self):
        return self._initial_state

    @property
    def cost(self):
        return self._cost

    @property
    def final_state(self):
        return self._final_state

    @property
    def lr(self):
        return self._lr

    @property
    def train_op(self):
        return self._train_op

    @property
    def logits(self):
        return self._logits

    @property
    def prob(self):
        return self._prob
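

# Added example (not in the original gist): a minimal run_epoch sketch
# modeled on the linked ptb_word_lm.py. `m` is a PTBModel, `data` is a flat
# list of word ids, and `eval_op` is m.train_op (training) or tf.no_op().
def run_epoch(session, m, data, eval_op):
    costs = 0.0
    iters = 0
    state = session.run(m.initial_state)  # zero state; fed forward batch to batch
    for step, (x, y) in enumerate(ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
    return np.exp(costs / iters)  # perplexity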


class SmallConfig(object):
    """Small config."""
    num_steps = 25
    batch_size = 20
    num_layers = 2
    hidden_size = 20
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    vocab_size = 11
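

# Added example (not in the original gist): a minimal training sketch showing
# how the pieces above fit together, following the linked ptb_word_lm.py.
# The pickle filename and the learning-rate schedule are assumptions.
def main():
    config = SmallConfig()
    train_data, _, _ = load_data("data.pkl")  # hypothetical file

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        session.run(tf.initialize_all_variables())

        for epoch in range(config.max_max_epoch):
            # Halve the learning rate each epoch after max_epoch warm epochs.
            lr_decay = config.lr_decay ** max(epoch - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            perplexity = run_epoch(session, m, train_data, m.train_op)
            print("Epoch %d: train perplexity %.3f" % (epoch + 1, perplexity))


if __name__ == '__main__':
    main()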