#!/usr/bin/env python
# coding: utf-8
# Python 2 script targeting an early TensorFlow release that still shipped
# the tensorflow.models.rnn package (cPickle and the builtin reduce are
# Python 2 only).
from cPickle import load

import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell, seq2seq
from scipy.stats import entropy


def load_data(data_filename):
    """Load pickled (train, validation, test) splits and flatten each one."""
    with open(data_filename, 'rb') as f:  # binary mode: pickle protocols > 0 require it
        train_data, validation_data, test_data = load(f)

    def flat(ary):
        return reduce(lambda t, x: t + x, ary, [])

    return flat(train_data), flat(validation_data), flat(test_data)


def JSD(P, Q):
    """Jensen-Shannon divergence between two (unnormalized) distributions."""
    _P = P / np.linalg.norm(P, ord=1)
    _Q = Q / np.linalg.norm(Q, ord=1)
    _M = 0.5 * (_P + _Q)
    return 0.5 * (entropy(_P, _M) + entropy(_Q, _M))
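
# Example (not in the original gist): with scipy's natural-log entropy,
# JSD is 0 for identical distributions and at most ln(2) ~= 0.6931 for
# distributions with disjoint support:
#   JSD(np.array([0.5, 0.5]), np.array([0.5, 0.5]))  # -> 0.0
#   JSD(np.array([1.0, 0.0]), np.array([0.0, 1.0]))  # -> ~0.6931 (= ln 2)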


def ptb_iterator(raw_data, batch_size, num_steps):
    """Iterate on the raw PTB data.

    This generates batch_size pointers into the raw PTB data, and allows
    minibatch iteration along these pointers.

    Args:
        raw_data: one of the raw data outputs from ptb_raw_data.
        batch_size: int, the batch size.
        num_steps: int, the number of unrolls.

    Yields:
        Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
        The second element of the tuple is the same data time-shifted to the
        right by one.

    Raises:
        ValueError: if batch_size or num_steps are too high.
    """
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

    epoch_size = (batch_len - 1) // num_steps
    if epoch_size == 0:
        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")

    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield (x, y)
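
# Example (not in the original gist): with raw_data = range(13),
# batch_size = 2 and num_steps = 3, the data is reshaped into two rows
# [0..5] and [6..11], epoch_size = (6 - 1) // 3 = 1, and one pair is yielded:
#   x = [[0, 1, 2], [6, 7, 8]]
#   y = [[1, 2, 3], [7, 8, 9]]  # x shifted right by one time step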


# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/ptb_word_lm.py
class PTBModel(object):
    """The PTB model."""

    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)
            # self.inputs_shape = tf.shape(inputs)  # [batch_size, num_steps, size]

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)
        # The live code below uses rnn.rnn() (imported at the top with the
        # other tensorflow.models.rnn modules); the explicit unrolled loop
        # from the PTB tutorial is kept commented out below for reference.
        inputs = [tf.squeeze(input_, [1])
                  for input_ in tf.split(1, num_steps, inputs)]
        outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        # outputs = []
        # states = []
        # state = self._initial_state
        # with tf.variable_scope("RNN"):
        #     for time_step in range(num_steps):
        #         if time_step > 0: tf.get_variable_scope().reuse_variables()
        #         (cell_output, state) = cell(inputs[:, time_step, :], state)
        #         outputs.append(cell_output)
        #         states.append(state)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        self._logits = logits = tf.nn.xw_plus_b(
            output,
            tf.get_variable("softmax_w", [size, vocab_size]),
            tf.get_variable("softmax_b", [vocab_size]))
        self._prob = tf.nn.softmax(logits, name="prob")
        loss = seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])],
            vocab_size)
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    def assign_lr(self, session, lr_value):
        session.run(tf.assign(self.lr, lr_value))

    @property
    def input_data(self):
        return self._input_data

    @property
    def targets(self):
        return self._targets

    @property
    def initial_state(self):
        return self._initial_state

    @property
    def cost(self):
        return self._cost

    @property
    def final_state(self):
        return self._final_state

    @property
    def lr(self):
        return self._lr

    @property
    def train_op(self):
        return self._train_op

    @property
    def logits(self):
        return self._logits

    @property
    def prob(self):
        return self._prob
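

# Added example (not in the original gist): a minimal run_epoch sketch
# modeled on the linked ptb_word_lm.py. `m` is a PTBModel, `data` is a flat
# list of word ids, and `eval_op` is m.train_op (training) or tf.no_op().
def run_epoch(session, m, data, eval_op):
    costs = 0.0
    iters = 0
    state = session.run(m.initial_state)  # zero state; fed forward batch to batch
    for step, (x, y) in enumerate(ptb_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run(
            [m.cost, m.final_state, eval_op],
            {m.input_data: x, m.targets: y, m.initial_state: state})
        costs += cost
        iters += m.num_steps
    return np.exp(costs / iters)  # perplexity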


class SmallConfig(object):
    """Small config."""
    num_steps = 25
    batch_size = 20
    num_layers = 2
    hidden_size = 20
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    vocab_size = 11
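

# Added example (not in the original gist): a minimal training sketch showing
# how the pieces above fit together, following the linked ptb_word_lm.py.
# The pickle filename and the learning-rate schedule are assumptions.
def main():
    config = SmallConfig()
    train_data, _, _ = load_data("data.pkl")  # hypothetical file

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        session.run(tf.initialize_all_variables())

        for epoch in range(config.max_max_epoch):
            # Halve the learning rate each epoch after max_epoch warm epochs.
            lr_decay = config.lr_decay ** max(epoch - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            perplexity = run_epoch(session, m, train_data, m.train_op)
            print("Epoch %d: train perplexity %.3f" % (epoch + 1, perplexity))


if __name__ == '__main__':
    main()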