# CharRNN: a Keras character-level RNN for text generation.
# Gist by @fedden, created April 21, 2018.

# Keras 2.x style imports (the gist predates tf.keras being standard).
import numpy as np
from keras.callbacks import LambdaCallback
from keras.layers import LSTM, Dense, Dropout, Embedding
from keras.models import Sequential, load_model
from keras.optimizers import RMSprop


class CharRNN:

    def __init__(self,
                 vocabulary_size,
                 sequence_length,
                 dropout_rate=0.0,
                 batch_size=32,
                 rnn_size=128,
                 amount_layers=2,
                 embedding_size=32,
                 learning_rate=0.001,
                 clip_norm=5.0):
        """Construct the CharRNN.

        Sets the key member fields and then builds the Keras network.

        Params:
            vocabulary_size: int - the number of unique chars in the dictionary.
            sequence_length: int - the length of the sequences input to the RNN.
            dropout_rate: float - the rate at which neurons are dropped in the network.
            batch_size: int - the number of sequences per batch passed to the network.
            rnn_size: int - the size of the RNN memory.
            amount_layers: int - the depth of the RNN.
            embedding_size: int - the size of the embedding vectors at the input of the RNN.
            learning_rate: float - the size of the gradient updates to the neural network.
            clip_norm: float - the maximum l2 norm at which gradients are clipped.
        """
        # Set the member fields.
        self.vocabulary_size = vocabulary_size
        self.embedding_size = embedding_size
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.amount_layers = amount_layers
        self.rnn_size = rnn_size
        self.sequence_length = sequence_length
        self.learning_rate = learning_rate
        self.clip_norm = clip_norm
        # Build the model.
        self.model = self.build_model()

    def build_model(self):
        """Builds the model.

        Called in the constructor.

        Returns:
            model: Keras Sequential - the RNN used to model the biographies!
        """
        # Model container object.
        model = Sequential()
        # An embedding layer maps sequences of tokens to sequences of
        # vectors of length embedding_size.
        model.add(Embedding(self.vocabulary_size,
                            self.embedding_size,
                            input_length=self.sequence_length))
        # Add dropout (a no-op if the rate is zero!)
        model.add(Dropout(self.dropout_rate))
        # Stack layers of RNNs.
        for layer_number in range(self.amount_layers):
            # Every layer but the last returns the full sequence; the last
            # returns only its final value.
            many = layer_number != (self.amount_layers - 1)
            model.add(LSTM(self.rnn_size, return_sequences=many))
            model.add(Dropout(self.dropout_rate))
        # A dense layer resizes the output to the number of unique chars in
        # the bio dataset.
        model.add(Dense(self.vocabulary_size, activation='softmax'))
        # RMSprop is a good default optimiser for RNNs!
        optimiser = RMSprop(self.learning_rate, clipnorm=self.clip_norm)
        # Sparse categorical cross entropy lets us use integer tokens as
        # targets directly, without one-hot encoding them.
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimiser)
        return model
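
    # A note on shapes (not in the original gist): with the defaults
    # (vocabulary_size=V, sequence_length=T), the stack above is:
    #   Embedding: (batch, T)      -> (batch, T, 32)
    #   LSTM x2:   (batch, T, 32)  -> (batch, T, 128) -> (batch, 128)
    #   Dense:     (batch, 128)    -> (batch, V) softmax over the vocabulary.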

    def inference(self,
                  start_tokens,
                  dataset,
                  inference_length=400,
                  temperature=0.2,
                  include_start_tokens=True):
        """Use the trained model.

        Samples the model with a temperature; the greater the value, the
        more random the output is. The start tokens could either be
        completely random or sampled from the dataset.

        Params:
            start_tokens: list(int) - the tokens to start the model with.
            dataset: SequenceDataset - the dataset class to convert the tokens to chars.
            inference_length: int - the length of the generated material.
            temperature: float - the randomness of the generated material.
            include_start_tokens: bool - whether to prepend the start tokens
                to the returned string.

        Returns:
            generated: str - string of generated characters.
        """
        # Helper function to sample an index from a probability array (the
        # output layer of the network!)
        def sample(preds, temperature=1.0):
            preds = np.asarray(preds).astype('float64')
            # A temperature of zero would divide by zero, so treat it as
            # greedy (argmax) decoding.
            if temperature <= 0.0:
                return np.argmax(preds)
            # Rescale the log-probabilities by the temperature, then
            # renormalise with a softmax. The small epsilon guards against
            # log(0).
            preds = np.log(preds + 1e-10) / temperature
            exp_preds = np.exp(preds)
            preds = exp_preds / np.sum(exp_preds)
            probas = np.random.multinomial(1, preds, 1)
            return np.argmax(probas)

        # Sanity check.
        if len(start_tokens) != self.sequence_length:
            raise ValueError('Argument start_tokens must be sequence length!')
        # Don't operate on batches of sequences, just a single one. Convert
        # numpy arrays to lists.
        if type(start_tokens) is np.ndarray:
            if start_tokens.ndim > 1:
                raise ValueError('Pass in a single sequence, not >=2D')
            start_tokens = start_tokens.tolist()
        # String to hold the generated chars, optionally seeded with the
        # chars of the start tokens.
        generated = ''
        if include_start_tokens:
            generated = ''.join(dataset.token_to_char[t] for t in start_tokens)
        # Tokens to be input into the model.
        tokens = start_tokens
        # Loop and generate the material.
        for _ in range(inference_length):
            # Create the inputs in a batch shape.
            model_inputs = np.array(tokens).reshape((1, self.sequence_length))
            # Get the predicted distribution based on the inputs.
            preds = self.model.predict(model_inputs, verbose=0)[0]
            # Derive the next token and then char from the predicted vector.
            next_token = sample(preds, temperature)
            next_char = dataset.token_to_char[next_token]
            # Store the char and slide the input window along by one token.
            generated += next_char
            tokens = tokens[1:] + [next_token]
        return generated

    def save(self, path='model.h5'):
        """Save the model weights and architecture to disk."""
        self.model.save(path)

    def load(self, path='model.h5'):
        """Load a previously saved model from disk."""
        self.model = load_model(path)

    def train(self, dataset, epochs=60, print_inference_progress=False):
        """Optimise the model on the dataset.

        Params:
            dataset: SequenceDataset - the dataset class to convert the tokens to chars.
            epochs: int - the number of times the model will see each data point in the training set.
            print_inference_progress: bool - print the progress of the model at the end of each epoch.
        """
        # Function invoked at the end of each epoch. Prints generated text.
        def on_epoch_end(epoch, logs):
            print()
            print('----- Generating text after Epoch: %d' % epoch)
            for temperature in [0.0, 0.25, 0.5, 0.8]:
                print('----- temperature:', temperature)
                results = self.inference(dataset.empty_start_tokens,
                                         dataset,
                                         inference_length=400,
                                         temperature=temperature,
                                         include_start_tokens=True)
                print(results)

        # Potentially print the progress of the model during training.
        callbacks = [LambdaCallback(on_epoch_end=on_epoch_end)] if print_inference_progress else []
        # Fit the model on the dataset.
        self.model.fit(dataset.dataset_x,
                       dataset.dataset_y,
                       batch_size=self.batch_size,
                       epochs=epochs,
                       callbacks=callbacks)
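

# ---------------------------------------------------------------------------
# A minimal usage sketch (not part of the original gist). It assumes a
# SequenceDataset-like object exposing the attributes the class above relies
# on: `dataset_x`, `dataset_y`, `empty_start_tokens` and `token_to_char`.
# The `ToyDataset` below is a hypothetical stand-in built from repeated text,
# purely for illustration.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    text = 'the quick brown fox jumps over the lazy dog. ' * 100
    chars = sorted(set(text))
    char_to_token = {c: i for i, c in enumerate(chars)}
    sequence_length = 40

    class ToyDataset:
        """Hypothetical stand-in for the SequenceDataset used above."""

        def __init__(self, text, sequence_length):
            tokens = [char_to_token[c] for c in text]
            self.token_to_char = {i: c for c, i in char_to_token.items()}
            # Sliding windows of tokens as inputs, the next token as target.
            self.dataset_x = np.array([tokens[i:i + sequence_length]
                                       for i in range(len(tokens) - sequence_length)])
            self.dataset_y = np.array(tokens[sequence_length:])
            self.empty_start_tokens = tokens[:sequence_length]

    dataset = ToyDataset(text, sequence_length)
    char_rnn = CharRNN(vocabulary_size=len(chars),
                       sequence_length=sequence_length)
    char_rnn.train(dataset, epochs=1)
    print(char_rnn.inference(dataset.empty_start_tokens,
                             dataset,
                             inference_length=200,
                             temperature=0.5))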