Created
June 21, 2018 04:13
-
-
Save chatrapathik/fbd37f42984c6310dde200528f36c4f5 to your computer and use it in GitHub Desktop.
Keras `fit_generator` example: training a character-level LSTM on a small word list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import pickle

import numpy as np
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Cap this process' GPU memory use at 40% so the GPU can be shared.
# NOTE(review): TF1-style API (ConfigProto / Session / tensorflow_backend);
# this requires TF 1.x with standalone Keras — confirm the target environment.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.4
set_session(tf.Session(config=config))

# Keras imports kept after set_session so the session limit is in place
# before any model is built (original ordering preserved).
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Dropout
from keras.utils import to_categorical
from keras import optimizers
class StringEmbeddingsScript(object):
    """Train a small character-level LSTM on a fixed word list.

    Each training sample is a word wrapped in start/end sentinels and
    one-hot encoded; the label is the word's last character. Training
    runs through Keras' fit_generator.
    """

    # Sentinel characters: padding, start-of-word, end-of-word.
    CHAR_NONE = '\x00'
    CHAR_START = '\x01'
    CHAR_END = '\x02'
    WORDS = ['India', 'keras', 'stackoverflow', 'tensorflow']

    def create_model(self, num_units, word_len, num_unique_chars):
        """Build and compile a single-layer LSTM classifier.

        num_units: LSTM hidden size.
        word_len: fixed sequence length (timesteps).
        num_unique_chars: one-hot vocabulary size (input features and
            output classes).
        Returns the compiled Sequential model.
        """
        input_shape = (word_len, num_unique_chars)
        model = Sequential()
        model.add(LSTM(num_units, input_shape=input_shape, unroll=True))
        model.add(Dense(num_unique_chars, activation='softmax'))
        model.compile(optimizer=optimizers.Adam(lr=0.003),
                      loss='categorical_crossentropy',
                      metrics=['mse'])
        return model

    def get_char_to_int(self):
        """Derive vocabulary metadata from self.WORDS.

        Returns (max_len, nchars, nwords, words, charmap): the padded
        sequence length, vocabulary size, word count, the word list, and
        a char->index map with the three sentinels at indices 0..2.
        """
        words = self.WORDS
        max_len = max(len(w) for w in words) + 2  # +2 for start/end sentinels
        nwords = len(words)
        chars = sorted(set(''.join(words)))
        chars = [self.CHAR_NONE, self.CHAR_START, self.CHAR_END] + chars
        charmap = {c: i for i, c in enumerate(chars)}
        nchars = len(chars)
        return max_len, nchars, nwords, words, charmap

    def generator(self, max_len, nchars, nwords, words, charmap, b_size):
        """Endlessly yield (data, labels) batches for fit_generator.

        data: (n, max_len, nchars) one-hot of START + word-without-last-char
            + END, right-padded with CHAR_NONE.
        labels: (n, nchars) one-hot of each word's last character.
        """
        # Hoisted loop invariants out of the infinite loop.
        char_none = to_categorical(charmap[self.CHAR_NONE], num_classes=nchars)
        # BUG FIX: was (nwords // b_size) + 1, which produced an extra
        # empty batch whenever nwords divides evenly by b_size.
        num_batches = (nwords + b_size - 1) // b_size
        while 1:
            for b in range(num_batches):
                split_words = words[b * b_size:(b + 1) * b_size]
                n = len(split_words)
                data = np.zeros(shape=(n, max_len, nchars), dtype=np.float32)
                labels = np.zeros(shape=(n, nchars), dtype=np.float32)
                # Inner index renamed to j: the original reused `i` for both
                # the batch loop and the sample loop.
                for j in range(n):
                    w = split_words[j][:-1]
                    last_char = split_words[j][-1]
                    w = '%s%s%s' % (self.CHAR_START, w, self.CHAR_END)
                    w = [to_categorical(charmap[x], num_classes=nchars) for x in w]
                    w = w + ([char_none] * (max_len - len(w)))
                    data[j] = w
                    # BUG FIX: the original line was missing its closing
                    # parenthesis (syntax error).
                    labels[j] = to_categorical(charmap[last_char], num_classes=nchars)
                yield data, labels

    def run(self):
        """Build the model and train it on WORDS for 10 epochs."""
        neurons = 128
        b_size = 2
        max_len, nchars, nwords, words, charmap = self.get_char_to_int()
        model = self.create_model(neurons, max_len, nchars)
        gen = self.generator(max_len, nchars, nwords, words, charmap, b_size)
        # BUG FIX: steps_per_epoch must be an integer and must match the
        # generator's batching; was the float nwords/2 with a hard-coded 2.
        steps = (nwords + b_size - 1) // b_size
        model.fit_generator(gen, steps_per_epoch=steps, epochs=10)
if __name__ == '__main__':
    # Script entry point: build and train the toy character model.
    script = StringEmbeddingsScript()
    script.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment