Skip to content

Instantly share code, notes, and snippets.

@kjanjua26
Created August 8, 2017 10:50
Show Gist options
  • Save kjanjua26/b46388bbde9ded5cf1f077a9f0dedc4f to your computer and use it in GitHub Desktop.
Save kjanjua26/b46388bbde9ded5cf1f077a9f0dedc4f to your computer and use it in GitHub Desktop.
#Handling the imports
import sklearn
from sklearn.model_selection import train_test_split
import pandas
import seaborn as sb
import matplotlib as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
import cv2
from PIL import Image
from keras import backend as K
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import GRU
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
from keras.preprocessing import image
import keras.callbacks
import editdistance
import datetime
# Load the dataset index. The code below reads a 'path' column (image file
# path) and a 'gt' column (the label for that image; presumably the
# ground-truth text -- confirm against the CSV).
read_file = pandas.read_csv("/home/kamranjanjua/ownKeras/data.csv")
print("Info: ")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in a print (that emitted a stray "None" line).
read_file.info()

# Disabled experiments, kept for reference:
# x = read_file.ix[:,0:10]
# y = read_file['gt']
# y = np.asarray(y)
#
# y = np.ravel(read_file.type)
# x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state=42)
# print "X Train: ", x_train.shape
# print "X Test: ", x_test.shape
# print "Y Train: ", y_train.shape
# print "Y Test: ", y_test.shape
#
# in_data = read_file['path']
# in_data = np.asarray(in_data)
# print in_data
# print "Shape: ", in_data.shape

print("\n")
# Accumulators filled by the image-loading loop that follows.
x = []
y = []
print("AUNN")
print(read_file)
# Load every image listed in the CSV as a 2-D array and collect it together
# with its label, then split the data into train and test sets.
# NOTE(review): the original indentation of this script was lost; the extent
# of the loop body is reconstructed from the statement order -- confirm.
for path, label in zip(read_file['path'], read_file['gt']):
    path = path.strip('\n')
    # Flag 0 asks OpenCV for a single-channel (grayscale) image, which is why
    # img.shape unpacks into exactly two values below.
    img = cv2.imread(path, 0)
    if img is None:
        # cv2.imread reports an unreadable or missing file by returning None
        # rather than raising, so fail here and name the offending path.
        raise IOError("Could not read image: %s" % (path,))
    # Resizing to a common size (height = 64, width = 128) is disabled:
    # res_img = cv2.resize(img, (128,64))
    # cv2.imwrite(i,res_img)
    if x and img.shape != x[0].shape:
        # Without the resize step the images can only be stacked into one
        # array when they already share a shape.
        raise ValueError("Image %s has shape %s, expected %s"
                         % (path, img.shape, x[0].shape))
    h, w = img.shape
    x.append(img)
    y.append(label)
    size = img.size

if not x:
    raise ValueError("No images were listed in the CSV file")

# h, w and size stay defined at module level; train() and the entry point
# read h and w later on.
print("H:  %s" % (h,))
print("W:  %s" % (w,))
print("S:  %s" % (size,))

x = np.array(x, dtype=np.float32)
y = np.array(y)
# train_test_split returns NumPy arrays for NumPy inputs, so no further
# conversion of the four results is needed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42)

print("Printing the shapes. \n")
print("X_train shape:  %s" % (x_train.shape,))
print("Y_train shape:  %s" % (y_train.shape,))
print("X_test shape:  %s" % (x_test.shape,))
print("Y_test shape:  %s" % (y_test.shape,))
print("\n")
#Input Shape for CNN
def getShape(w, h, c=1):
    """Return ``[w, h, c]`` as a one-dimensional float32 NumPy array.

    Args:
        w: Image width.
        h: Image height.
        c: Number of channels. Defaults to 1, the value that was previously
            hard-coded, so existing two-argument calls are unaffected.

    Returns:
        numpy.ndarray of dtype float32 holding ``[w, h, c]`` in that order.
    """
    # A single construction replaces the former asarray -> array -> astype chain.
    return np.array([w, h, c], dtype=np.float32)
#print "Input_Shape: ", input_shape
# Single pre-formatted string so the output is the same on Python 2 and 3.
print("Input_Shape:  %s" % (getShape(w, h),))
"""
#Stacking the images
for i in read_file['path']:
i = i.strip('\n')
img = cv2.imread(i)
h,w,c = img.shape
input_shape = getShape(w,h,c)
stacked_list.append(input_shape)
stacked_list = np.array(input_shape).astype(np.float32)
print stacked_list
"""
#Neural Network Model
def next_train(self):
    """Yield training batches forever.

    Each iteration fetches a batch of ``minibatch_size`` items starting at
    ``cur_train_index`` via ``get_batch(..., train=True)`` and advances the
    index. Once the index reaches ``val_split`` it is reduced modulo 32 and
    ``X_text``, ``Y_data`` and ``Y_len`` are replaced by the result of
    ``shuffle_mats_or_lists``.
    """
    # NOTE(review): 32 is hard-coded; presumably it mirrors the minibatch
    # size -- confirm before using a different minibatch_size.
    while True:
        batch = self.get_batch(self.cur_train_index, self.minibatch_size, train=True)
        self.cur_train_index += self.minibatch_size
        if self.cur_train_index >= self.val_split:
            self.cur_train_index %= 32
            shuffled = shuffle_mats_or_lists(
                [self.X_text, self.Y_data, self.Y_len], self.val_split)
            self.X_text, self.Y_data, self.Y_len = shuffled
        yield batch
def next_val(self):
    """Yield validation batches forever.

    Each iteration fetches a batch of ``minibatch_size`` items starting at
    ``cur_val_index`` via ``get_batch(..., train=False)`` and advances the
    index. Once the advanced index reaches ``num_words`` it is reset to
    ``val_split`` plus the index modulo 32.
    """
    # NOTE(review): 32 is hard-coded; presumably it mirrors the minibatch
    # size -- confirm before using a different minibatch_size.
    while True:
        batch = self.get_batch(self.cur_val_index, self.minibatch_size, train=False)
        advanced = self.cur_val_index + self.minibatch_size
        if advanced >= self.num_words:
            advanced = self.val_split + advanced % 32
        self.cur_val_index = advanced
        yield batch
def on_train_begin(self, logs=None):
    """Prepare the word list and the initial paint function before training.

    Args:
        logs: Unused. Defaults to None instead of a shared mutable ``{}``.

    Calls ``self.build_word_list(16000, 4, 1)`` (the meaning of those
    arguments is defined by ``build_word_list``, which is not visible here)
    and binds ``self.paint_func`` to a ``paint_text`` wrapper with rotation,
    ``ud`` and multiple fonts all switched off.
    """
    self.build_word_list(16000, 4, 1)
    # img_w / img_h are looked up when paint_func is called, not when it is bound.
    self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                              rotate=False, ud=False, multi_fonts=False)
def on_epoch_begin(self, epoch, logs=None):
    """Rebind the paint function by epoch to implement curriculum learning.

    Args:
        epoch: Index of the epoch that is about to start.
        logs: Unused. Defaults to None instead of a shared mutable ``{}``.

    ``paint_text`` flags per epoch range:
        * epoch < 3:       ``paint_func`` is left untouched
        * 3 <= epoch < 6:  ud=True
        * 6 <= epoch < 9:  ud=True, multi_fonts=True
        * epoch >= 9:      rotate=True, ud=True, multi_fonts=True

    From epoch 21 on, while ``self.max_string_len`` is below 12, the word
    list is rebuilt with ``build_word_list(32000, 12, 0.5)``.
    """
    if epoch >= 9:
        flags = dict(rotate=True, ud=True, multi_fonts=True)
    elif epoch >= 6:
        flags = dict(rotate=False, ud=True, multi_fonts=True)
    elif epoch >= 3:
        flags = dict(rotate=False, ud=True, multi_fonts=False)
    else:
        flags = None

    if flags is not None:
        # img_w / img_h are looked up when paint_func is called, not when it is bound.
        self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h, **flags)

    if epoch >= 21 and self.max_string_len < 12:
        self.build_word_list(32000, 12, 0.5)
def decode_batch(test_func, word_batch):
    """Decode a batch of network outputs into strings, best class per step.

    Args:
        test_func: Callable invoked as ``test_func([word_batch])``; element 0
            of its result is indexed as ``out[sample, time_step, class]``.
        word_batch: Batch handed to ``test_func`` unchanged.

    Returns:
        A list with one decoded string per sample in the batch.
    """
    # Imported here because the file's import section does not provide it.
    import itertools

    out = test_func([word_batch])[0]
    ret = []
    for j in range(out.shape[0]):
        # The first two time steps are skipped, as ctc_lambda_func also does.
        best_per_step = np.argmax(out[j, 2:], 1)
        # Collapse runs of the same class into a single occurrence.
        collapsed = [k for k, _ in itertools.groupby(best_per_step)]
        # 0-25 are 'a'-'z', 26 is space, 27 is the CTC blank char; anything
        # outside 0-26 is dropped.
        chars = []
        for c in collapsed:
            if 0 <= c < 26:
                chars.append(chr(int(c) + ord('a')))
            elif c == 26:
                chars.append(' ')
        ret.append(''.join(chars))
    return ret
def get_batch(self, index, size, train):
    """Assemble one batch of painted text images plus CTC bookkeeping arrays.

    Args:
        index: Offset into ``X_text`` / ``Y_data`` / ``Y_len`` of the first item.
        size: Number of items in the batch.
        train: When truthy, rows with ``i > size - 4`` are painted from the
            empty string and labelled with ``self.blank_label``.

    Returns:
        ``(inputs, outputs)``: ``inputs`` is a dict with the keys
        ``the_input``, ``the_labels``, ``input_length``, ``label_length`` and
        ``source_str``; ``outputs`` is ``{'ctc': zeros(size)}``.
    """
    channels_first = K.image_data_format() == 'channels_first'
    # width and height are backwards from typical Keras convention
    # because width is the time dimension when it gets fed into the RNN
    if channels_first:
        X_data = np.ones([size, 1, self.img_w, self.img_h])
    else:
        X_data = np.ones([size, self.img_w, self.img_h, 1])

    labels = np.ones([size, 8])
    input_length = np.zeros([size, 1])
    label_length = np.zeros([size, 1])
    source_str = []

    for row in range(size):
        # Mix in some blank inputs. This seems to be important for
        # achieving translational invariance
        use_blank = train and row > size - 4
        text = '' if use_blank else self.X_text[index + row]

        painted = self.paint_func(text)[0, :, :].T
        if channels_first:
            X_data[row, 0, 0:self.img_w, :] = painted
        else:
            X_data[row, 0:self.img_w, :, 0] = painted

        if use_blank:
            labels[row, 0] = self.blank_label
            label_length[row] = 1
        else:
            labels[row, :] = self.Y_data[index + row]
            label_length[row] = self.Y_len[index + row]

        input_length[row] = self.img_w // self.downsample_factor - 2
        source_str.append(text)

    inputs = {
        'the_input': X_data,
        'the_labels': labels,
        'input_length': input_length,
        'label_length': label_length,
        'source_str': source_str,  # used for visualization only
    }
    # dummy data for dummy loss function
    outputs = {'ctc': np.zeros([size])}
    return (inputs, outputs)
def ctc_lambda_func(args):
    """Compute the CTC batch cost for use inside a Keras Lambda layer.

    Args:
        args: Sequence ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost`` on the predictions with their
        first two time steps removed.
    """
    predictions, labels, input_length, label_length = args
    # Dropping the first 2 steps is critical: the first couple of outputs of
    # the RNN tend to be garbage.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, trimmed, input_length, label_length)
def train(run_name, start_epoch, stop_epoch, img_w):
    """Build the conv + bidirectional-GRU CTC model and fit it.

    Args:
        run_name: Sub-directory of ``OUTPUT_DIR`` holding ``weightsNN.h5``
            files; only read when ``start_epoch > 0``.
        start_epoch: Epoch to resume from. When positive, the weights saved
            for ``start_epoch - 1`` are loaded before fitting.
        stop_epoch: Epoch at which fitting stops.
        img_w: Width of the input images.

    Reads the module-level ``h`` (image height), ``x_train`` and ``y_train``
    set up by the loading code earlier in this file.
    """
    # Imported here because the file's import section does not provide it.
    import os

    # Input parameters
    img_h = h

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    act = 'relu'
    print("INPUT TO CONV")
    print(input_shape)
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    # Two 2x2 poolings shrink each spatial axis by pool_size ** 2; the height
    # axis and the filters are folded into one feature axis per time step.
    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs.
    # gru_1 had been commented out while still being used in the merge below,
    # which raised NameError; it is restored here.
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    # NOTE(review): decode_batch treats class 26 as space and 27 as the CTC
    # blank, which implies 28 output classes; 26 here looks inconsistent --
    # confirm against the label encoding.
    inner = Dense(26, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    # Give the maximum string length
    labels = Input(name='the_labels', shape=[8], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    # clipnorm seems to speeds up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    if start_epoch > 0:
        # NOTE(review): OUTPUT_DIR is not defined in the visible part of this
        # file; it must exist at module level before resuming from weights.
        weight_file = os.path.join(OUTPUT_DIR, run_name,
                                   'weights%02d.h5' % (start_epoch - 1))
        model.load_weights(weight_file)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    # NOTE(review): next_train() is written as a generator method that calls
    # self.get_batch(), while x_train is a plain array, and the model declares
    # four named inputs. This call looks unfinished -- confirm the intended
    # data feed. The generator-based alternative was left disabled:
    # model.fit_generator(generator=img_gen.next_train(), steps_per_epoch=(words_per_epoch - val_words),
    #                     epochs=stop_epoch, validation_data=img_gen.next_val(), validation_steps=val_words,
    #                     callbacks=[viz_cb, img_gen], initial_epoch=start_epoch)
    # The epoch bounds now come from the arguments instead of the literals
    # 20 and 0, which are the values the existing caller passes.
    model.fit(next_train(x_train), y_train, batch_size=7, epochs=stop_epoch,
              verbose=1, validation_split=0.1, shuffle=True,
              initial_epoch=start_epoch)
if __name__ == '__main__':
    # Timestamp used as the run name handed to train().
    run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
    # Train from epoch 0 to 20 with the width `w` of the loaded images.
    train(run_name, 0, 20, w)
    # The text below was fused onto the call line above, where it was a
    # syntax error; it is kept as a comment:
    # GRU seems to work as well, if not better than LSTM.
@takenoko-str
Copy link

I want to run your awesome code.
Do you have any plans to upload your CSV file?
Thank you.

@yashkumaratri
Copy link

Is this working fine now, or is the error that you posted on the Keras issues tracker still present?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment