@bhaettasch
Created January 10, 2016 18:24
Simple RNN with Keras that handles multiple sequential inputs at once. All these inputs must be padded to the same length. After the merge layer, more layers could be added. It also contains an example of using an early stopping callback. The example can be run directly, since sample data is generated at the start.
import math
import numpy as np
from keras.callbacks import EarlyStopping
from keras.layers import recurrent
from keras.layers.core import Dense, Merge
from keras.models import Sequential
VALIDATION_SPLIT = 0.1
EVALUATION_SPLIT = 0.95
MULTI_INPUT_COUNT = 4
INPUT_DIM = 300 # size of a w2v-Vector
BATCH_SIZE = 32
EPOCHS = 20
# Generate and use sample data of the needed structure: list(np.array(np.array(float))) and list(int)
# The outer list holds all sentences. After splitting training and eval data, this list will be transformed into a
# numpy array. The middle array holds all words of the sentence and the inner one is the word vector.
# To make this work, the word count (len of middle array) must be the same for all sentences.
# This can be achieved by padding (pre or post) with np.zeros(INPUT_DIM),
# where INPUT_DIM is the number of dimensions each word vector has.
# This example should reach an accuracy of 1.00; replace np.zeros with np.ones to get 0.5 accuracy.
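# For illustration, a minimal sketch of such a padding helper (not used below, since the
# sample data is already uniform in length); "pad_sentence" is a hypothetical name:
def pad_sentence(sentence, length, dim=INPUT_DIM):
    """Post-pad a list of word vectors with zero vectors up to `length` words."""
    padding = [np.zeros(dim)] * (length - len(sentence))
    return np.array(list(sentence) + padding)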
x = [np.array([np.ones(INPUT_DIM)] * 5), np.array([np.zeros(INPUT_DIM)] * 5)] * 250
X = [x] * MULTI_INPUT_COUNT
y = [1, 0] * 250
# Calculate split point for training and evaluation data
train_eval_border = math.floor(len(y) * EVALUATION_SPLIT)
print("Set size: {0}, Training/Test set size: {1}, Evaluation set size: {2}".format(len(y), train_eval_border, len(y)-train_eval_border))
# Build model
print('Build model...')
# Create input networks processing an arbitrary (but fixed throughout the run) number of sentences
# Every sentence that should be processed at the same time gets its own input network
input_models = []
for i in range(MULTI_INPUT_COUNT):
    input_model = Sequential()
    input_model.add(recurrent.GRU(200, init='uniform', input_dim=INPUT_DIM))
    input_models.append(input_model)
# The main model combines these individual input networks
# and computes a single binary value, deciding whether there should be a new paragraph
# in the middle of the input or not
model = Sequential()
model.add(Merge(input_models, mode='concat'))
model.add(Dense(1, activation="hard_sigmoid"))
# Compile the model, using an optimizer and loss function suited to a binary decision
model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")
# Train...
print('Training...')
model.fit([np.array(x[:train_eval_border]) for x in X],
          np.array(y[:train_eval_border]),
          batch_size=BATCH_SIZE,
          nb_epoch=EPOCHS,
          validation_split=VALIDATION_SPLIT,
          show_accuracy=True,
          shuffle="batch",
          callbacks=[EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto')],
          verbose=1)
# ...and evaluate
loss, acc = model.evaluate([np.array(x[train_eval_border:]) for x in X],
                           np.array(y[train_eval_border:]),
                           batch_size=BATCH_SIZE,
                           show_accuracy=True)
print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))
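# A minimal sketch of using the trained model for prediction (assuming new sentences
# have been embedded and padded to the same length as the training data); here the
# evaluation slice is simply reused as stand-in input:
probabilities = model.predict([np.array(x[train_eval_border:]) for x in X],
                              batch_size=BATCH_SIZE)
predictions = (probabilities > 0.5).astype(int)  # threshold the sigmoid output
print('First predictions:', predictions[:5].flatten())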
@bhaettasch (Author)
This example does not contain an embedding layer, since it is really hard to find an example for already-embedded data. Thus, this one can be used if you have data that is already embedded/vectorized, for example with word2vec.
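As a rough sketch of how such embedded input could be produced with gensim's Word2Vec (an assumption here, not part of the gist; note the parameter is spelled vector_size in gensim 4.x and size in older releases):

import numpy as np
from gensim.models import Word2Vec

sentences = [["new", "paragraph", "here"], ["no", "break"]]
w2v = Word2Vec(sentences, vector_size=300, min_count=1)  # 300 matches INPUT_DIM above
max_len = max(len(s) for s in sentences)
# Embed each word, then post-pad every sentence with zero vectors to a common length
embedded = [np.array([w2v.wv[w] for w in s] + [np.zeros(300)] * (max_len - len(s)))
            for s in sentences]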
