# lettergram/mlp_sentence_type_classifcation.py

## mlp_sentence_type_classifcation.py
# Train and evaluate a one-hidden-layer MLP that classifies sentences by type.
#
# Hyper-parameters: width of the bag-of-words input vector, mini-batch size,
# and number of passes over the training data.
max_words, batch_size, epochs = 10000, 256, 3

# Generate split training and testing data (80% training, 20% testing).
# NOTE(review): presumably x_* are sequences of integer word indices and y_*
# are integer class ids -- confirm against load_encoded_data.
x_train, x_test, y_train, y_test = load_encoded_data(data_split=0.8)

# Determine the number of classes (i.e. sentence types).
# Both splits are inspected: a label that only occurs in the test split would
# otherwise be out of range for to_categorical() below.
all_labels = np.concatenate([np.ravel(y_train), np.ravel(y_test)])
num_classes = int(np.max(all_labels)) + 1

# Vectorize the data into a Keras readable format (sequence matrix).
# The tokenizer is not fit on any text here; it is only used, with num_words
# set, to turn each sequence into a fixed-width binary row.
tokenizer = Tokenizer(num_words=max_words)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')

# Set the classification to a Keras readable format (categorical / one-hot)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# I(max_words) --(W1)--> H(512)
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('tanh'))

# H(512) --(W2)--> O(num_classes)
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# Configure the loss function, the optimizer, and the metric to report
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

# "Fit Model" (i.e. train model), using training data (80% of dataset)
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)

# Evaluate the trained model, using the test data (20% of dataset)
score = model.evaluate(x_test, y_test, batch_size=batch_size)

# Final testing accuracy, using the reserved 20% testing data.
# score is [loss, accuracy] because 'accuracy' is the only compiled metric.
print('Test accuracy:', score[1])
	# Train and evaluate a one-hidden-layer MLP that classifies sentences by type.
	# NOTE(review): this indented copy has no visible enclosing definition --
	# confirm which function/block it is meant to belong to.
	#
	# Hyper-parameters: width of the bag-of-words input vector, mini-batch size,
	# and number of passes over the training data.
	max_words, batch_size, epochs = 10000, 256, 3

	# Generate split training and testing data (80% training, 20% testing).
	# NOTE(review): presumably x_* are sequences of integer word indices and y_*
	# are integer class ids -- confirm against load_encoded_data.
	x_train, x_test, y_train, y_test = load_encoded_data(data_split=0.8)

	# Determine the number of classes (i.e. sentence types).
	# Both splits are inspected: a label that only occurs in the test split would
	# otherwise be out of range for to_categorical() below.
	all_labels = np.concatenate([np.ravel(y_train), np.ravel(y_test)])
	num_classes = int(np.max(all_labels)) + 1

	# Vectorize the data into a Keras readable format (sequence matrix).
	# The tokenizer is not fit on any text here; it is only used, with num_words
	# set, to turn each sequence into a fixed-width binary row.
	tokenizer = Tokenizer(num_words=max_words)
	x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
	x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')

	# Set the classification to a Keras readable format (categorical / one-hot)
	y_train = keras.utils.to_categorical(y_train, num_classes)
	y_test = keras.utils.to_categorical(y_test, num_classes)

	# I(max_words) --(W1)--> H(512)
	model = Sequential()
	model.add(Dense(512, input_shape=(max_words,)))
	model.add(Activation('tanh'))

	# H(512) --(W2)--> O(num_classes)
	model.add(Dropout(0.5))
	model.add(Dense(num_classes))
	model.add(Activation('softmax'))

	# Configure the loss function, the optimizer, and the metric to report
	model.compile(loss='categorical_crossentropy',
		optimizer='adam', metrics=['accuracy'])

	# "Fit Model" (i.e. train model), using training data (80% of dataset)
	model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)

	# Evaluate the trained model, using the test data (20% of dataset)
	score = model.evaluate(x_test, y_test, batch_size=batch_size)

	# Final testing accuracy, using the reserved 20% testing data.
	# score is [loss, accuracy] because 'accuracy' is the only compiled metric.
	print('Test accuracy:', score[1])