zckkte/imdb_sentiment_analyser.py

## imdb_sentiment_analyser.py
import keras
from keras import layers
from keras import optimizers
import matplotlib.pyplot as plt
import numpy as np
from keras.models import load_model
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
from keras.layers import Dropout

# Vocabulary size: passed as num_words to the IMDB loader and used as the
# width of the multi-hot input vectors (and of the first Dense layer's input).
NUM_WORDS = 10000
# Maximum number of epochs handed to model.fit; an EarlyStopping callback is
# also supplied there.
NUM_EPOCHS = 100

def plot_acc(res):
    """Plot training and validation accuracy per epoch and show the figure.

    Args:
        res: object exposing a ``history`` mapping (such as the value
            returned by ``model.fit``) that holds per-epoch accuracy lists.

    Raises:
        KeyError: if ``res.history`` contains no accuracy series under
            either the short or the long metric name.
    """
    history = res.history
    # Keras releases before 2.3 log metrics=['accuracy'] as 'acc'/'val_acc';
    # newer releases keep the name as written ('accuracy'/'val_accuracy').
    train_key = 'acc' if 'acc' in history else 'accuracy'
    val_key = 'val_acc' if 'val_acc' in history else 'val_accuracy'
    train_acc = history[train_key]
    val_acc = history[val_key]

    epochs = range(len(val_acc))
    plt.plot(epochs, train_acc, marker='o', color='red', label="Training accuracy")
    plt.plot(epochs, val_acc, marker='o', color='blue', label="Validation accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.show()

# Load the IMDB reviews with the vocabulary capped through num_words=NUM_WORDS.
(train_data, train_labels), (test_data, test_labels) = keras.datasets.imdb.load_data(
    num_words=NUM_WORDS,
)

# Two 16-unit ReLU layers followed by a single sigmoid output. Every Dense
# layer carries an L2 kernel penalty (a weight-decay style regulariser), and a
# Dropout layer follows each hidden layer.
model = keras.Sequential([
    layers.Dense(16, activation='relu', kernel_regularizer=l2(0.001), input_shape=(NUM_WORDS,)),
    layers.Dropout(0.2),
    layers.Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001)),
])

def multi_hot_sequences(sequences, dimension):
    """Encode index sequences as rows of a multi-hot matrix.

    Row ``i`` of the result holds 1.0 at every column listed in
    ``sequences[i]`` and 0.0 everywhere else.

    Args:
        sequences: sized collection of index collections, one per row.
        dimension: number of columns in the returned matrix.

    Returns:
        NumPy array of shape ``(len(sequences), dimension)``.
    """
    row_count = len(sequences)
    encoded = np.zeros((row_count, dimension))
    for row, positions in enumerate(sequences):
        # Fancy indexing switches on all listed columns of this row at once.
        encoded[row, positions] = 1.0
    return encoded

# Replace each review's word-index list with a fixed-width multi-hot vector.
train_data = multi_hot_sequences(train_data, dimension=NUM_WORDS)
test_data = multi_hot_sequences(test_data, dimension=NUM_WORDS)

model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Early stopping watches val_loss (lower is better); patience is left at the
# Keras default.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

# NOTE(review): the test split doubles as the validation set here, so the
# early-stopping decision is made on test data.
res = model.fit(
    train_data,
    train_labels,
    epochs=NUM_EPOCHS,
    validation_data=(test_data, test_labels),
    callbacks=[es],
)

model.save('imdb_review_sentiment_with_regularisation.h5')

plot_acc(res)
	import keras
	from keras import layers
	from keras import optimizers
	import matplotlib.pyplot as plt
	import numpy as np
	from keras.models import load_model
	from keras.regularizers import l2
	from keras.callbacks import EarlyStopping
	from keras.layers import Dropout

	# Vocabulary size: passed as num_words to the IMDB loader and used as the
	# width of the multi-hot input vectors.
	NUM_WORDS = 10000
	# Maximum number of epochs handed to model.fit.
	NUM_EPOCHS = 100

	def plot_acc(res):
		"""Plot training and validation accuracy per epoch and show the figure.

		Args:
			res: object exposing a ``history`` mapping (such as the value
				returned by ``model.fit``) that holds per-epoch accuracy lists.

		Raises:
			KeyError: if ``res.history`` contains no accuracy series under
				either the short or the long metric name.
		"""
		history = res.history
		# Keras releases before 2.3 log metrics=['accuracy'] as 'acc'/'val_acc';
		# newer releases keep the name as written ('accuracy'/'val_accuracy').
		train_key = 'acc' if 'acc' in history else 'accuracy'
		val_key = 'val_acc' if 'val_acc' in history else 'val_accuracy'
		train_acc = history[train_key]
		val_acc = history[val_key]

		epochs = range(len(val_acc))
		plt.plot(epochs, train_acc, marker='o', color='red', label="Training accuracy")
		plt.plot(epochs, val_acc, marker='o', color='blue', label="Validation accuracy")
		plt.xlabel("Epochs")
		plt.ylabel("Accuracy")
		plt.legend()
		plt.show()

	# Load the IMDB reviews with the vocabulary capped through num_words=NUM_WORDS.
	(train_data, train_labels), (test_data, test_labels) = keras.datasets.imdb.load_data(
		num_words=NUM_WORDS,
	)

	# Two 16-unit ReLU layers followed by a single sigmoid output. Every Dense
	# layer carries an L2 kernel penalty (a weight-decay style regulariser), and
	# a Dropout layer follows each hidden layer.
	model = keras.Sequential([
		layers.Dense(16, activation='relu', kernel_regularizer=l2(0.001), input_shape=(NUM_WORDS,)),
		layers.Dropout(0.2),
		layers.Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
		layers.Dropout(0.2),
		layers.Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001)),
	])

	def multi_hot_sequences(sequences, dimension):
		"""Encode index sequences as rows of a multi-hot matrix.

		Row ``i`` of the result holds 1.0 at every column listed in
		``sequences[i]`` and 0.0 everywhere else.

		Args:
			sequences: sized collection of index collections, one per row.
			dimension: number of columns in the returned matrix.

		Returns:
			NumPy array of shape ``(len(sequences), dimension)``.
		"""
		# Start from an all-zero matrix of shape (len(sequences), dimension).
		results = np.zeros((len(sequences), dimension))
		for i, word_indices in enumerate(sequences):
			# Fancy indexing switches on all listed columns of row i at once.
			results[i, word_indices] = 1.0
		return results

	# Replace each review's word-index list with a fixed-width multi-hot vector.
	train_data = multi_hot_sequences(train_data, dimension=NUM_WORDS)
	test_data = multi_hot_sequences(test_data, dimension=NUM_WORDS)

	model.compile(
		loss='binary_crossentropy',
		optimizer='rmsprop',
		metrics=['accuracy'],
	)

	# Early stopping watches val_loss (lower is better); patience is left at the
	# Keras default.
	es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

	# NOTE(review): the test split doubles as the validation set here, so the
	# early-stopping decision is made on test data.
	res = model.fit(
		train_data,
		train_labels,
		epochs=NUM_EPOCHS,
		validation_data=(test_data, test_labels),
		callbacks=[es],
	)

	model.save('imdb_review_sentiment_with_regularisation.h5')

	plot_acc(res)