Last active
August 9, 2019 09:39
-
-
Save zckkte/3ea3ff13b73ff76059199c6d47cc9ee7 to your computer and use it in GitHub Desktop.
Simple IMDb review sentiment analyser using a regularised feed-forward neural network (built with Keras)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import keras
import matplotlib.pyplot as plt
import numpy as np
from keras import layers
from keras import optimizers
from keras.callbacks import EarlyStopping
from keras.layers import Dropout
from keras.models import load_model
from keras.regularizers import l2
# Vocabulary size: passed as ``num_words`` when loading the IMDB data and used
# as the width of the multi-hot input vectors / the model's input shape.
NUM_WORDS = 10000

# Upper bound on training epochs; the EarlyStopping callback passed to
# ``model.fit`` may end training sooner.
NUM_EPOCHS = 100
def plot_acc(res):
    """Plot training and validation accuracy against the epoch index.

    Args:
        res: Object exposing a ``history`` mapping of per-epoch metric lists
            (the value returned by ``model.fit``). It must contain the
            accuracy series under either the ``'acc'``/``'val_acc'`` keys or
            the ``'accuracy'``/``'val_accuracy'`` keys.

    Raises:
        KeyError: If neither naming scheme is present in ``res.history``.
    """
    history = res.history
    # Older Keras releases abbreviate the metric to 'acc'; Keras >= 2.3
    # reports it under the name given to compile(), i.e. 'accuracy'.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    train_acc = history[acc_key]
    val_acc = history['val_' + acc_key]

    epochs = range(len(val_acc))
    plt.plot(epochs, train_acc, marker='o', color='red', label="Training accuracy")
    plt.plot(epochs, val_acc, marker='o', color='blue', label="Validation accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.show()
# Load the IMDB review dataset with the vocabulary capped via num_words.
(train_data, train_labels), (test_data, test_labels) = keras.datasets.imdb.load_data(num_words=NUM_WORDS)

# Feed-forward classifier: two 16-unit ReLU layers, each followed by 20%
# dropout, and a single sigmoid output unit for the binary sentiment label.
# Every Dense layer carries an L2 penalty (factor 0.001) on its kernel weights.
model = keras.Sequential()
model.add(layers.Dense(16, kernel_regularizer=l2(0.001), activation='relu', input_shape=(NUM_WORDS,)))
model.add(Dropout(0.2))
model.add(layers.Dense(16, kernel_regularizer=l2(0.001), activation='relu'))
model.add(Dropout(0.2))
model.add(layers.Dense(1, kernel_regularizer=l2(0.001), activation='sigmoid'))
def multi_hot_sequences(sequences, dimension, dtype=float):
    """Encode sequences of integer indices as rows of a multi-hot matrix.

    Args:
        sequences: Sized collection in which each item lists the column
            indices to switch on for the corresponding row. Repeated indices
            within an item have no additional effect.
        dimension: Number of columns in the result; every index must be
            smaller than this value.
        dtype: Element type of the returned array. The default (``float``)
            matches ``np.zeros``'s own default; pass e.g. ``'float32'`` to
            halve the memory needed for large inputs.

    Returns:
        A NumPy array of shape ``(len(sequences), dimension)`` holding 1 at
        ``[i, j]`` when ``j`` occurs in ``sequences[i]`` and 0 elsewhere.

    Raises:
        IndexError: If an index falls outside ``dimension``.
    """
    # Start from an all-zero matrix of shape (len(sequences), dimension).
    results = np.zeros((len(sequences), dimension), dtype=dtype)
    for i, word_indices in enumerate(sequences):
        # Index-list assignment sets all listed columns of row i in one step.
        results[i, word_indices] = 1.0
    return results
# Turn each review (a list of word indices) into a fixed-width multi-hot
# vector so it matches the model's input_shape of (NUM_WORDS,).
train_data = multi_hot_sequences(train_data, dimension=NUM_WORDS)
test_data = multi_hot_sequences(test_data, dimension=NUM_WORDS)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Stop once validation loss stops improving, and roll the model back to the
# best epoch's weights so the file saved below is not the already-degraded
# final epoch.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, restore_best_weights=True)

# NOTE(review): the test split doubles as the validation set here, so early
# stopping is tuned on the same data that would be used to report final
# accuracy -- consider holding out part of the training data instead.
res = model.fit(train_data, train_labels, epochs=NUM_EPOCHS,
                validation_data=(test_data, test_labels), callbacks=[es])

model.save('imdb_review_sentiment_with_regularisation.h5')
plot_acc(res)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment