Created
June 7, 2020 23:14
-
-
Save CagriAldemir/60d9e58f66ecebeed7603ee8362e1c11 to your computer and use it in GitHub Desktop.
Classification of IMDB Movie Review Dataset Using LSTM and CNN (Machine Learning Final Project)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import keras | |
def getCurrentTime():
    """Return the current wall-clock time in whole milliseconds.

    The value is ``time.time()`` scaled to milliseconds and rounded to the
    nearest integer.
    """
    return int(round(time.time() * 1000))
class TimeHistory(keras.callbacks.Callback):
    """Keras callback that prints how long each training epoch took.

    The duration is the difference between two ``getCurrentTime()`` readings
    (milliseconds) taken at the start and at the end of the epoch.
    """

    def on_epoch_begin(self, batch, logs=None):
        """Remember when the epoch started.

        Args:
            batch: Zero-based epoch index passed in by Keras. The parameter
                keeps its historical name for backward compatibility.
            logs: Metrics dictionary passed in by Keras; not used here.
        """
        # ``logs`` defaults to None rather than a shared mutable dict.
        self.epoch_time_start = getCurrentTime()

    def on_epoch_end(self, batch, logs=None):
        """Print the one-based epoch number and its elapsed time.

        Args:
            batch: Zero-based epoch index passed in by Keras.
            logs: Metrics dictionary passed in by Keras; not used here.
        """
        # Take the end reading first so printing is not counted in the time.
        elapsed = getCurrentTime() - self.epoch_time_start
        print("Epoch Number: ", batch + 1)
        print("Time: ", elapsed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import os | |
from keras.datasets import imdb | |
from keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D, Activation, Dense, Dropout, Embedding, LSTM, Bidirectional | |
from keras.models import Sequential | |
from keras.preprocessing import sequence | |
from EpochTimeHistory import TimeHistory | |
from NeuralNetworkTypesEnum import NeuralNetworkTypes | |
# Ask TensorFlow to grow its GPU memory allocation on demand.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

# Variables

# Which architecture to train, and the per-epoch timing callback.
neuralNetworkType = NeuralNetworkTypes.CNN
time_callback = TimeHistory()

# Data: vocabulary size passed to the loader and the padded review length.
max_features = 20000
maxlen = 100

# Training loop settings (verboseMode is forwarded to model.fit).
epochs = 5
batch_size = 64
verboseMode = 2

# Layer sizes shared by the model definitions.
embedding_size = 128
filters = 64
kernel_size = 5
pool_size = 4
hidden_dims = 250
lstm_output_size = 64
dropout_rate = 0.25

# End variables
# Load the IMDB reviews restricted to the `max_features` most frequent words,
# then pad/truncate every review to exactly `maxlen` tokens.
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_test)
)
def _build_bidirectional_lstm():
    """Build: embedding -> bidirectional LSTM -> dropout -> sigmoid unit."""
    net = Sequential()
    net.add(Embedding(max_features, embedding_size, input_length=maxlen))
    net.add(Bidirectional(LSTM(lstm_output_size)))
    net.add(Dropout(dropout_rate))
    net.add(Dense(1, activation='sigmoid'))
    return net


def _build_cnn():
    """Build: embedding -> dropout -> Conv1D -> global max pool -> dense head."""
    net = Sequential()
    net.add(Embedding(max_features, embedding_size, input_length=maxlen))
    net.add(Dropout(dropout_rate))
    net.add(Conv1D(filters,
                   kernel_size,
                   padding='valid',
                   activation='relu',
                   strides=1))
    net.add(GlobalMaxPooling1D())
    net.add(Dense(hidden_dims))
    net.add(Dropout(dropout_rate))
    net.add(Activation('relu'))
    net.add(Dense(1))
    net.add(Activation('sigmoid'))
    return net


def _build_cnn_and_lstm():
    """Build: embedding -> dropout -> Conv1D -> max pool -> LSTM -> sigmoid."""
    net = Sequential()
    net.add(Embedding(max_features, embedding_size, input_length=maxlen))
    net.add(Dropout(dropout_rate))
    net.add(Conv1D(filters,
                   kernel_size,
                   padding='valid',
                   activation='relu',
                   strides=1))
    net.add(MaxPooling1D(pool_size=pool_size))
    net.add(LSTM(lstm_output_size))
    net.add(Dense(1))
    net.add(Activation('sigmoid'))
    return net


def _build_lstm():
    """Build: embedding -> LSTM with input/recurrent dropout -> sigmoid unit."""
    net = Sequential()
    net.add(Embedding(max_features, embedding_size))
    net.add(LSTM(lstm_output_size, dropout=0.2, recurrent_dropout=0.2))
    net.add(Dense(1, activation='sigmoid'))
    return net


# Maps every supported network type to the function that builds its layers.
_MODEL_BUILDERS = {
    NeuralNetworkTypes.BidirectionalLSTM: _build_bidirectional_lstm,
    NeuralNetworkTypes.CNN: _build_cnn,
    NeuralNetworkTypes.CNNandLSTM: _build_cnn_and_lstm,
    NeuralNetworkTypes.LSTM: _build_lstm,
}

if neuralNetworkType not in _MODEL_BUILDERS:
    # Previously an unknown type fell through every branch and did nothing.
    raise ValueError(
        'Unsupported neural network type: {!r}'.format(neuralNetworkType))

print("Type: " + neuralNetworkType.name)
model = _MODEL_BUILDERS[neuralNetworkType]()

# Every architecture is a binary classifier trained the same way.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Keras ignores `validation_split` whenever `validation_data` is supplied, so
# the former `validation_split=0.3` argument had no effect and is omitted.
# `validation_data` is passed as a tuple, the form Keras documents.
# NOTE(review): the test split is also used as validation data here, so the
# test metrics below are not measured on data unseen during model selection.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          verbose=verboseMode,
          callbacks=[time_callback],
          shuffle=True)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import enum | |
class NeuralNetworkTypes(enum.Enum):
    """Identifiers for the selectable network architectures."""

    # Recurrent model reading the sequence in both directions.
    BidirectionalLSTM = 1

    # Purely convolutional model.
    CNN = 2

    # Convolutional front end feeding an LSTM.
    CNNandLSTM = 3

    # Plain single-direction LSTM.
    LSTM = 4
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment