This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Path the checkpoint is written to. The same literal is passed to
# load_model() when the trained model is reloaded for prediction, so the two
# must be kept in sync.
filename = 'model.h1.24_jan_19'

# set checkpoint
# Watch the validation loss and, because save_best_only=True with mode='min',
# only overwrite the saved model when 'val_loss' reaches a new minimum.
# NOTE(review): recent Keras releases require the path to end in '.keras' or
# '.h5' -- confirm against the Keras version in use before renaming.
checkpoint = ModelCheckpoint(
    filename,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
# train model | |
history = model.fit(trainX, trainY.reshape(trainY.shape[0], trainY.shape[1], 1), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reload the saved model from the same path used for the training checkpoint.
model = load_model('model.h1.24_jan_19')

# Flatten testX to 2-D (first two dimensions kept) before inference.
# `Sequential.predict_classes` was removed in TensorFlow 2.6; for multi-class
# outputs it was defined as the argmax over the last axis of `predict`, which
# is what is computed here.
# NOTE(review): assumes the model's last output axis holds per-class scores
# (more than one unit) -- confirm against the model definition.
preds = model.predict(
    testX.reshape((testX.shape[0], testX.shape[1]))
).argmax(axis=-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_word(n, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` equals ``n``.

    Args:
        n: Index to look up; compared with ``==`` against each stored index.
        tokenizer: Any object exposing a ``word_index`` mapping of
            word -> index.

    Returns:
        The first word (in mapping iteration order) whose index equals ``n``,
        or ``None`` when no entry matches.
    """
    # Linear scan over the forward mapping: each call is O(len(word_index)).
    for word, index in tokenizer.word_index.items():
        if index == n:
            return word
    # Only reached once every entry has been checked without a match.
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
preds_text = [] | |
for i in preds: | |
temp = [] | |
for j in range(len(i)): | |
t = get_word(i[j], eng_tokenizer) | |
if j > 0: | |
if (t==get_word(i[j-1],eng_tokenizer))or(t== None): | |
temp.append('') | |
else: | |
temp.append(t) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
import re
import time

import numpy as np
import pandas as pd
import spacy
from tqdm import tqdm
# Raise pandas' 'display.max_colwidth' option to 200 so long text cells are
# not cut short when DataFrames are displayed.
pd.set_option('display.max_colwidth', 200)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# remove URL's from train and test
# A URL is matched as 'http' followed by every non-whitespace character up to
# the next whitespace. The pattern is compiled once and shared by both frames
# instead of being repeated inline.
URL_PATTERN = re.compile(r'http\S+')

train['clean_tweet'] = train['tweet'].apply(lambda x: URL_PATTERN.sub('', x))
test['clean_tweet'] = test['tweet'].apply(lambda x: URL_PATTERN.sub('', x))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# just a random sentence
x = ["Roasted ants are a popular snack in Columbia"]

# Extract ELMo features: call the `elmo` module with its "default" signature
# and keep the "elmo" entry of the returned dict.
embeddings = elmo(x, signature="default", as_dict=True)["elmo"]

# Notebook-style bare expression: shows the shape when it is the last line of
# a cell; it has no visible effect when run as a plain script.
embeddings.shape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def elmo_vectors(x):
    """Return the ELMo output for ``x`` averaged over axis 1.

    Args:
        x: Object exposing ``tolist()``; the resulting list is passed to the
            module-level ``elmo`` callable. Presumably a pandas Series or
            array of sentences -- confirm against callers.

    Returns:
        The evaluated result of ``tf.reduce_mean(embeddings, 1)``, where
        ``embeddings`` is the ``"elmo"`` entry of the module output.
    """
    embeddings = elmo(x.tolist(), signature="default", as_dict=True)["elmo"]
    # return average of ELMo features
    mean_embeddings = tf.reduce_mean(embeddings, 1)

    # NOTE(review): tf.Session is the TF1 graph-mode API; under TensorFlow 2
    # this would need tf.compat.v1 -- confirm the targeted TF version.
    with tf.Session() as sess:
        # Variables and lookup tables must be initialised before evaluation.
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        return sess.run(mean_embeddings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# save elmo_train_new
# `with` guarantees the file is flushed and closed even if pickle.dump raises.
with open("elmo_train_03032019.pickle", "wb") as pickle_out:
    pickle.dump(elmo_train_new, pickle_out)

# save elmo_test_new
with open("elmo_test_03032019.pickle", "wb") as pickle_out:
    pickle.dump(elmo_test_new, pickle_out)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that this project wrote itself.

# load elmo_train_new
# `with` closes the handle after reading instead of leaving it open.
with open("elmo_train_03032019.pickle", "rb") as pickle_in:
    elmo_train_new = pickle.load(pickle_in)

# load elmo_test_new
with open("elmo_test_03032019.pickle", "rb") as pickle_in:
    elmo_test_new = pickle.load(pickle_in)