This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Smoke-test the ELMo module on a single, arbitrary sentence.
x = ["Roasted ants are a popular snack in Columbia"]

# Extract ELMo features. `elmo` is not defined in this section; it is
# expected to have been created earlier in the file.
embeddings = elmo(x, signature="default", as_dict=True)["elmo"]

# Bare expression: displays the tensor's shape when run as a notebook cell.
embeddings.shape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def elmo_vectors(x):
    """Return the ELMo features of ``x`` averaged over axis 1.

    Args:
        x: A collection of texts that exposes ``.tolist()``; the resulting
            list is passed straight to the module-level ``elmo`` callable.
            (Presumably a pandas Series or NumPy array of strings -- confirm
            against the caller.)

    Returns:
        The evaluated value of ``tf.reduce_mean(embeddings, 1)``.
        NOTE(review): assuming the "elmo" output is laid out as
        (batch, tokens, features), this is one averaged vector per input
        text -- confirm against the ELMo module documentation.

    Note:
        Every call opens a new ``tf.Session`` and re-runs the global-variable
        and table initializers before evaluating the mean. ``tf.Session`` is
        the TensorFlow 1.x graph-mode API.
    """
    embeddings = elmo(x.tolist(), signature="default", as_dict=True)["elmo"]

    with tf.Session() as sess:
        # Variables and lookup tables must be initialized before evaluation.
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        # return average of ELMo features
        return sess.run(tf.reduce_mean(embeddings, 1))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Persist the extracted ELMo features so they do not have to be recomputed.
# The context managers guarantee each file is flushed and closed even if
# pickle.dump raises part-way through.

# save elmo_train_new
with open("elmo_train_03032019.pickle", "wb") as pickle_out:
    pickle.dump(elmo_train_new, pickle_out)

# save elmo_test_new
with open("elmo_test_03032019.pickle", "wb") as pickle_out:
    pickle.dump(elmo_test_new, pickle_out)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reload the previously saved ELMo features.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# pickle files that you created yourself or otherwise trust.

# load elmo_train_new
with open("elmo_train_03032019.pickle", "rb") as pickle_in:
    elmo_train_new = pickle.load(pickle_in)

# load elmo_test_new
with open("elmo_test_03032019.pickle", "rb") as pickle_in:
    elmo_test_new = pickle.load(pickle_in)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split

# Split the ELMo training features and their labels into training and
# validation parts: 20% is held out for validation, and the fixed
# random_state makes the split reproducible between runs.
xtrain, xvalid, ytrain, yvalid = train_test_split(
    elmo_train_new,
    train['label'],
    random_state=42,
    test_size=0.2,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

# Fit a logistic-regression classifier with default hyper-parameters on the
# training split. f1_score is imported here but not used in this snippet.
lreg = LogisticRegression()
lreg.fit(xtrain, ytrain)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# prepare submission dataframe: one row per test id with its predicted label
# (`test` and `preds_test` are expected to be defined earlier in the file)
sub = pd.DataFrame({'id': test['id'], 'label': preds_test})

# write predictions to a CSV file, without the DataFrame index column
sub.to_csv("sub_lreg.csv", index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import json | |
import nltk | |
import re | |
import csv | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from tqdm import tqdm | |
from sklearn.feature_extraction.text import TfidfVectorizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the tab-separated plot summaries file into a list of rows, where each
# row is the list of fields produced by csv.reader.
plots = []

# newline="" is how the csv module documents opening files handed to
# csv.reader, so line endings inside fields are left for the reader to parse.
with open("plot_summaries.txt", 'r', newline="") as f:
    reader = csv.reader(f, dialect='excel-tab')
    # tqdm only wraps the iteration to display progress.
    plots.extend(tqdm(reader))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
movie_id = []
plot = []

# extract movie Ids and plot summaries: the first two fields of every row
# (a row with fewer than two fields raises IndexError)
for row in tqdm(plots):
    movie_id.append(row[0])
    plot.append(row[1])

# create dataframe with one row per movie
movies = pd.DataFrame({'movie_id': movie_id, 'plot': plot})