Skip to content

Instantly share code, notes, and snippets.

View prateekjoshi565's full-sized avatar
🎯
Focusing

Prateek Joshi prateekjoshi565

🎯
Focusing
View GitHub Profile
# just a random sentence
x = ["Roasted ants are a popular snack in Columbia"]
# Extract ELMo features
embeddings = elmo(x, signature="default", as_dict=True)["elmo"]
embeddings.shape
@prateekjoshi565
prateekjoshi565 / elmo_vectors_func.py
Created March 6, 2019 18:49
elmo vectors function
def elmo_vectors(x):
embeddings = elmo(x.tolist(), signature="default", as_dict=True)["elmo"]
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())
# return average of ELMo features
return sess.run(tf.reduce_mean(embeddings,1))
# save elmo_train_new
pickle_out = open("elmo_train_03032019.pickle","wb")
pickle.dump(elmo_train_new, pickle_out)
pickle_out.close()
# save elmo_test_new
pickle_out = open("elmo_test_03032019.pickle","wb")
pickle.dump(elmo_test_new, pickle_out)
pickle_out.close()
# load elmo_train_new
pickle_in = open("elmo_train_03032019.pickle", "rb")
elmo_train_new = pickle.load(pickle_in)
# load elmo_train_new
pickle_in = open("elmo_test_03032019.pickle", "rb")
elmo_test_new = pickle.load(pickle_in)
from sklearn.model_selection import train_test_split
xtrain, xvalid, ytrain, yvalid = train_test_split(elmo_train_new,
train['label'],
random_state=42,
test_size=0.2)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
lreg = LogisticRegression()
lreg.fit(xtrain, ytrain)
# prepare submission dataframe
sub = pd.DataFrame({'id':test['id'], 'label':preds_test})
# write predictions to a CSV file
sub.to_csv("sub_lreg.csv", index=False)
@prateekjoshi565
prateekjoshi565 / genre_import_libraries.py
Created April 21, 2019 11:52
genre_import_libraries
import pandas as pd
import numpy as np
import json
import nltk
import re
import csv
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.feature_extraction.text import TfidfVectorizer
plots = []
with open("plot_summaries.txt", 'r') as f:
reader = csv.reader(f, dialect='excel-tab')
for row in tqdm(reader):
plots.append(row)
@prateekjoshi565
prateekjoshi565 / genre_split_id_plot.py
Created April 21, 2019 12:00
genre_split_id_plot
movie_id = []
plot = []
# extract movie Ids and plot summaries
for i in tqdm(plots):
movie_id.append(i[0])
plot.append(i[1])
# create dataframe
movies = pd.DataFrame({'movie_id': movie_id, 'plot': plot})