This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a TF-IDF document-term matrix from the 'Review Text' column and
# reduce it to a small number of components with truncated SVD.
# NOTE(review): `df`, `TfidfVectorizer` and `TruncatedSVD` are expected to be
# defined/imported earlier in the notebook - confirm.
reindexed_data = df['Review Text']
tfidf_vectorizer = TfidfVectorizer(stop_words='english', use_idf=True, smooth_idf=True)
# Hand the vectorizer the underlying array of values rather than the Series.
reindexed_data = reindexed_data.values
document_term_matrix = tfidf_vectorizer.fit_transform(reindexed_data)

# Number of SVD components (topics) to keep.
n_topics = 6
lsa_model = TruncatedSVD(n_components=n_topics)
# One row per document, one column per component.
lsa_topic_matrix = lsa_model.fit_transform(document_term_matrix)
def get_keys(topic_matrix): | |
''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.corpus import stopwords | |
from sklearn.metrics.pairwise import linear_kernel | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.decomposition import LatentDirichletAllocation | |
import random | |
import re, nltk, spacy, gensim | |
import pyLDAvis | |
import pyLDAvis.sklearn | |
import matplotlib.pyplot as plt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import radians, cos, sin, asin, sqrt | |
import numpy as np | |
def haversine_np(lon1, lat1, lon2, lat2): | |
""" | |
Calculate the great circle distance between two points | |
on the earth (specified in decimal degrees) | |
All args must be of equal length. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def HitRate(topNPredicted, leftOutPredictions): | |
hits = 0 | |
total = 0 | |
# For each left-out rating | |
for leftOut in leftOutPredictions: | |
userID = leftOut[0] | |
leftOutMovieID = leftOut[1] | |
# Is it in the predicted top 10 for this user? | |
hit = False |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def recommend(customer_id, sparse_customer_item, customer_vecs, item_vecs, num_items=10): | |
customer_interactions = sparse_customer_item[customer_id,:].toarray() | |
customer_interactions = customer_interactions.reshape(-1) + 1 | |
customer_interactions[customer_interactions > 1] = 0 | |
rec_vector = customer_vecs[customer_id,:].dot(item_vecs.T).toarray() | |
min_max = MinMaxScaler() | |
rec_vector_scaled = min_max.fit_transform(rec_vector.reshape(-1,1))[:,0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a CBLOF outlier detector on X and count predicted inliers/outliers.
# NOTE(review): `CBLOF`, `X` and `plt` are expected to be defined/imported
# earlier in the notebook (CBLOF is presumably pyod's detector) - confirm.
outliers_fraction = 0.01
# 100 x 100 grid over the unit square.
# NOTE(review): presumably used to draw the decision surface further down,
# which assumes X is scaled to [0, 1] - confirm.
xx, yy = np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100))
clf = CBLOF(contamination=outliers_fraction, check_estimator=False, random_state=0)
clf.fit(X)
# Sign of the raw decision scores is flipped here.
scores_pred = clf.decision_function(X) * -1
y_pred = clf.predict(X)
# Non-zero predictions are counted as outliers; everything else is an inlier.
n_inliers = len(y_pred) - np.count_nonzero(y_pred)
n_outliers = np.count_nonzero(y_pred == 1)
plt.figure(figsize=(8, 8))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Line chart of VALUE over DATE, one coloured line per COUNTRY.
# NOTE(review): `px` (presumably plotly.express) and `df_g7_melt` are
# expected to be defined earlier in the notebook - confirm.
fig = px.line(
    df_g7_melt,
    x="DATE",
    y="VALUE",
    color='COUNTRY',
    title='Real Residential Property Prices - All G7 Countries',
)
# Centre the title horizontally and place it near the top of the figure.
fig.update_layout(title=dict(x=0.5, y=0.95))
fig.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two-letter country code -> display name for the G7 members.
countries = {
    'US': 'United States',
    'GB': 'United Kingdom',
    'FR': 'France',
    'CA': 'Canada',
    'DE': 'Germany',
    'JP': 'Japan',
    'IT': 'Italy',
}


def check_country(x):
    """Return the country name whose two-letter code occurs in *x*.

    The match is a case-insensitive substring test, tried in the insertion
    order of ``countries``; the first code found wins.

    Args:
        x: String to inspect (e.g. a series identifier).

    Returns:
        The matching country name, or ``''`` when no code occurs in *x*.
    """
    # Lower-case the input once instead of on every loop iteration.
    haystack = x.lower()
    for code, name in countries.items():
        if code.lower() in haystack:
            return name
    # No known code found anywhere in the string.
    return ''
# Derive a readable COUNTRY column from each row's SERIES_ID.
# `check_country` is passed directly; wrapping it in a lambda adds nothing.
df_g7_melt['COUNTRY'] = df_g7_melt['SERIES_ID'].map(check_country)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the series ids currently listed in df_g7, download those series,
# then reshape the result from wide to long format.
# NOTE(review): `get_fred_data` is a helper defined elsewhere; it presumably
# returns a DataFrame with a DATE column plus one column per series id -
# confirm against its definition.
g7_list = df_g7['series_id'].tolist()
start_date = '1970-01-01'
end_date = '2021-10-01'
# df_g7 is rebound here: from the table of series ids to the downloaded data.
df_g7 = get_fred_data(
    series_list=g7_list,
    start_date=start_date,
    end_date=end_date,
)
# Long format: one row per (DATE, SERIES_ID) pair with its VALUE.
df_g7_melt = pd.melt(
    df_g7,
    id_vars=['DATE'],
    value_vars=g7_list,
    var_name='SERIES_ID',
    value_name='VALUE',
)
Newer Older