Skip to content

Instantly share code, notes, and snippets.

View deansublett's full-sized avatar

Dean Sublett deansublett

View GitHub Profile
# Try the content-based recommender on the seminal film 'Spy Kids'.
# NOTE(review): presumably the title must match a movies_clean['original_title']
# value exactly for the lookup to succeed — confirm against give_rec.
give_rec('Spy Kids')
from sklearn.metrics.pairwise import sigmoid_kernel

# Pairwise sigmoid-kernel similarity of tfv_matrix with itself (presumably the
# TF-IDF document-term matrix); sig[i, j] scores row i against row j.
sig = sigmoid_kernel(tfv_matrix, tfv_matrix)

# Reverse mapping from movie title to that movie's index label in movies_clean.
indices = pd.Series(movies_clean.index, index=movies_clean['original_title'])
# Series.drop_duplicates() de-duplicates the *values* (the index labels), which
# leaves repeated titles in place, so indices[title] could come back as a
# Series instead of a single label. De-duplicate on the title index instead,
# keeping the first occurrence of each title.
indices = indices[~indices.index.duplicated(keep='first')]
# NOTE(review): these are index labels, while sig is addressed by row position;
# the two agree only if movies_clean has a default 0..n-1 index — confirm.
# Credit to Ibtesam Ahmed for the skeleton code
def give_rec(title, sig=sig):
from sklearn.feature_extraction.text import TfidfVectorizer

# Using Abhishek Thakur's arguments for TF-IDF.
# use_idf / smooth_idf / sublinear_tf are boolean options: pass real booleans
# rather than 1, because scikit-learn's parameter validation rejects ints for
# boolean parameters in recent releases.
tfv = TfidfVectorizer(
    min_df=3,                  # ignore terms seen in fewer than 3 documents
    max_features=None,         # no cap on vocabulary size
    strip_accents='unicode',
    analyzer='word',
    token_pattern=r'\w{1,}',   # any run of one or more word characters
    ngram_range=(1, 3),        # unigrams, bigrams and trigrams
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,         # use 1 + log(tf) instead of raw term counts
    stop_words='english',
)

# Filling NaNs with empty string so every overview is a str
movies_clean['overview'] = movies_clean['overview'].fillna('')
# Order the movies by their blended score, best first.
scored = movies_clean.sort_values(by='score', ascending=False)

# Horizontal bar chart of the ten highest-scoring titles.
plt.figure(figsize=(16, 6))
ax = sns.barplot(
    x=scored['score'].iloc[:10],
    y=scored['original_title'].iloc[:10],
    data=scored,
    palette='deep',
)
# Optional zoom on the x-axis, left disabled:
#plt.xlim(3.55, 5.25)
ax.set_title('Best Rated & Most Popular Blend', weight='bold')
ax.set_xlabel('Score', weight='bold')
ax.set_ylabel('Movie Title', weight='bold')
# My own recommender system
# half/half recommendation based on scaled weighted average & popularity score
from sklearn import preprocessing

# The two columns that feed the blended score.
norm_columns = ['weighted_average', 'popularity']

# Rescale both columns with MinMaxScaler (default feature range is 0 to 1)
# so that they are on a comparable scale.
min_max_scaler = preprocessing.MinMaxScaler()
movies_scaled = min_max_scaler.fit_transform(movies_clean[norm_columns])

# fit_transform returns a bare array, so the row labels are lost. Re-attach
# movies_clean's index; otherwise any later index-aligned assignment back into
# movies_clean pairs up the wrong rows whenever its index is not 0..n-1.
movies_norm = pd.DataFrame(
    movies_scaled,
    columns=norm_columns,
    index=movies_clean.index,
)
movies_norm.head()
# Order the movies by popularity, most popular first.
popular = movies_ranked.sort_values(by='popularity', ascending=False)

# Horizontal bar chart of the ten most popular titles.
plt.figure(figsize=(16, 6))
ax = sns.barplot(
    x=popular['popularity'].iloc[:10],
    y=popular['original_title'].iloc[:10],
    data=popular,
    palette='deep',
)
ax.set_title('"Most Popular" Movies by TMDB Votes', weight='bold')
ax.set_xlabel('Popularity Score', weight='bold')
ax.set_ylabel('Movie Title', weight='bold')
import matplotlib.pyplot as plt
import seaborn as sns
# Order the movies by weighted average rating, highest first.
wavg = movies_ranked.sort_values(by='weighted_average', ascending=False)

# Horizontal bar chart of the ten best-rated titles; the x-axis is clipped to
# 6.75-8.35 so the differences between the bars are visible.
plt.figure(figsize=(16, 6))
ax = sns.barplot(
    x=wavg['weighted_average'].iloc[:10],
    y=wavg['original_title'].iloc[:10],
    data=wavg,
    palette='deep',
)
ax.set_xlim(6.75, 8.35)
# Ingredients of the weighted rating.
V = movies_clean['vote_count']     # vote_count column
R = movies_clean['vote_average']   # vote_average column
C = R.mean()                       # mean of vote_average over all rows
m = V.quantile(0.70)               # 70th-percentile of vote_count

# Blend each movie's own rating R with the overall mean C: the weight on R is
# V / (V + m), so the fewer votes a movie has, the closer it is pulled to C.
movies_clean['weighted_average'] = (
    (V / (V + m) * R)
    + (m / (m + V) * C)
)
@deansublett
deansublett / rec_system_movies.ipynb
Created June 5, 2019 17:13
rec_system_movies.ipynb
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.