Skip to content

Instantly share code, notes, and snippets.

# Store the bias values next to each row and bucket them into five bins
# labelled 0 (lowest bias range) through 4 (highest bias range).
bias_bin_labels = [0, 1, 2, 3, 4]
df_combine['bias'] = movie_bias
df_combine['bias_bin'] = pd.cut(df_combine['bias'], 5, labels=bias_bin_labels)

# One color name per bias bin.
cluster_colors = dict(zip(bias_bin_labels, ['blue', 'green', 'yellow', 'red', 'black']))
df_combine['colors'] = df_combine['bias_bin'].apply(
    lambda bin_label: cluster_colors[bin_label]
)

# Preview the first rows of the augmented frame.
df_combine.head()
# Display names keyed by index 0-4 (presumably one per bias bin -- confirm
# against the plotting code that consumes this mapping).
top_labels1 = dict(enumerate(['V.low', 'Low', 'Medium', 'High', 'V. High']))
source = ColumnDataSource(dict(
# Every rating submitted by user 547.
user_547 = ratings.loc[ratings['userId'] == 547]

# Model-side index of each movie this user rated; it links the movie to its
# row in the embedding and bias matrices created by the model.
u547MovieIdx = np.array(
    [cf.item2idx[movie_id] for movie_id in user_547['movieId']]
)
u547Ratings = user_547['rating']

# Single-column frame with the titles of those movies, in the same order.
df_combine_u547 = pd.DataFrame(
    [movie_names[movie_id] for movie_id in user_547['movieId']]
)
df_combine_u547.columns = ['title']

# Embeddings of the rated movies, converted from torch to a numpy matrix.
movie_emb_u547 = to_np(m.i(V(u547MovieIdx)))
# Apply t-SNE to the embeddings of the movies rated by user 547, reducing
# them to two components.
# NOTE(review): newer scikit-learn releases renamed `n_iter` to `max_iter`;
# confirm which spelling the installed version expects.
tsne_settings = dict(
    n_components=2,
    verbose=1,
    perplexity=30,
    n_iter=1000,
    learning_rate=10,
)
tsne = TSNE(**tsne_settings)
tsne_results = tsne.fit_transform(movie_emb_u547)
# Bias of each top movie, extracted from the model and converted to a numpy
# matrix.
movie_bias = to_np(m.ib(V(topMovieIdx)))

# Pair every bias value with the title of the movie it belongs to.
movie_ratings = [
    (bias_row[0], movie_names[movie_id])
    for movie_id, bias_row in zip(topMovies, movie_bias)
]

# Worst 15 movies by bias ranking (lowest bias first).
sorted(movie_ratings, key=lambda pair: pair[0])[:15]
# Top 15 movies by bias ranking (highest bias first).
sorted(movie_ratings, key=lambda pair: pair[0], reverse=True)[:15]
from sklearn.manifold import TSNE
# Load the movies file and build a {movieId: title} lookup from it.
movies = pd.read_csv(f'{path}movies.csv')
titles_by_id = movies.set_index('movieId')['title']
movie_names = titles_by_id.to_dict()

# Number of ratings each movie received.
g = ratings.groupby('movieId')['rating'].count()

# Ids of (up to) the 3000 movies with the most ratings, most-rated first.
most_rated_first = g.sort_values(ascending=False)
topMovies = most_rated_first.index.values[:3000]

# Model-side index of each top movie; it links the movie to its row in the
# embedding and bias matrices created by the model.
topMovieIdx = np.array([cf.item2idx[movie_id] for movie_id in topMovies])

# Retrieve the model and shift it to the GPU.
m = learn.model
m.cuda()

# Embeddings of the top movies, converted from torch to a numpy matrix.
movie_emb = to_np(m.i(V(topMovieIdx)))
#imports
import torch
from bokeh.plotting import figure, show, output_notebook, save
from bokeh.models import HoverTool, value, LabelSet, Legend, ColumnDataSource
output_notebook()
#fastai related imports
from fastai.learner import *
from fastai.column_data import *
# Directory holding the dataset files. File names are appended to this string
# directly (e.g. f'{path}movies.csv'), so the trailing slash must stay.
path = 'data/movielens/ml-latest-small/'