# NOTE: the page this snippet was copied from warned that it contains hidden or
# bidirectional Unicode text, which may be interpreted or compiled differently
# than it appears. Review it in an editor that reveals hidden Unicode characters.
# Attach the movie bias values to the combined frame and bucket them for plotting.
df_combine['bias'] = movie_bias
# An integer bin count makes pd.cut split the observed bias range into five
# equal-width intervals; labels 0..4 run from the lowest to the highest interval.
df_combine['bias_bin'] = pd.cut(df_combine['bias'], bins=5, labels=[0, 1, 2, 3, 4])
# One colour name per bias bin label.
cluster_colors = {0: 'blue', 1: 'green', 2: 'yellow', 3: 'red', 4: 'black'}
df_combine['colors'] = df_combine['bias_bin'].apply(lambda bin_label: cluster_colors[bin_label])
df_combine.head()
# Display names keyed by 0..4 -- presumably the same bias bins; confirm where it is used.
top_labels1 = {0: 'V.low', 1: 'Low', 2: 'Medium', 3: 'High', 4: 'V. High'}
source = ColumnDataSource(dict( |
# NOTE: the page this snippet was copied from warned that it contains hidden or
# bidirectional Unicode text, which may be interpreted or compiled differently
# than it appears. Review it in an editor that reveals hidden Unicode characters.
# Rows of the ratings table that belong to user 547.
user_547 = ratings[ratings['userId'] == 547]
# id for the movies rated by user 547, to link them to the embedding and bias
# matrix created by the model
u547MovieIdx = np.array([cf.item2idx[movie_id] for movie_id in user_547.movieId])
u547Ratings = user_547.rating
# Single-column frame of titles, in the same order as user_547's rows. Naming the
# column at construction also works when the user has no ratings at all.
df_combine_u547 = pd.DataFrame(
    {'title': [movie_names[movie_id] for movie_id in user_547.movieId]}
)
movie_emb_u547 = to_np(m.i(V(u547MovieIdx)))  # converting the torch embedding to numpy matrix
# applying tsne to movie embeddings for movies rated by user 547
tsne = TSNE(n_components=2, verbose=1, perplexity=30, n_iter=1000, learning_rate=10)
tsne_results = tsne.fit_transform(movie_emb_u547)
# NOTE: the page this snippet was copied from warned that it contains hidden or
# bidirectional Unicode text, which may be interpreted or compiled differently
# than it appears. Review it in an editor that reveals hidden Unicode characters.
movie_bias = to_np(m.ib(V(topMovieIdx)))  # extracting movie bias and converting it to numpy matrix
# Pair the first element of each bias row with the title of the matching movie id.
movie_ratings = [
    (bias_row[0], movie_names[movie_id])
    for movie_id, bias_row in zip(topMovies, movie_bias)
]
sorted(movie_ratings, key=lambda pair: pair[0])[:15]  # worst movies based on bias ranking
sorted(movie_ratings, key=lambda pair: pair[0], reverse=True)[:15]  # top movies based on bias ranking
# NOTE: the page this snippet was copied from warned that it contains hidden or
# bidirectional Unicode text, which may be interpreted or compiled differently
# than it appears. Review it in an editor that reveals hidden Unicode characters.
from sklearn.manifold import TSNE
movies = pd.read_csv(f'{path}movies.csv')  # loading movies file
movie_names = movies.set_index('movieId')['title'].to_dict()  # creating dictionary of movieid: movie title
g = ratings.groupby('movieId')['rating'].count()  # counting the number of ratings for each movie
topMovies = g.sort_values(ascending=False).index.values[:3000]  # top 3000 movies based on number of ratings
# id for the top movies to link it to the embedding and bias matrix created by the model
topMovieIdx = np.array([cf.item2idx[movie_id] for movie_id in topMovies])
# retrieving the model and shifting to gpu
m = learn.model
m.cuda()
movie_emb = to_np(m.i(V(topMovieIdx)))  # converting the torch embedding to numpy matrix
# NOTE: the page this snippet was copied from warned that it contains hidden or
# bidirectional Unicode text, which may be interpreted or compiled differently
# than it appears. Review it in an editor that reveals hidden Unicode characters.
# imports
import torch
from bokeh.plotting import figure, show, output_notebook, save
from bokeh.models import HoverTool, value, LabelSet, Legend, ColumnDataSource
output_notebook()
# fastai related imports
from fastai.learner import *
from fastai.column_data import *
path = 'data/movielens/ml-latest-small/'  # path to the dataset