Shikhar Gupta shik3519

## gist:2af152e1013906750e62f6063509c77c
# Attach each movie's bias to the frame and bucket it into five equal-width
# bins labelled 0-4, so that every bin can be given a colour below.
df_combine['bias'] = movie_bias
df_combine['bias_bin'] = pd.cut(
    df_combine['bias'],
    bins=5,
    labels=[0, 1, 2, 3, 4],
)

# One display colour per bias bin.
cluster_colors = {
    0: 'blue',
    1: 'green',
    2: 'yellow',
    3: 'red',
    4: 'black',
}
df_combine['colors'] = df_combine['bias_bin'].apply(
    lambda bin_id: cluster_colors[bin_id]
)
df_combine.head()  # bare expression: previews the frame when run in a notebook

# Text label per bias bin; keys match the bin labels used above.
top_labels1 = {
    0: 'V.low',
    1: 'Low',
    2: 'Medium',
    3: 'High',
    4: 'V. High',
}

source = ColumnDataSource(dict(

## user547_embeddings.txt
# Keep only the ratings rows that belong to user 547.
user_547 = ratings.loc[ratings['userId'] == 547]

# Translate the ids of the movies this user rated into the model's item
# indices, so they can be looked up in the embedding matrix.
u547MovieIdx = np.array(
    [cf.item2idx[movie_id] for movie_id in user_547['movieId']]
)
u547Ratings = user_547['rating']

# Single-column frame with the title of every movie the user rated.
df_combine_u547 = pd.DataFrame(
    [movie_names[movie_id] for movie_id in user_547['movieId']]
)
df_combine_u547.columns = ['title']

# Embeddings of those movies, converted from torch to a numpy matrix.
movie_emb_u547 = to_np(m.i(V(u547MovieIdx)))

# Apply t-SNE to the embeddings of the movies rated by user 547 (2 components).
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` --
# confirm against the installed version before changing the keyword.
tsne = TSNE(
    n_components=2,
    verbose=1,
    perplexity=30,
    n_iter=1000,
    learning_rate=10,
)
tsne_results = tsne.fit_transform(movie_emb_u547)

## bias
# Bias of each top movie, extracted from the model as a numpy matrix.
movie_bias = to_np(m.ib(V(topMovieIdx)))

# Pair every bias value (first entry of its row) with the movie's title.
movie_ratings = [
    (bias_row[0], movie_names[movie_id])
    for movie_id, bias_row in zip(topMovies, movie_bias)
]

# Fifteen lowest-bias movies: the worst movies by bias ranking.
sorted(movie_ratings, key=lambda pair: pair[0])[:15]
# Fifteen highest-bias movies: the top movies by bias ranking.
sorted(movie_ratings, key=lambda pair: pair[0], reverse=True)[:15]

## tsne-embeddings
from sklearn.manifold import TSNE
# Load the movies file that sits under `path`.
movies = pd.read_csv(f'{path}movies.csv')

# Dictionary of movieId -> movie title.
movie_names = movies.set_index('movieId')['title'].to_dict()

# Number of ratings recorded for each movie.
g = ratings.groupby('movieId')['rating'].count()

# Ids of the (up to) 3000 movies with the most ratings, most-rated first.
topMovies = g.sort_values(ascending=False).index.values[:3000]

# Model item indices for those movies; they link each movie to the embedding
# and bias matrices created by the model.
topMovieIdx = np.array([cf.item2idx[movie_id] for movie_id in topMovies])

# Retrieve the model and shift it to the GPU.
m = learn.model
m.cuda()

# Embeddings of the top movies, converted from torch to a numpy matrix.
movie_emb = to_np(m.i(V(topMovieIdx)))

## loading data
# Notebook setup: imports, bokeh notebook output, and the dataset location.
#imports
import torch
# bokeh pieces used for plotting
from bokeh.plotting import figure, show, output_notebook, save
from bokeh.models import HoverTool, value, LabelSet, Legend, ColumnDataSource
output_notebook()  # direct bokeh output to the notebook
#fastai related imports
# NOTE(review): the other snippets use pd, np, to_np and V without importing
# them; presumably these star imports bring them into scope -- confirm.
# Keep the import order: star imports can shadow one another.
from fastai.learner import *
from fastai.column_data import *

path='data/movielens/ml-latest-small/' #path to the dataset; used as a prefix, so keep the trailing slash
	# Attach each movie's bias to the frame and bucket it into five equal-width
	# bins labelled 0-4, so that every bin can be given a colour below.
	df_combine['bias'] = movie_bias
	df_combine['bias_bin'] = pd.cut(
		df_combine['bias'],
		bins=5,
		labels=[0, 1, 2, 3, 4],
	)

	# One display colour per bias bin.
	cluster_colors = {
		0: 'blue',
		1: 'green',
		2: 'yellow',
		3: 'red',
		4: 'black',
	}
	df_combine['colors'] = df_combine['bias_bin'].apply(
		lambda bin_id: cluster_colors[bin_id]
	)
	df_combine.head()  # bare expression: previews the frame when run in a notebook

	# Text label per bias bin; keys match the bin labels used above.
	top_labels1 = {
		0: 'V.low',
		1: 'Low',
		2: 'Medium',
		3: 'High',
		4: 'V. High',
	}

	source = ColumnDataSource(dict(
	# Keep only the ratings rows that belong to user 547.
	user_547 = ratings.loc[ratings['userId'] == 547]

	# Translate the ids of the movies this user rated into the model's item
	# indices, so they can be looked up in the embedding matrix.
	u547MovieIdx = np.array(
		[cf.item2idx[movie_id] for movie_id in user_547['movieId']]
	)
	u547Ratings = user_547['rating']

	# Single-column frame with the title of every movie the user rated.
	df_combine_u547 = pd.DataFrame(
		[movie_names[movie_id] for movie_id in user_547['movieId']]
	)
	df_combine_u547.columns = ['title']

	# Embeddings of those movies, converted from torch to a numpy matrix.
	movie_emb_u547 = to_np(m.i(V(u547MovieIdx)))

	# Apply t-SNE to the embeddings of the movies rated by user 547 (2 components).
	# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` --
	# confirm against the installed version before changing the keyword.
	tsne = TSNE(
		n_components=2,
		verbose=1,
		perplexity=30,
		n_iter=1000,
		learning_rate=10,
	)
	tsne_results = tsne.fit_transform(movie_emb_u547)
	# Bias of each top movie, extracted from the model as a numpy matrix.
	movie_bias = to_np(m.ib(V(topMovieIdx)))

	# Pair every bias value (first entry of its row) with the movie's title.
	movie_ratings = [
		(bias_row[0], movie_names[movie_id])
		for movie_id, bias_row in zip(topMovies, movie_bias)
	]

	# Fifteen lowest-bias movies: the worst movies by bias ranking.
	sorted(movie_ratings, key=lambda pair: pair[0])[:15]
	# Fifteen highest-bias movies: the top movies by bias ranking.
	sorted(movie_ratings, key=lambda pair: pair[0], reverse=True)[:15]
	from sklearn.manifold import TSNE
	# Load the movies file that sits under `path`.
	movies = pd.read_csv(f'{path}movies.csv')

	# Dictionary of movieId -> movie title.
	movie_names = movies.set_index('movieId')['title'].to_dict()

	# Number of ratings recorded for each movie.
	g = ratings.groupby('movieId')['rating'].count()

	# Ids of the (up to) 3000 movies with the most ratings, most-rated first.
	topMovies = g.sort_values(ascending=False).index.values[:3000]

	# Model item indices for those movies; they link each movie to the embedding
	# and bias matrices created by the model.
	topMovieIdx = np.array([cf.item2idx[movie_id] for movie_id in topMovies])

	# Retrieve the model and shift it to the GPU.
	m = learn.model
	m.cuda()

	# Embeddings of the top movies, converted from torch to a numpy matrix.
	movie_emb = to_np(m.i(V(topMovieIdx)))
	# Notebook setup: imports, bokeh notebook output, and the dataset location.
	#imports
	import torch
	# bokeh pieces used for plotting
	from bokeh.plotting import figure, show, output_notebook, save
	from bokeh.models import HoverTool, value, LabelSet, Legend, ColumnDataSource
	output_notebook()  # direct bokeh output to the notebook
	#fastai related imports
	# NOTE(review): the other snippets use pd, np, to_np and V without importing
	# them; presumably these star imports bring them into scope -- confirm.
	# Keep the import order: star imports can shadow one another.
	from fastai.learner import *
	from fastai.column_data import *

	path='data/movielens/ml-latest-small/' #path to the dataset; used as a prefix, so keep the trailing slash