Skip to content

Instantly share code, notes, and snippets.

@vaibhavgehani
Created July 19, 2020 05:07
Show Gist options
  • Save vaibhavgehani/fd0685215efaecdf30068e961d8069be to your computer and use it in GitHub Desktop.
Save vaibhavgehani/fd0685215efaecdf30068e961d8069be to your computer and use it in GitHub Desktop.
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the movie metadata from the CSV file in the working directory.
movie_data = pd.read_csv('movies.csv')

# Vectorize the overview text with TF-IDF, ignoring English stop words.
# Missing overviews become empty strings so the vectorizer never sees NaN.
tfidf = TfidfVectorizer(stop_words='english')
movie_data['overview'] = movie_data['overview'].fillna('')
tfidf_matrix = tfidf.fit_transform(movie_data['overview'])

# Title -> row label lookup. Series.drop_duplicates() compares the *values*
# (the row labels, which are already unique), so it never removed repeated
# titles; de-duplicate on the index instead and keep the first occurrence so
# that a title lookup always yields a single row.
indices = pd.Series(movie_data.index, index=movie_data['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# Pairwise similarity between every pair of movies, based on their overviews.
# TfidfVectorizer L2-normalises each row by default, so the linear kernel
# (plain dot product) is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
print(cosine_sim.shape)
def recommend_movie(movieName, cosine_sim=cosine_sim, top_n=5):
    """Return the movies whose overviews are most similar to *movieName*.

    Args:
        movieName: Title to look up in the module-level ``indices`` Series.
        cosine_sim: Pairwise similarity matrix; row ``i`` is read as the
            scores of movie ``i`` against every row of ``movie_data``.
        top_n: Maximum number of recommendations to return. Defaults to 5,
            the number of rows the original ``[1:6]`` slice produced.

    Returns:
        A DataFrame holding the title, spoken_languages, popularity,
        release_date, runtime and poster_path columns of the best matches,
        ordered from most to least similar, or ``None`` (after printing a
        message) when the title is not present in ``indices``.
    """
    # Only the title lookup is guarded: an unknown title is the expected
    # failure, anything else is a real bug that should not be hidden.
    try:
        indx = indices[movieName]
    except KeyError:
        print(f'Error: no movie titled {movieName!r} was found')
        return None

    # A title that occurs more than once yields a Series of rows; use the
    # first one so the similarity row below stays one-dimensional.
    if isinstance(indx, pd.Series):
        indx = indx.iloc[0]

    # sorted() is stable, so equally scored movies keep their dataset order.
    ranked = sorted(
        enumerate(cosine_sim[indx]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Drop the query movie explicitly instead of assuming it sorts first:
    # with tied scores (e.g. an empty or duplicated overview) it may not.
    # NOTE(review): positions are compared with the value from ``indices``,
    # which assumes movie_data has its default 0..n-1 index - confirm.
    best_rows = [row for row, _ in ranked if row != indx][:top_n]

    columns = ['title', 'spoken_languages', 'popularity',
               'release_date', 'runtime', 'poster_path']
    return movie_data[columns].iloc[best_rows]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment