This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dot_product(vector_1, vector_2):
    """Return the dot product of two numeric sequences.

    Elements are paired positionally with ``zip``, so when the inputs differ
    in length the surplus elements of the longer one are ignored. Two empty
    inputs yield 0.
    """
    # A generator avoids building a throwaway list just to sum it.
    return sum(i * j for i, j in zip(vector_1, vector_2))
def get_movie_score(movie_features, user_preferences):
    """Score a movie as the dot product of its features and the preferences.

    Both arguments are iterated in order and paired positionally by
    ``dot_product``, so the caller must supply them in the same category
    order.
    """
    return dot_product(movie_features, user_preferences)
def get_movie_recommendations(user_preferences, n_recommendations): | |
#We create a new column in the dataset with the value of each movie for the user | |
movies_df['score'] = movies_df[movie_categories].apply(get_movie_score, | |
args=([user_preferences.values()]), axis=1) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import OrderedDict

# Seed every category with a neutral weight of 0, in movie_categories order.
# The values of this mapping are paired positionally with the
# movies_df[movie_categories] columns when scoring, so the key order must
# follow the column order. (Zipping movie_categories with an empty list, as
# was done before, always produced an empty mapping whose order depended only
# on the assignments below.)
user_preferences = OrderedDict.fromkeys(movie_categories, 0)
user_preferences['Action'] = 5
user_preferences['Adventure'] = 5
user_preferences['Animation'] = 1
user_preferences["Children's"] = 1
user_preferences["Comedy"] = 3
user_preferences['Crime'] = 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Expand the pipe-separated movie_genre column into one indicator column per
# genre: a movie gets 1 in every genre listed for it and 0 in all the others.
genre_dummies = movies_df.movie_genre.str.get_dummies(sep='|')
movies_df = pd.concat([movies_df, genre_dummies], axis=1)

# Take the category names from the indicator frame itself instead of slicing
# movies_df.columns[3:], which silently breaks if movies_df does not have
# exactly three leading non-genre columns (or if this code is run twice).
movie_categories = genre_dummies.columns

# NOTE: as a bare expression this only displays something when it is the last
# statement of an interactive/notebook cell; otherwise the result is discarded.
movies_df.head()
print(movie_categories)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from zipfile import ZipFile | |
from io import StringIO | |
import io | |
from urllib.request import urlopen | |
import requests | |
# URL of the MovieLens 1M zip archive. The archive's contents are inspected
# before it is downloaded and unzipped.
zip_url = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import StandardScaler, Normalizer, PolynomialFeatures | |
from sklearn.decomposition import PCA | |
from sklearn.ensemble import RandomForestClassifier | |
pipe = Pipeline([ | |
('scaler', StandardScaler()), | |
('norm', Normalizer()), | |
('poly', PolynomialFeatures(degree=2)), | |
('norm2',Normalizer()), | |
('pca', PCA(n_components=3)), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hyper-parameter grid for a pipeline whose steps are named 'kBest' and 'svm'
# (the '<step>__<param>' keys route each value list to that step).
# NOTE(review): range() excludes its upper bound, so k runs from 1 to
# len(features) - 1 -- confirm that skipping k == len(features) is intended.
parameters = dict(
    kBest__k=range(1, len(features)),
    svm__kernel=['rbf', 'sigmoid'],
    svm__C=[0.1, 1, 10, 100, 1000],
    svm__gamma=[0.1, 0.01, 0.001, 0.0001, 0.00001],
    svm__random_state=[0],
)

# Exhaustive search over the grid with 5-fold cross-validation.
grid = GridSearchCV(pipe, param_grid=parameters, cv=5)
grid.fit(x_train, y_train)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.svm import SVC

# Two-step pipeline: keep the 5 best features according to f_classif, then
# fit an RBF-kernel support vector classifier on them.
pipe = Pipeline([
    ('kBest', SelectKBest(f_classif, k=5)),
    ('svm', SVC(kernel='rbf')),
])
pipe.fit(x_train, y_train)

# Report the pipeline's score on the held-out test split.
print(u'The performance of the model is: %0.5f' % pipe.score(x_test, y_test))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hyper-parameter grid for a pipeline whose steps are named 'kBest' and 'rf'
# (the '<step>__<param>' keys route each value list to that step).
# NOTE(review): range() excludes its upper bound, so k runs from 1 to
# len(features) - 1 -- confirm that skipping k == len(features) is intended.
parameters = dict(
    kBest__k=range(1, len(features)),
    rf__n_estimators=[25, 50, 75],
    rf__max_depth=[1, 2, 3, 4, 5],
    rf__random_state=[0],
)

# Exhaustive search over the grid with 5-fold cross-validation.
grid = GridSearchCV(pipe, param_grid=parameters, cv=5)
grid.fit(x_train, y_train)

# Report the fitted search object's score on the held-out test split.
print(u'The performance of the model is: %0.5f' % grid.score(x_test, y_test))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier

# Two-step pipeline: keep the 5 best features according to f_classif, then
# fit a shallow random forest on them. random_state is pinned so the printed
# score is reproducible between runs, matching the rf__random_state=[0]
# setting used by the grid search over this same pipeline.
pipe = Pipeline([
    ('kBest', SelectKBest(f_classif, k=5)),
    ('rf', RandomForestClassifier(max_depth=2, random_state=0)),
])
pipe.fit(x_train, y_train)

# Report the pipeline's score on the held-out test split.
print(u'The performance of the model is: %0.5f' % pipe.score(x_test, y_test))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import GridSearchCV

# Search only over how many features the 'kBest' step keeps.
# NOTE(review): range() excludes its upper bound, so k runs from 1 to
# len(features) - 1 -- confirm that skipping k == len(features) is intended.
parameters = dict(kBest__k=range(1, len(features)))

# Exhaustive search over the grid with 5-fold cross-validation.
grid = GridSearchCV(pipe, param_grid=parameters, cv=5)
grid.fit(x_train, y_train)

# Report the fitted search object's score on the held-out test split.
print(u'The performance of the model is: %0.5f' % grid.score(x_test, y_test))