This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Recommend for one user: take row 432 of the user feature matrix and rank
# every item for just that user. The matrix is converted to CSR first
# because CSR supports row indexing.
u432_features = sparse.csr_matrix(user_indicator_features)[432]
u432_rankings = hybrid_model.predict_rank(
    user_features=u432_features,
    item_features=full_item_features,
)[0]  # only one user row was passed in, so keep that single row of ranks

# Internal item IDs of User 432's top 10 recommendations.
# numpy.where returns them in ascending item-ID order, not in rank order.
# Items User 432 has already interacted with are not filtered out.
u432_top_ten_recs = numpy.where(u432_rankings <= 10)[0]
print("User 432 recommendations:")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hybrid recommender: the item features are the item indicator columns with
# the genre columns stacked on horizontally.
full_item_features = sparse.hstack([item_indicator_features, movie_genre_features])

print("Training hybrid recommender")
hybrid_model = tensorrec.TensorRec(
    n_components=5,
    loss_graph=tensorrec.loss_graphs.WMRBLossGraph(),
)
hybrid_model.fit(interactions=sparse_train_ratings_4plus, | |
user_features=user_indicator_features, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Content-based recommender that uses the genres as item features.
# The item representation graph is a feature pass-through, and n_components
# is set to the number of genres.
print("Training content-based recommender")
content_model = tensorrec.TensorRec(
    n_components=n_genres,
    item_repr_graph=tensorrec.representation_graphs.FeaturePassThroughRepresentationGraph(),
    loss_graph=tensorrec.loss_graphs.WMRBLossGraph(),
)
content_model.fit(interactions=sparse_train_ratings_4plus, | |
user_features=user_indicator_features, | |
item_features=movie_genre_features, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Map the MovieLens IDs to our internal IDs and keep track of the genres and
# titles. Each metadata row is also rewritten in place: column 0 becomes the
# internal item ID and column 2 becomes a list of genres.
movie_genres_by_internal_id = {}
movie_titles_by_internal_id = {}
for row in raw_movie_metadata:
    internal_id = movielens_to_internal_item_ids[int(row[0])]
    row[0] = internal_id  # Map to IDs
    genres = row[2].split('|')  # Genres arrive as one '|'-delimited string
    row[2] = genres
    movie_genres_by_internal_id[internal_id] = genres
    movie_titles_by_internal_id[internal_id] = row[1]
# Look at an example movie metadata row |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's try a new loss function: WMRB
print("Training collaborative filter with WMRB loss")
ranking_cf_model = tensorrec.TensorRec(
    n_components=5,
    loss_graph=tensorrec.loss_graphs.WMRBLossGraph(),
)
ranking_cf_model.fit(
    interactions=sparse_train_ratings_4plus,
    user_features=user_indicator_features,
    item_features=item_indicator_features,
    # Sample 1% of the items. int() truncates, so clamp to at least one item
    # to keep the sample from collapsing to 0 when n_items < 100.
    n_sampled_items=max(1, int(n_items * .01)),
)
# Check the results of the WMRB MF CF model |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create sets of train/test interactions that are only ratings >= 4.0.
# The comparison yields a boolean mask; multiplying elementwise by that mask
# zeroes out every rating below 4.0 and leaves the others unchanged.
sparse_train_ratings_4plus = sparse_train_ratings.multiply(sparse_train_ratings >= 4.0)
sparse_test_ratings_4plus = sparse_test_ratings.multiply(sparse_test_ratings >= 4.0)
# This method consumes item ranks for each user and prints out recall@10 train/test metrics | |
def check_results(ranks): | |
train_recall_at_10 = tensorrec.eval.recall_at_k( | |
test_interactions=sparse_train_ratings_4plus, | |
predicted_ranks=ranks, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Construct indicator features for users and items: identity matrices, so
# each user and each item gets exactly one feature column of its own.
user_indicator_features = sparse.identity(n_users)
item_indicator_features = sparse.identity(n_items)

# Build a matrix factorization collaborative filter model
cf_model = tensorrec.TensorRec(n_components=5)

# Fit the collaborative filter model
print("Training collaborative filter")
cf_model.fit(interactions=sparse_train_ratings, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This method converts a list of (user, item, rating, time) to a sparse matrix
def interactions_list_to_sparse_matrix(interactions):
    """Convert (user, item, rating, time) rows to a sparse ratings matrix.

    Args:
        interactions: Iterable of 4-item rows ``(user, item, rating, time)``.
            ``user`` and ``item`` are used as row and column indices; the
            time field is ignored.

    Returns:
        A ``scipy.sparse.coo_matrix`` of shape ``(n_users, n_items)`` with
        each rating stored at ``[user, item]``. When ``interactions`` is
        empty the result is an all-zero matrix of the same shape.
    """
    shape = (n_users, n_items)

    # Transpose the rows into columns. An empty input transposes to nothing,
    # which cannot be unpacked into four columns, so handle it explicitly.
    columns = tuple(zip(*interactions))
    if not columns:
        return sparse.coo_matrix(shape)

    users_column, items_column, ratings_column, _ = columns
    return sparse.coo_matrix((ratings_column, (users_column, items_column)),
                             shape=shape)
# Create sparse matrices of interaction data, one per split
sparse_train_ratings = interactions_list_to_sparse_matrix(train_ratings)
sparse_test_ratings = interactions_list_to_sparse_matrix(test_ratings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shuffle the ratings and split them into train/test sets 80%/20%
random.shuffle(raw_ratings)  # Shuffles the list in-place
cutoff = int(.8 * len(raw_ratings))  # Index of the first test-set row
train_ratings = raw_ratings[:cutoff]
test_ratings = raw_ratings[cutoff:]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Open and read in the ratings file
print('Loading ratings')
# newline='' hands line-ending handling to the csv module, as its
# documentation requires for files passed to csv.reader.
with open('ratings.csv', 'r', newline='') as ratings_file:
    ratings_file_reader = csv.reader(ratings_file)
    raw_ratings = list(ratings_file_reader)
    raw_ratings_header = raw_ratings.pop(0)  # First row is the column header

# Iterate through the input to map MovieLens IDs to new internal IDs
# The new internal IDs will be created by the defaultdict on insertion:
# a missing key is assigned the dict's current size, so IDs are handed out
# as 0, 1, 2, ... in first-seen order.
movielens_to_internal_user_ids = defaultdict(lambda: len(movielens_to_internal_user_ids))
NewerOlder