Skip to content

Instantly share code, notes, and snippets.

View jfkirk's full-sized avatar

James Kirk jfkirk

View GitHub Profile
@jfkirk
jfkirk / tensorrec_keras_blog_example.py
Last active May 9, 2018 13:45
TensorRec Keras Blog Example
import keras as ks
from tensorrec import TensorRec
from tensorrec.representation_graphs import AbstractKerasRepresentationGraph
# My new deep net representation graph
class DeepRepresentationGraph(AbstractKerasRepresentationGraph):
def create_layers(self, n_features, n_components):
return [
ks.layers.Dense(n_components * 16, activation='relu'),
ks.layers.Dense(n_components * 8, activation='relu'),
# Open and read in the ratings file.
print('Loading ratings')
# The csv module documentation asks for files to be opened with newline=''
# so the reader can do its own newline handling; without it, quoted fields
# that contain line breaks can be misparsed.
with open('ratings.csv', 'r', newline='') as ratings_file:
    ratings_file_reader = csv.reader(ratings_file)
    # The reader is lazy, so materialize every row while the file is open.
    raw_ratings = list(ratings_file_reader)
# The first row is the column header rather than a rating: remove it from
# the data and keep it around under its own name.
raw_ratings_header = raw_ratings.pop(0)
# Iterate through the input to map MovieLens IDs to new internal IDs
# The new internal IDs will be created by the defaultdict on insertion
# The default factory closes over the dict itself: looking up an unseen key
# stores len(dict) as its value (the size *before* the key is inserted), so
# internal IDs are handed out as 0, 1, 2, ... in order of first lookup, and
# looking up the same key again returns the ID it was given the first time.
# NOTE(review): any `d[key]` read of an unknown key inserts it; use
# `key in d` or `.get()` when only probing, otherwise the mapping grows.
movielens_to_internal_user_ids = defaultdict(lambda: len(movielens_to_internal_user_ids))
# Shuffle the ratings and split them into train/test sets (80% / 20%).
# random.shuffle works in place, so raw_ratings itself is reordered.
random.shuffle(raw_ratings)
# int() truncates, so the training set gets floor(0.8 * N) rows and the test
# set gets whatever is left over.
cutoff = int(0.8 * len(raw_ratings))
train_ratings, test_ratings = raw_ratings[:cutoff], raw_ratings[cutoff:]
# This method converts a list of (user, item, rating, time) to a sparse matrix
def interactions_list_to_sparse_matrix(interactions):
    """Build a COO matrix of ratings indexed by (user, item).

    Args:
        interactions: A non-empty iterable of 4-element records in the order
            (user, item, rating, time). The time element is ignored.

    Returns:
        The result of ``sparse.coo_matrix`` with the ratings as values, the
        users as row indices and the items as column indices. The shape is
        always ``(n_users, n_items)``, both read from module scope.

    Raises:
        ValueError: If ``interactions`` is empty or the records do not have
            exactly four elements (the unpacking below fails).
    """
    # Transpose the list of records into one tuple per field.
    user_ids, item_ids, rating_values, _timestamps = zip(*interactions)
    coordinates = (user_ids, item_ids)
    return sparse.coo_matrix(
        (rating_values, coordinates),
        shape=(n_users, n_items),
    )
# Create sparse matrices of interaction data
# Both matrices are built with the same (n_users, n_items) shape, so a given
# (row, column) refers to the same user/item pair in train and in test.
sparse_train_ratings = interactions_list_to_sparse_matrix(train_ratings)
sparse_test_ratings = interactions_list_to_sparse_matrix(test_ratings)
# Construct indicator features for users and items
# An identity matrix gives every user (and every item) a feature row with a
# single 1 in its own column, i.e. the only "feature" is the ID itself.
# NOTE(review): assumes `sparse` is scipy.sparse - confirm against the imports.
user_indicator_features = sparse.identity(n_users)
item_indicator_features = sparse.identity(n_items)
# Build a matrix factorization collaborative filter model
# NOTE(review): n_components=5 is presumably the width of the learned
# user/item representations - confirm against the TensorRec documentation.
cf_model = tensorrec.TensorRec(n_components=5)
# Fit the collaborative filter model
print("Training collaborative filter")
cf_model.fit(interactions=sparse_train_ratings,
# Create sets of train/test interactions that are only ratings >= 4.0
# `matrix >= 4.0` compares elementwise and yields a mask; multiplying the
# ratings elementwise by that mask keeps the values that are >= 4.0 and turns
# every other rating into 0. The matrix shape is not changed by this.
# NOTE(review): assumes the stored ratings are numeric, not the raw CSV
# strings - the conversion is not visible here; confirm upstream.
sparse_train_ratings_4plus = sparse_train_ratings.multiply(sparse_train_ratings >= 4.0)
sparse_test_ratings_4plus = sparse_test_ratings.multiply(sparse_test_ratings >= 4.0)
# This method consumes item ranks for each user and prints out recall@10 train/test metrics
def check_results(ranks):
train_recall_at_10 = tensorrec.eval.recall_at_k(
test_interactions=sparse_train_ratings_4plus,
predicted_ranks=ranks,
# Let's try a new loss function: WMRB
print("Training collaborative filter with WMRB loss")
# Same 5-component setup as the other models in this script, but with the
# WMRB loss graph swapped in.
ranking_cf_model = tensorrec.TensorRec(
    n_components=5,
    loss_graph=tensorrec.loss_graphs.WMRBLossGraph(),
)
# Train only on the ratings that are >= 4.0.
# NOTE(review): int() truncates, so n_sampled_items is 0 whenever
# n_items < 100 - confirm the catalogue is always large enough.
ranking_cf_model.fit(
    interactions=sparse_train_ratings_4plus,
    user_features=user_indicator_features,
    item_features=item_indicator_features,
    n_sampled_items=int(n_items * 0.01),
)
# Check the results of the WMRB MF CF model
# Map the MovieLens IDs to our internal IDs and keep track of the genres and titles.
movie_genres_by_internal_id = {}
movie_titles_by_internal_id = {}
for row in raw_movie_metadata:
    # Each row is rewritten in place: column 0 becomes the internal item ID
    # and column 2 becomes a list of genre names instead of one '|'-joined
    # string. Column 1 (the title) is left untouched.
    # NOTE(review): if movielens_to_internal_item_ids is a defaultdict, a
    # movie that never appeared before gets a brand-new internal ID here.
    row[0] = movielens_to_internal_item_ids[int(row[0])]
    row[2] = row[2].split('|')
    # Index the genres and the title by the freshly mapped internal ID.
    movie_genres_by_internal_id[row[0]] = row[2]
    movie_titles_by_internal_id[row[0]] = row[1]
# Look at an example movie metadata row
# Fit a content-based model using the genres as item features
print("Training content-based recommender")
# Unlike the other models in this script (n_components=5), the representation
# width here is tied to the number of genres.
# NOTE(review): presumably the pass-through graph uses the genre features
# unchanged as the item representation, which is why the width must equal
# n_genres - confirm against the TensorRec documentation.
content_model = tensorrec.TensorRec(
n_components=n_genres,
item_repr_graph=tensorrec.representation_graphs.FeaturePassThroughRepresentationGraph(),
loss_graph=tensorrec.loss_graphs.WMRBLossGraph()
)
content_model.fit(interactions=sparse_train_ratings_4plus,
user_features=user_indicator_features,
item_features=movie_genre_features,
# Try concatenating the genres onto the indicator features for a hybrid recommender system
# hstack joins the two matrices side by side, indicator columns first and
# genre columns after them, so both inputs need the same number of rows.
# NOTE(review): assumes movie_genre_features has one row per item in the
# same internal-ID order as the identity matrix - confirm where it is built.
full_item_features = sparse.hstack([item_indicator_features, movie_genre_features])
print("Training hybrid recommender")
# Same configuration as the WMRB collaborative filter; only the item
# features passed to fit() are meant to differ.
hybrid_model = tensorrec.TensorRec(
n_components=5,
loss_graph=tensorrec.loss_graphs.WMRBLossGraph()
)
hybrid_model.fit(interactions=sparse_train_ratings_4plus,
user_features=user_indicator_features,