James Kirk jfkirk

## tensorrec_getting_started_prediction.py
# Select the feature row for user 432. The matrix is converted to CSR before
# indexing (presumably because its original format does not support row lookup)
u432_features = sparse.csr_matrix(user_indicator_features)[432]

# Predict item ranks using only that one user row; [0] takes the first row of the result
u432_rankings = hybrid_model.predict_rank(
    user_features=u432_features,
    item_features=full_item_features,
)[0]

# Internal IDs of the items ranked 10 or better for User 432
# The IDs come back in ascending item-ID order, not in rank order
# Items User 432 has already interacted with are not filtered out here
u432_top_ten_recs = numpy.nonzero(u432_rankings <= 10)[0]
print("User 432 recommendations:")

## tensorrec_getting_started_hybrid.py
# Try concatenating the genres on to the indicator features for a hybrid recommender system
# sparse.hstack joins the two matrices column-wise, so both must have the same number of rows
full_item_features = sparse.hstack([item_indicator_features, movie_genre_features])

print("Training hybrid recommender")
# Five components, trained with the WMRB loss graph
hybrid_model = tensorrec.TensorRec(
    n_components=5,
    loss_graph=tensorrec.loss_graphs.WMRBLossGraph()
)
hybrid_model.fit(interactions=sparse_train_ratings_4plus,
                 user_features=user_indicator_features,

## tensorrec_getting_started_contentbased.py
# Fit a content-based model using the genres as item features
print("Training content-based recommender")
# n_components is tied to n_genres and the item representation graph is the
# pass-through variant. Presumably the item features are used unchanged as the
# item representation, which would require the component count to equal the
# genre feature width -- TODO confirm against the TensorRec documentation
content_model = tensorrec.TensorRec(
    n_components=n_genres,
    item_repr_graph=tensorrec.representation_graphs.FeaturePassThroughRepresentationGraph(),
    loss_graph=tensorrec.loss_graphs.WMRBLossGraph()
)
content_model.fit(interactions=sparse_train_ratings_4plus,
                  user_features=user_indicator_features,
                  item_features=movie_genre_features,

## tensorrec_getting_started_genres.py
# Build two lookups keyed by internal item ID: genre lists and titles
movie_genres_by_internal_id = {}
movie_titles_by_internal_id = {}
for row in raw_movie_metadata:
    # Rewrite the row in place: column 0 becomes the internal item ID and
    # column 2 becomes the list of genres obtained by splitting on '|'
    row[0], row[2] = movielens_to_internal_item_ids[int(row[0])], row[2].split('|')
    movie_titles_by_internal_id[row[0]] = row[1]
    movie_genres_by_internal_id[row[0]] = row[2]

# Look at an example movie metadata row

## tensorrec_getting_started_wmrb.py
# Let's try a new loss function: WMRB
print("Training collaborative filter with WMRB loss")
# Same component count as before (5); only the loss graph is swapped for WMRB
ranking_cf_model = tensorrec.TensorRec(n_components=5,
                                       loss_graph=tensorrec.loss_graphs.WMRBLossGraph())
# Train on the interactions filtered to ratings >= 4.0.
# n_sampled_items is 1% of the item count, truncated to an integer by int()
ranking_cf_model.fit(interactions=sparse_train_ratings_4plus,
                     user_features=user_indicator_features,
                     item_features=item_indicator_features,
                     n_sampled_items=int(n_items * .01))

# Check the results of the WMRB MF CF model

## tensorrec_getting_started_mf_cf_results.py
# Create sets of train/test interactions that are only ratings >= 4.0
# The comparison yields a boolean mask; the element-wise multiply keeps each
# rating where the mask is true and turns every other entry into zero
sparse_train_ratings_4plus = sparse_train_ratings.multiply(sparse_train_ratings >= 4.0)
sparse_test_ratings_4plus = sparse_test_ratings.multiply(sparse_test_ratings >= 4.0)


# This method consumes item ranks for each user and prints out recall@10 train/test metrics
def check_results(ranks):
    train_recall_at_10 = tensorrec.eval.recall_at_k(
        test_interactions=sparse_train_ratings_4plus,
        predicted_ranks=ranks,

## tensorrec_getting_started_mf_cf.py
# Construct indicator features for users and items
# Identity matrices: row i has a single 1 in column i, so every user/item
# is described by exactly one feature that belongs to it alone
user_indicator_features = sparse.identity(n_users)
item_indicator_features = sparse.identity(n_items)

# Build a matrix factorization collaborative filter model
# Only n_components is specified; all other TensorRec settings keep their defaults
cf_model = tensorrec.TensorRec(n_components=5)

# Fit the collaborative filter model
print("Training collaborative filter")
cf_model.fit(interactions=sparse_train_ratings,

## tensorrec_getting_started_sparseinteractions.py
# This method converts a list of (user, item, rating, time) to a sparse matrix
def interactions_list_to_sparse_matrix(interactions):
    """Build a COO sparse ratings matrix from interaction 4-tuples.

    Each element of ``interactions`` is unpacked as (user, item, rating, time).
    Users index rows, items index columns, ratings are the stored values and
    the time field is discarded. The shape is taken from the module-level
    ``n_users`` and ``n_items``.
    """
    row_indices, col_indices, values, _unused_times = zip(*interactions)
    coordinates = (row_indices, col_indices)
    return sparse.coo_matrix((values, coordinates), shape=(n_users, n_items))


# Create sparse matrices of interaction data
# Both matrices get the same (n_users, n_items) shape, which the helper fixes
# internally, so train and test line up row-for-row and column-for-column
sparse_train_ratings = interactions_list_to_sparse_matrix(train_ratings)
sparse_test_ratings = interactions_list_to_sparse_matrix(test_ratings)

## tensorrec_getting_started_traintest.py
# Randomize the order of the ratings in place, then cut the list at the 80% mark:
# everything before the cutoff is training data, everything after is test data
random.shuffle(raw_ratings)
cutoff = int(.8 * len(raw_ratings))
train_ratings, test_ratings = raw_ratings[:cutoff], raw_ratings[cutoff:]

## tensorrec_getting_started_loading.py
# Open and read in the ratings file
print('Loading ratings')
# newline='' hands newline handling to the csv module, as its documentation
# requires for file objects passed to csv.reader
with open('ratings.csv', 'r', newline='') as ratings_file:
    ratings_file_reader = csv.reader(ratings_file)
    raw_ratings = list(ratings_file_reader)
    # The first row is the column header, not a rating; remove it from the data
    raw_ratings_header = raw_ratings.pop(0)

# Iterate through the input to map MovieLens IDs to new internal IDs
# A lookup of an unseen MovieLens ID inserts it with the next free internal ID
def _next_internal_user_id():
    """Return the next unused internal user ID: the current size of the mapping."""
    return len(movielens_to_internal_user_ids)


movielens_to_internal_user_ids = defaultdict(_next_internal_user_id)
	# Select the feature row for user 432. The matrix is converted to CSR before
	# indexing (presumably because its original format does not support row lookup)
	u432_features = sparse.csr_matrix(user_indicator_features)[432]

	# Predict item ranks using only that one user row; [0] takes the first row of the result
	u432_rankings = hybrid_model.predict_rank(
		user_features=u432_features,
		item_features=full_item_features,
	)[0]

	# Internal IDs of the items ranked 10 or better for User 432
	# The IDs come back in ascending item-ID order, not in rank order
	# Items User 432 has already interacted with are not filtered out here
	u432_top_ten_recs = numpy.nonzero(u432_rankings <= 10)[0]
	print("User 432 recommendations:")
	# Try concatenating the genres on to the indicator features for a hybrid recommender system
	# sparse.hstack joins the two matrices column-wise, so both must have the same number of rows
	full_item_features = sparse.hstack([item_indicator_features, movie_genre_features])

	print("Training hybrid recommender")
	# Five components, trained with the WMRB loss graph
	hybrid_model = tensorrec.TensorRec(
	n_components=5,
	loss_graph=tensorrec.loss_graphs.WMRBLossGraph()
	)
	hybrid_model.fit(interactions=sparse_train_ratings_4plus,
	user_features=user_indicator_features,
	# Fit a content-based model using the genres as item features
	print("Training content-based recommender")
	# n_components is tied to n_genres and the item representation graph is the
	# pass-through variant. Presumably the item features are used unchanged as the
	# item representation -- TODO confirm against the TensorRec documentation
	content_model = tensorrec.TensorRec(
	n_components=n_genres,
	item_repr_graph=tensorrec.representation_graphs.FeaturePassThroughRepresentationGraph(),
	loss_graph=tensorrec.loss_graphs.WMRBLossGraph()
	)
	content_model.fit(interactions=sparse_train_ratings_4plus,
	user_features=user_indicator_features,
	item_features=movie_genre_features,
	# Map the MovieLens IDs to our internal IDs and keep track of the genres and titles
	movie_genres_by_internal_id = {}
	movie_titles_by_internal_id = {}
	for row in raw_movie_metadata:
		# Each row is rewritten in place before being recorded in the lookups
		row[0] = movielens_to_internal_item_ids[int(row[0])]  # Map to IDs
		# The separator is a bare pipe; an escaped '\|' would look for a literal
		# backslash-pipe and leave the genre string unsplit
		row[2] = row[2].split('|')  # Split up the genres
		movie_genres_by_internal_id[row[0]] = row[2]
		movie_titles_by_internal_id[row[0]] = row[1]

	# Look at an example movie metadata row
	# Let's try a new loss function: WMRB
	print("Training collaborative filter with WMRB loss")
	# Five components, with the loss graph set to WMRB
	ranking_cf_model = tensorrec.TensorRec(n_components=5,
	loss_graph=tensorrec.loss_graphs.WMRBLossGraph())
	# Train on the interactions filtered to ratings >= 4.0.
	# n_sampled_items is 1% of the item count, truncated to an integer by int()
	ranking_cf_model.fit(interactions=sparse_train_ratings_4plus,
	user_features=user_indicator_features,
	item_features=item_indicator_features,
	n_sampled_items=int(n_items * .01))

	# Check the results of the WMRB MF CF model
	# Create sets of train/test interactions that are only ratings >= 4.0
	# The comparison yields a boolean mask; the element-wise multiply keeps each
	# rating where the mask is true and turns every other entry into zero
	sparse_train_ratings_4plus = sparse_train_ratings.multiply(sparse_train_ratings >= 4.0)
	sparse_test_ratings_4plus = sparse_test_ratings.multiply(sparse_test_ratings >= 4.0)


	# This method consumes item ranks for each user and prints out recall@10 train/test metrics
	def check_results(ranks):
	train_recall_at_10 = tensorrec.eval.recall_at_k(
	test_interactions=sparse_train_ratings_4plus,
	predicted_ranks=ranks,
	# Construct indicator features for users and items
	# Identity matrices: row i has a single 1 in column i, so every user/item
	# is described by exactly one feature that belongs to it alone
	user_indicator_features = sparse.identity(n_users)
	item_indicator_features = sparse.identity(n_items)

	# Build a matrix factorization collaborative filter model
	# Only n_components is specified; all other TensorRec settings keep their defaults
	cf_model = tensorrec.TensorRec(n_components=5)

	# Fit the collaborative filter model
	print("Training collaborative filter")
	cf_model.fit(interactions=sparse_train_ratings,
	# This method converts a list of (user, item, rating, time) to a sparse matrix
	def interactions_list_to_sparse_matrix(interactions):
		"""Build a COO sparse ratings matrix from interaction 4-tuples.

		Each element of ``interactions`` is unpacked as (user, item, rating, time).
		Users index rows, items index columns, ratings are the stored values and
		the time field is discarded. The shape is taken from the module-level
		``n_users`` and ``n_items``.
		"""
		users_column, items_column, ratings_column, _ = zip(*interactions)
		return sparse.coo_matrix((ratings_column, (users_column, items_column)),
		                         shape=(n_users, n_items))


	# Create sparse matrices of interaction data
	# Both matrices are produced by the same helper, so they are built the same way
	sparse_train_ratings = interactions_list_to_sparse_matrix(train_ratings)
	sparse_test_ratings = interactions_list_to_sparse_matrix(test_ratings)
	# Randomize the order of the ratings in place, then cut the list at the 80% mark:
	# everything before the cutoff is training data, everything after is test data
	random.shuffle(raw_ratings)
	cutoff = int(.8 * len(raw_ratings))
	train_ratings, test_ratings = raw_ratings[:cutoff], raw_ratings[cutoff:]
	# Open and read in the ratings file
	print('Loading ratings')
	# newline='' hands newline handling to the csv module, as its documentation
	# requires for file objects passed to csv.reader
	with open('ratings.csv', 'r', newline='') as ratings_file:
		ratings_file_reader = csv.reader(ratings_file)
		raw_ratings = list(ratings_file_reader)
		# The first row is the column header, not a rating; remove it from the data
		raw_ratings_header = raw_ratings.pop(0)

	# Iterate through the input to map MovieLens IDs to new internal IDs
	# A lookup of an unseen MovieLens ID inserts it with the next free internal ID
	def _next_internal_user_id():
		"""Return the next unused internal user ID: the current size of the mapping."""
		return len(movielens_to_internal_user_ids)

	movielens_to_internal_user_ids = defaultdict(_next_internal_user_id)