Skip to content

Instantly share code, notes, and snippets.

@acdick
Last active July 1, 2019 02:23
Show Gist options
  • Save acdick/fb76dcde98cd622f7756e9af9d54d464 to your computer and use it in GitHub Desktop.
Save acdick/fb76dcde98cd622f7756e9af9d54d464 to your computer and use it in GitHub Desktop.
Creating a Content-Based Product Similarity Matrix for a Recommender System
import pandas as pd
def product_similarity(items):
    """Calculate the content-based product similarity matrix.

    Args:
        items: pandas DataFrame containing all product details available for
            comparison. It must provide the columns 'URL', 'Original',
            'Discount', 'Gender', 'Made In' and 'Category'. The columns
            'Item', 'Style', 'Product' and 'On Sale' are discarded when
            present. 'Original' and 'Discount' are used as-is, so they are
            expected to be numeric (assumption -- confirm against the caller).

    Returns:
        A tuple ``(similarity_features, similarity_matrix)``: the
        dummy-encoded feature matrix indexed by 'URL', and the Pearson
        correlation matrix between products (one row and one column per URL).

    Raises:
        KeyError: if one of the required feature columns is missing.
    """
    # drop multicollinear columns and columns not considered for similarity;
    # none of them is used below, so a frame that lacks them is accepted too
    items = items.drop(
        ['Item', 'Style', 'Product', 'On Sale'], axis=1, errors='ignore')

    # diagnostic output: number of distinct values per remaining column
    print(items.nunique())

    # select continuous and categorical features for correlation
    similarity_features = items[
        ['URL', 'Original', 'Discount', 'Gender', 'Made In', 'Category']]
    similarity_features = similarity_features.set_index('URL')

    # dummy encoding of categorical variables; the dtype is pinned to float
    # because the default indicator dtype differs between pandas versions and
    # a bool/float mix turns the transposed frame into object dtype
    similarity_features = pd.get_dummies(
        similarity_features,
        columns=['Gender', 'Made In', 'Category'],
        dtype=float)

    # calculate correlation matrix: after the transpose every product is a
    # column, so corr() compares products rather than features
    similarity_matrix = similarity_features.T.corr(method='pearson')

    return similarity_features, similarity_matrix
def content_based_similarity(similarity_matrix, top_favorite, i):
    """Request a content-based recommendation.

    Args:
        similarity_matrix: the pre-computed correlation matrix, with product
            labels on both rows and columns.
        top_favorite: label of the user-rated favorite product.
        i: zero-based position in the ranking of the remaining products,
            ordered from most to least correlated with ``top_favorite``.

    Returns:
        The label of the i-th product most correlated with ``top_favorite``
        (``i == 0`` is the single best match).

    Raises:
        KeyError: if ``top_favorite`` is not a column of ``similarity_matrix``.
        IndexError: if ``i`` is outside the range of remaining products.
    """
    # sort all product correlations related to top_favorite, best first
    recommendations = similarity_matrix[top_favorite].sort_values(
        ascending=False)

    # drop auto-correlation of top_favorite so it never recommends itself
    recommendations = recommendations.drop([top_favorite], axis=0).index

    # return the product at rank i of the remaining candidates
    new_recommendation = recommendations[i]
    return new_recommendation
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment