Skip to content

Instantly share code, notes, and snippets.

@sente
Created April 8, 2014 00:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sente/10077852 to your computer and use it in GitHub Desktop.
Save sente/10077852 to your computer and use it in GitHub Desktop.
from collections import defaultdict
import operator
# Load data: every row of the CSV is parsed as "user,brand".
brand_users = defaultdict(list)  # Given a brand, which users are followers
user_brands = defaultdict(list)  # Given a user, which brands does the user follow
# The with-block closes the file handle even if a row fails to parse.
with open('data/user-brands.csv') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline) instead of failing
            # on the two-value unpack below.
            continue
        # Split on the first comma only; everything after it is the brand.
        user, brand = line.split(',', 1)
        brand_users[brand].append(user)
        user_brands[user].append(brand)
# Create similarity "matrix"
# Maps a tuple of two brand names (sorted alphabetically) to the Jaccard
# coefficient of their follower sets: |A & B| / |A | B|.
similarity = {}
# Build each brand's follower set once rather than on every comparison.
follower_sets = {brand: set(users) for brand, users in brand_users.items()}
brand_list = sorted(follower_sets)
for index, brand1 in enumerate(brand_list):
    users1 = follower_sets[brand1]
    # Pair brand1 only with brands that sort after it: each unordered pair is
    # computed exactly once and (brand1, brand2) is already in sorted order.
    for brand2 in brand_list[index + 1:]:
        users2 = follower_sets[brand2]
        union_size = len(users1 | users2)
        if union_size:
            # "* 1.0" forces float division under Python 2 as well.
            sim = len(users1 & users2) * 1.0 / union_size
        else:
            # Two brands without any followers: avoid dividing by zero.
            sim = 0.0
        similarity[(brand1, brand2)] = sim
# List all similarity scores
# print(sorted(similarity.items(), key=operator.itemgetter(1)))
def get_similar_brands(brand):
    """Given a brand, return similar brands with scores.

    Looks up the module-level ``similarity`` table for ``brand`` paired with
    every other key of ``brand_users``. A pair that is missing from the table
    counts as a score of 0.

    Args:
        brand: Name of the brand to compare against.

    Returns:
        A ``defaultdict(int)`` mapping each other brand to its similarity
        score. Only brands whose score is greater than 0 are included.
    """
    brand_scores = defaultdict(int)
    for other_brand in brand_users:
        if other_brand == brand:
            continue
        # similarity is keyed by the two brand names in sorted order.
        key = tuple(sorted([brand, other_brand]))
        sim = similarity.get(key, 0)
        if sim > 0:
            # Each other_brand is visited once, so plain assignment suffices.
            brand_scores[other_brand] = sim
    return brand_scores
def get_brand_recommendations(user, limit=10):
    """Given a user, return recommended brands with scores.

    For every brand the user follows, the scores returned by
    ``get_similar_brands`` are summed per candidate brand. Brands the user
    already follows are never recommended.

    Args:
        user: User identifier, as stored in the keys of ``user_brands``.
        limit: Maximum number of recommendations to return (default 10).

    Returns:
        A list of ``(brand, score)`` tuples sorted by descending score,
        truncated to ``limit`` entries. Empty for an unknown user.
    """
    # .get() keeps unknown users from being inserted into the defaultdict.
    followed_brands = user_brands.get(user, ())
    # Set membership is O(1); the list is still iterated below so duplicate
    # rows keep contributing exactly as before.
    already_followed = set(followed_brands)
    all_brand_scores = defaultdict(int)
    for brand in followed_brands:
        for candidate, score in get_similar_brands(brand).items():
            if candidate not in already_followed:
                all_brand_scores[candidate] += score
    # .items() works on both Python 2 and Python 3 (iteritems is Python 2 only).
    ranked = sorted(all_brand_scores.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:limit]
# Example run: show one user's current brands and recommendations.
user = '90217'
# user = '89112'
# user = '89116'
# Single-argument print(...) produces the same output on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print("Current brands: {}".format(user_brands.get(user)))
print("Recommendations:")
print(get_brand_recommendations(user))
"""Optimizations:
1. Only compute similarity scores for brands with at least N followers (N still to be chosen)
2. Drop recommendations whose score falls below a minimum threshold
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment