Instantly share code, notes, and snippets.

# marcelcaraciolo/friends_recommender_exp.py Created Oct 11, 2012

friends recommender with explanations
 #-*-coding: utf-8 -*- ''' This module represents the recommender system for recommending new friends based on 'mutual friends'. ''' __author__ = 'Marcel Caraciolo ' from mrjob.job import MRJob def combinations(iterable, r): """ Implementation of itertools combinations method. Re-implemented here because of import issues in Amazon Elastic MapReduce. Was just easier to do this than bootstrap. More info here: http://docs.python.org/library/itertools.html#itertools.combinations Input/Output: combinations('ABCD', 2) --> AB AC AD BC BD CD combinations(range(4), 3) --> 012 013 023 123 """ pool = tuple(iterable) n = len(pool) if r > n: return indices = range(r) yield tuple(pool[i] for i in indices) while True: for i in reversed(range(r)): if indices[i] != i + n - r: break else: return indices[i] += 1 for j in range(i + 1, r): indices[j] = indices[j - 1] + 1 yield tuple(pool[i] for i in indices) TOP_N = 10 class FriendsRecommender(MRJob): def steps(self): return [self.mr(self.map_input, self.reverse_index), self.mr(None, self.count_number_of_friends), self.mr(None, self.top_recommendations)] def map_input(self, key, line): ''' Compute the reversed index for each connection between the source. Input (source -> {“friend1”, “friend2”, “friend3”}): marcel,jonas,maria,jose,amanda Output {[friend1], source, 1, friend1, {“friend1”, “friend2”, “friend3”}}; {[friend2], source, 1, friend2, {“friend1”, “friend2”, “friend3”}}; ''' input = line.split(';') user_id, item_ids = input[0], input[1:] for i in range(len(item_ids)): f1 = item_ids[i] yield f1, (user_id, 1, f1, item_ids) def reverse_index(self, key, values): ''' Compute the cartesian product between among all sources that shares common friends. Input (friend1 -> (source, 1, friend1, {“friend1”, “friend2”, “friend3”}}; marcel, jonas,maria,jose,amanda Output {(jonas, maria), (1, jonas)}, {(maria, jose), (1, maria)}, {(jonas,amanda), (1, jonas)}, {(maria, amanda),(1, maria)}, {(jose, jonas), (1, jonas)} ''' final_list = [] for value in values: final_list.append(value) for item1, item2 in combinations(final_list, 2): f1, count1, exp1, f1_friends = item1 f2, count2, exp2, f2_friends = item2 if f1 not in f2_friends: yield (f2, f1), (count1, exp1) if f2 not in f1_friends: yield (f1, f2), (count2, exp2) def count_number_of_friends(self, key, values): ''' Count the number of mutual friends. Input {(source, recommended), [(count1,exp), (count2,exp), ... (count n,exp)]} {(jonas, maria), [(1,marcel),(1, marcel),(1, marcel)]}, {(jose, jonas), [(1,marcel),(1, marcel)]}, {(maria, amanda), [(1,marcel)]}, {(maria,jose), [(1, marcel)]}, {(jonas,amanda), [(1,marcel), (1,marcel), (1,marcel)]} Output ({friend1, friend2} numberOfMutualFriends): (jonas, marcel ) (1, [marcel]] (fabiola, marcel) (1, [marcel]) (marcel, patricia) (1, [marcel]) (marcel, paula) (1, [marcel]) (carol, marcel) (2, [maria, jonas]) ''' user_id, possible_friend = key mutual_friends_count = 0 mutual_friends = [] for value in values: count, exp = value mutual_friends_count += count mutual_friends.append(exp) yield (user_id), \ (possible_friend, mutual_friends_count, mutual_friends) def top_recommendations(self, key, values): ''' Get the TOP N recommendations for user. Input ({user_id, [(numberOfMutualFriends, friend, [exp1,exp2]), (numberOfMutualFriends2, friend,[exp1,exp2,expn])]}): [{marcel, [(patricia, 1, [jonas]), (paula, 1, [flavia])]}, {carol, [(marcel, 2, [jonas, flavia])]}, {fabiola, [(marcel,1, [maria])]}, {jonas, [(marcel,1, [fabiola])]}] Output ({user_id, [(numberOfMutualFriends, friend, [exp1, exp2]), (numberOfMutualFriends2, friend, [exp1, exp2])]}): Ex: Get the top 3 suggestions. [{marcel, [(patricia, 1, [jonas]), (paula, 1, [flavia])]}, {carol, [(marcel, 2, [jonas, flavia])]}, {fabiola, [(marcel,1, [maria])]}, {jonas, [(marcel,1, [fabiola])]}] ''' recommendations = [] for idx, (item, score, exp) in enumerate(values): recommendations.append((item, score, exp)) yield key, sorted(recommendations, key=lambda k: -k[1])[:TOP_N] if __name__ == '__main__': FriendsRecommender.run()