public

friends recommender with explanations

  • Download Gist
friends_recommender_exp.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
#-*-coding: utf-8 -*-
 
'''
 
This module represents the recommender system for recommending
new friends based on 'mutual friends'.
 
 
'''
__author__ = 'Marcel Caraciolo <caraciol@gmail.com>'
 
from mrjob.job import MRJob
 
def combinations(iterable, r):
"""
Implementation of itertools combinations method. Re-implemented here because
of import issues in Amazon Elastic MapReduce. Was just easier to do this than
bootstrap.
More info here: http://docs.python.org/library/itertools.html#itertools.combinations
 
Input/Output:
 
combinations('ABCD', 2) --> AB AC AD BC BD CD
combinations(range(4), 3) --> 012 013 023 123
"""
pool = tuple(iterable)
n = len(pool)
if r > n:
return
indices = range(r)
yield tuple(pool[i] for i in indices)
while True:
for i in reversed(range(r)):
if indices[i] != i + n - r:
break
else:
return
indices[i] += 1
for j in range(i + 1, r):
indices[j] = indices[j - 1] + 1
yield tuple(pool[i] for i in indices)
 
 
TOP_N = 10
 
 
class FriendsRecommender(MRJob):
 
def steps(self):
return [self.mr(self.map_input, self.reverse_index),
self.mr(None, self.count_number_of_friends),
self.mr(None, self.top_recommendations)]
 
def map_input(self, key, line):
'''
Compute the reversed index for each connection
between the source.
Input (source -> {“friend1”, “friend2”, “friend3”}):
marcel,jonas,maria,jose,amanda
 
Output {[friend1], source, 1, friend1,
{“friend1”, “friend2”, “friend3”}};
{[friend2], source, 1, friend2,
{“friend1”, “friend2”, “friend3”}};
 
 
'''
input = line.split(';')
user_id, item_ids = input[0], input[1:]
for i in range(len(item_ids)):
f1 = item_ids[i]
yield f1, (user_id, 1, f1, item_ids)
 
def reverse_index(self, key, values):
'''
Compute the cartesian product between among all
sources that shares common friends.
 
Input (friend1 -> (source, 1, friend1,
{“friend1”, “friend2”, “friend3”}};
 
marcel, jonas,maria,jose,amanda
 
Output {(jonas, maria), (1, jonas)}, {(maria, jose), (1, maria)},
{(jonas,amanda), (1, jonas)}, {(maria, amanda),(1, maria)},
{(jose, jonas), (1, jonas)}
'''
final_list = []
for value in values:
final_list.append(value)
 
for item1, item2 in combinations(final_list, 2):
f1, count1, exp1, f1_friends = item1
f2, count2, exp2, f2_friends = item2
if f1 not in f2_friends:
yield (f2, f1), (count1, exp1)
if f2 not in f1_friends:
yield (f1, f2), (count2, exp2)
 
def count_number_of_friends(self, key, values):
'''
Count the number of mutual friends.
 
Input {(source, recommended), [(count1,exp),
(count2,exp), ... (count n,exp)]}
 
{(jonas, maria), [(1,marcel),(1, marcel),(1, marcel)]},
{(jose, jonas), [(1,marcel),(1, marcel)]},
{(maria, amanda), [(1,marcel)]},
{(maria,jose), [(1, marcel)]},
{(jonas,amanda), [(1,marcel), (1,marcel), (1,marcel)]}
 
Output ({friend1, friend2} numberOfMutualFriends):
 
(jonas, marcel ) (1, [marcel]]
(fabiola, marcel) (1, [marcel])
(marcel, patricia) (1, [marcel])
(marcel, paula) (1, [marcel])
(carol, marcel) (2, [maria, jonas])
 
'''
 
user_id, possible_friend = key
mutual_friends_count = 0
mutual_friends = []
 
for value in values:
count, exp = value
mutual_friends_count += count
mutual_friends.append(exp)
 
yield (user_id), \
(possible_friend, mutual_friends_count, mutual_friends)
 
def top_recommendations(self, key, values):
'''
Get the TOP N recommendations for user.
 
Input ({user_id, [(numberOfMutualFriends, friend, [exp1,exp2]),
(numberOfMutualFriends2, friend,[exp1,exp2,expn])]}):
[{marcel, [(patricia, 1, [jonas]), (paula, 1, [flavia])]},
{carol, [(marcel, 2, [jonas, flavia])]},
{fabiola, [(marcel,1, [maria])]},
{jonas, [(marcel,1, [fabiola])]}]
 
Output ({user_id, [(numberOfMutualFriends, friend, [exp1, exp2]),
(numberOfMutualFriends2, friend, [exp1, exp2])]}):
Ex: Get the top 3 suggestions.
 
[{marcel, [(patricia, 1, [jonas]), (paula, 1, [flavia])]},
{carol, [(marcel, 2, [jonas, flavia])]},
{fabiola, [(marcel,1, [maria])]},
{jonas, [(marcel,1, [fabiola])]}]
'''
recommendations = []
for idx, (item, score, exp) in enumerate(values):
recommendations.append((item, score, exp))
 
yield key, sorted(recommendations, key=lambda k: -k[1])[:TOP_N]
 
 
if __name__ == '__main__':
FriendsRecommender.run()

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.