public

Twitter Friends Recommender

  • Download Gist
friends_recommender.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
#-*-coding: utf-8 -*-
 
'''
 
This module represents the recommender system for recommending
new friends based on 'mutual friends'.
 
 
'''
__author__ = 'Marcel Caraciolo <caraciol@gmail.com>'
 
from mrjob.job import MRJob
 
def combinations(iterable, r):
"""
Implementation of itertools combinations method. Re-implemented here because
of import issues in Amazon Elastic MapReduce. Was just easier to do this than
bootstrap.
More info here: http://docs.python.org/library/itertools.html#itertools.combinations
 
Input/Output:
 
combinations('ABCD', 2) --> AB AC AD BC BD CD
combinations(range(4), 3) --> 012 013 023 123
"""
pool = tuple(iterable)
n = len(pool)
if r > n:
return
indices = range(r)
yield tuple(pool[i] for i in indices)
while True:
for i in reversed(range(r)):
if indices[i] != i + n - r:
break
else:
return
indices[i] += 1
for j in range(i + 1, r):
indices[j] = indices[j - 1] + 1
yield tuple(pool[i] for i in indices)
 
 
TOP_N = 10
 
 
class FriendsRecommender(MRJob):
 
def steps(self):
return [self.mr(self.map_input, self.reverse_index),
self.mr(None, self.count_number_of_friends),
self.mr(None, self.top_recommendations)]
 
def map_input(self, key, line):
'''
Compute the reversed index for each connection
between the source.
Input (source -> {“friend1”, “friend2”, “friend3”}):
marcel,jonas,maria,jose,amanda
 
Output {[friend1], source, 1,
{“friend1”, “friend2”, “friend3”}};
{[friend2], source, 1,
{“friend1”, “friend2”, “friend3”}};
 
 
'''
input = line.split(';')
user_id, item_ids = input[0], input[1:]
for i in range(len(item_ids)):
f1 = item_ids[i]
yield f1, (user_id, 1, item_ids)
 
def reverse_index(self, key, values):
'''
Compute the cartesian product between among all
sources that shares common friends.
 
Input (friend1 -> (source, 1,
{“friend1”, “friend2”, “friend3”}};
 
marcel, jonas,maria,jose,amanda
 
Output {(jonas, maria), 1}, {(maria, jose), 1},
{(jonas,amanda), 1}, {(maria, amanda),1},
{(jose, jonas), 1}
 
'''
final_list = []
for value in values:
final_list.append(value)
 
for item1, item2 in combinations(final_list, 2):
f1, count1, f1_friends = item1
f2, count2, f2_friends = item2
if f1 not in f2_friends:
yield (f2, f1), (count1)
if f2 not in f1_friends:
yield (f1, f2), (count2)
 
def count_number_of_friends(self, key, values):
'''
Count the number of mutual friends.
 
Input {(source, recommended), [count1, count2, ... count n]}
 
{(jonas, maria), [1,1,1]},
{(jose, jonas), [1,1]},
{(maria, amanda), [1]},
{(maria,jose), [1]},
{(jonas,amanda), [1, 1, 1]}
 
Output ({friend1, friend2} numberOfMutualFriends):
 
(jonas, marcel ) 1
(fabiola, marcel) 1
(marcel, patricia) 1
(marcel, paula) 1
(carol, marcel) 2
'''
 
user_id, possible_friend = key
 
yield (user_id), \
(possible_friend, sum(values))
 
def top_recommendations(self, key, values):
'''
Get the TOP N recommendations for user.
 
Input ({user_id, [(numberOfMutualFriends, friend),
(numberOfMutualFriends2, friend)]}):
[{marcel, [(patricia, 1), (paula, 1)]},
{carol, [(marcel, 2)]},
{fabiola, [(marcel,1)]},
{jonas, [(marcel,1)]}]
 
Output ({user_id, [(numberOfMutualFriends, friend),
(numberOfMutualFriends2, friend)]}):
Ex: Get the top 3 suggestions.
 
[{marcel, [(patricia, 1), (paula, 1)]},
{carol, [(marcel, 2)]},
{fabiola, [(marcel,1)]},
{jonas, [(marcel,1)]}]
 
'''
recommendations = []
for idx, (item, score) in enumerate(values):
recommendations.append((item, score))
 
yield key, sorted(recommendations, key=lambda k: -k[1])[:TOP_N]
 
 
if __name__ == '__main__':
FriendsRecommender.run()

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.