-
-
Save douglasmiranda/4029461 to your computer and use it in GitHub Desktop.
Twitter Friends Recommender
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*-coding: utf-8 -*- | |
''' | |
This module represents the recommender system for recommending | |
new friends based on 'mutual friends'. | |
''' | |
__author__ = 'Marcel Caraciolo <caraciol@gmail.com>' | |
from mrjob.job import MRJob | |
def combinations(iterable, r): | |
""" | |
Implementation of itertools combinations method. Re-implemented here because | |
of import issues in Amazon Elastic MapReduce. Was just easier to do this than | |
bootstrap. | |
More info here: http://docs.python.org/library/itertools.html#itertools.combinations | |
Input/Output: | |
combinations('ABCD', 2) --> AB AC AD BC BD CD | |
combinations(range(4), 3) --> 012 013 023 123 | |
""" | |
pool = tuple(iterable) | |
n = len(pool) | |
if r > n: | |
return | |
indices = range(r) | |
yield tuple(pool[i] for i in indices) | |
while True: | |
for i in reversed(range(r)): | |
if indices[i] != i + n - r: | |
break | |
else: | |
return | |
indices[i] += 1 | |
for j in range(i + 1, r): | |
indices[j] = indices[j - 1] + 1 | |
yield tuple(pool[i] for i in indices) | |
TOP_N = 10 | |
class FriendsRecommender(MRJob): | |
def steps(self): | |
return [self.mr(self.map_input, self.reverse_index), | |
self.mr(None, self.count_number_of_friends), | |
self.mr(None, self.top_recommendations)] | |
def map_input(self, key, line): | |
''' | |
Compute the reversed index for each connection | |
between the source. | |
Input (source -> {“friend1”, “friend2”, “friend3”}): | |
marcel,jonas,maria,jose,amanda | |
Output {[friend1], source, 1, | |
{“friend1”, “friend2”, “friend3”}}; | |
{[friend2], source, 1, | |
{“friend1”, “friend2”, “friend3”}}; | |
''' | |
input = line.split(';') | |
user_id, item_ids = input[0], input[1:] | |
for i in range(len(item_ids)): | |
f1 = item_ids[i] | |
yield f1, (user_id, 1, item_ids) | |
def reverse_index(self, key, values): | |
''' | |
Compute the cartesian product between among all | |
sources that shares common friends. | |
Input (friend1 -> (source, 1, | |
{“friend1”, “friend2”, “friend3”}}; | |
marcel, jonas,maria,jose,amanda | |
Output {(jonas, maria), 1}, {(maria, jose), 1}, | |
{(jonas,amanda), 1}, {(maria, amanda),1}, | |
{(jose, jonas), 1} | |
''' | |
final_list = [] | |
for value in values: | |
final_list.append(value) | |
for item1, item2 in combinations(final_list, 2): | |
f1, count1, f1_friends = item1 | |
f2, count2, f2_friends = item2 | |
if f1 not in f2_friends: | |
yield (f2, f1), (count1) | |
if f2 not in f1_friends: | |
yield (f1, f2), (count2) | |
def count_number_of_friends(self, key, values): | |
''' | |
Count the number of mutual friends. | |
Input {(source, recommended), [count1, count2, ... count n]} | |
{(jonas, maria), [1,1,1]}, | |
{(jose, jonas), [1,1]}, | |
{(maria, amanda), [1]}, | |
{(maria,jose), [1]}, | |
{(jonas,amanda), [1, 1, 1]} | |
Output ({friend1, friend2} numberOfMutualFriends): | |
(jonas, marcel ) 1 | |
(fabiola, marcel) 1 | |
(marcel, patricia) 1 | |
(marcel, paula) 1 | |
(carol, marcel) 2 | |
''' | |
user_id, possible_friend = key | |
yield (user_id), \ | |
(possible_friend, sum(values)) | |
def top_recommendations(self, key, values): | |
''' | |
Get the TOP N recommendations for user. | |
Input ({user_id, [(numberOfMutualFriends, friend), | |
(numberOfMutualFriends2, friend)]}): | |
[{marcel, [(patricia, 1), (paula, 1)]}, | |
{carol, [(marcel, 2)]}, | |
{fabiola, [(marcel,1)]}, | |
{jonas, [(marcel,1)]}] | |
Output ({user_id, [(numberOfMutualFriends, friend), | |
(numberOfMutualFriends2, friend)]}): | |
Ex: Get the top 3 suggestions. | |
[{marcel, [(patricia, 1), (paula, 1)]}, | |
{carol, [(marcel, 2)]}, | |
{fabiola, [(marcel,1)]}, | |
{jonas, [(marcel,1)]}] | |
''' | |
recommendations = [] | |
for idx, (item, score) in enumerate(values): | |
recommendations.append((item, score)) | |
yield key, sorted(recommendations, key=lambda k: -k[1])[:TOP_N] | |
if __name__ == '__main__': | |
FriendsRecommender.run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment