Skip to content

Instantly share code, notes, and snippets.

@pims
Created September 9, 2009 22:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pims/184137 to your computer and use it in GitHub Desktop.
Save pims/184137 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# encoding: utf-8
"""
Example of friendships computing for mapreduce
inspired by this post : http://stevekrenzel.com/finding-friends-with-mapreduce
"""
__author__ = "Tim Bart"
__copyright__ = "No copyright"
__credits__ = ["Steve Krenzel for the inspiration", "Michael Nielsen for the MapReduce implementation"]
__license__ = "MIT"
from operator import itemgetter
import itertools
def map_reduce(i,mapper,reducer):
"""
Defines a single function, map_reduce, which takes an input
dictionary i and applies the user-defined function mapper to each
(input_key,input_value) pair, producing a list of intermediate
keys and intermediate values. Repeated intermediate keys then
have their values grouped into a list, and the user-defined
function reducer is applied to the intermediate key and list of
intermediate values. The results are returned as a list.
All rights reserved to Michael Nielsen
http://michaelnielsen.org/blog/write-your-first-mapreduce-program-in-20-minutes/
"""
intermediate = []
for (key,value) in i.items():
intermediate.extend(mapper(key,value))
groups = {}
for key, group in itertools.groupby(sorted(intermediate),lambda x: x[0]):
groups[key] = list([y for x, y in group])
return [reducer(intermediate_key,groups[intermediate_key]) for intermediate_key in groups]
def sortedKey(couple):
"""sorts list of two strings and returns concatenated string"""
couple.sort()
return ''.join(couple)
def cmpByKey(a,b):
"""compare two items by their first key"""
return cmp(a[0],b[0])
def mapper(input_key,input_value):
"""
split input in multiple lines and returns a tuple (key,value)
ex: ("AB",[C,D,E])
"""
res = []
for line in input_value.split('\n'):
person = line.split('|')[0]
friends = line.split('|')[1].split(';')
for friend in friends:
key = sortedKey([person,friend])
value = friends
res.append((key,value))
return res
def intersect(lists):
""" return list of intersections"""
intersection = []
if len(lists) > 1:
for f in lists[1]:
if f in lists[0]:
intersection.append(f)
return intersection
def reducer(intermediate_key,intermediate_value_list):
return (intermediate_key,intersect(intermediate_value_list))
def main():
"""run mapreduce job and outputs sorted results"""
"""
friends.txt should contain something like:
A|B;C;D
B|A;C;D;E
C|A;B;D;E
D|A;B;C;E
E|B;C;D
"""
i = {}
i["friends"] = open('friends.txt').read()
friendships = map_reduce(i,mapper,reducer)
friendships.sort(cmpByKey)
for friendship in friendships:
print "%s & %s have %s as common friends " % (friendship[0][0],friendship[0][1],friendship[1])
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment