Skip to content

Instantly share code, notes, and snippets.

@ptigas
Created December 7, 2011 21:25
Show Gist options
  • Save ptigas/1444735 to your computer and use it in GitHub Desktop.
Save ptigas/1444735 to your computer and use it in GitHub Desktop.
MapReduce example in python
'''
Playing with MapReduce in Python.

Reference:
http://mikecvet.wordpress.com/2010/07/02/parallel-mapreduce-in-python/
'''
from multiprocessing import Pool
def generate_data(A=90000, B=20):
    """Build the synthetic input for the MapReduce demo.

    Args:
        A: Number of rows to generate.
        B: Number of items in every row.

    Returns:
        A list of ``A`` rows. Each row is a freshly built list of ``B``
        single-element lists ``[0], [1], ..., [B - 1]``.
    """
    # The outer index is irrelevant: every row has the same contents, but
    # each one is a distinct object (no aliasing between rows).
    return [[[j] for j in range(B)] for _ in range(A)]
def M(I):
    """Map step: compute the length of every item in a chunk.

    Args:
        I: An iterable of sized objects (anything accepted by ``len``).

    Returns:
        A list with ``len(x)`` for each ``x`` in ``I``, in order.
    """
    # Build a concrete list rather than calling map(): on Python 3 map()
    # returns a lazy iterator, which cannot be pickled and therefore cannot
    # be sent back from a multiprocessing worker.
    return [len(x) for x in I]
def R(I):
    """Reduce step: fold the items of ``I`` together with ``+``.

    Args:
        I: An iterable of values that support ``+`` with each other
            (in this script: integer counts).

    Returns:
        The left-to-right sum of all items, or ``0`` when ``I`` is empty.
    """
    # Imported locally so the function is self-contained: ``reduce`` is a
    # builtin only on Python 2, while functools.reduce exists on 2.6+ and 3.
    from functools import reduce
    from operator import add

    items = iter(I)
    try:
        first = next(items)
    except StopIteration:
        # An empty chunk contributes nothing to the total; a bare reduce()
        # without an initial value would raise TypeError here.
        return 0
    return reduce(add, items, first)
def chunks(l, n):
    """Yield successive slices of ``l``, each at most ``n`` items long.

    Args:
        l: A sliceable sequence supporting ``len``.
        n: Maximum chunk size; must be a positive integer.

    Yields:
        ``l[0:n]``, ``l[n:2*n]``, ... The final chunk may be shorter than
        ``n``. Nothing is yielded for an empty sequence.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator,
            the error surfaces on first iteration.
    """
    if n < 1:
        # A zero step is rejected by range() anyway, and a negative step
        # would silently yield nothing and drop all the data.
        raise ValueError("chunk size must be a positive integer, got %r" % (n,))
    # range() instead of xrange(): works on both Python 2 and Python 3.
    for start in range(0, len(l), n):
        yield l[start:start + n]
if __name__ == '__main__':
    # Number of worker processes; also the target number of input chunks.
    P = 8
    pool = Pool(P)
    try:
        I = generate_data()
        # Ceiling integer division: an integer on both Python 2 and 3, and
        # clamped to at least 1 so inputs shorter than P still get chunked.
        chunk_size = max(1, -(-len(I) // P))
        data = list(chunks(I, chunk_size))
        # Map phase: each worker turns one chunk into a list of item lengths.
        inter = pool.map(M, data)
        # Reduce phase, part 1: each worker sums one list of lengths.
        res = pool.map(R, inter)
        # Reduce phase, part 2: combine the per-chunk totals in the parent.
        print(R(res))
    finally:
        # Always release the worker processes, even if a phase raised.
        pool.close()
        pool.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment