Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Class that implements a complete MapReduce in Python.
from itertools import groupby
from functools import reduce as functools_reduce
from operator import itemgetter
class MapReduce(object):
    """Emulate the behaviour of a map-reduce infrastructure in a single process.

    The pipeline has three stages, chained together by :meth:`run`:

    1. :meth:`map` calls ``map_function(key, value)`` for every entry of the
       input mapping.  Each call returns one record whose element ``0`` is
       the intermediate key.
    2. :meth:`shuffle` sorts the records by that key and groups equal keys.
    3. :meth:`reduce` folds every group with ``reduce_function``.
    """

    def map(self, pairs: dict, map_function):
        """Lazily apply ``map_function`` to every entry of ``pairs``.

        Args:
            pairs: Input mapping; each entry is passed as ``(key, value)``.
            map_function: Callable taking ``(key, value)`` and returning one
                indexable record whose element ``0`` is the intermediate key.

        Returns:
            A lazy iterator over the records returned by ``map_function``.
        """
        return (map_function(key, value) for key, value in pairs.items())

    def shuffle(self, mapper):
        """Group mapped records by their intermediate key.

        Args:
            mapper: Iterable of indexable records (e.g. the result of
                :meth:`map`).  Element ``0`` of each record is the key; the
                keys must be mutually orderable because the records are
                sorted before grouping.

        Yields:
            ``(key, records)`` tuples in ascending key order, where
            ``records`` is a list of the complete records (not just their
            values) sharing that key, in their original relative order.
        """
        first = itemgetter(0)
        # groupby only merges *adjacent* equal keys, hence the sort.  Note
        # that sorted() loads every mapped record into memory.
        for key, group in groupby(sorted(mapper, key=first), key=first):
            # groupby's sub-iterators share one underlying iterator and are
            # invalidated as soon as the outer iterator advances.  Copying
            # each group into a list keeps the output valid even when the
            # caller buffers it (e.g. ``list(mr.shuffle(...))``).
            yield key, list(group)

    def reduce(self, pairs, reduce_function):
        """Fold every group of records into a single result.

        Args:
            pairs: Iterable of ``(key, records)`` items as produced by
                :meth:`shuffle`.
            reduce_function: Callable taking ``(accumulator, record)`` and
                returning the new accumulator.  The accumulator starts as a
                fresh empty list for each key, and ``record`` is a complete
                mapped record (including its key).

        Yields:
            ``(key, reduced)`` tuples, one per input group.
        """
        for pair in pairs:
            yield pair[0], functools_reduce(reduce_function, pair[1], [])

    def run(self, pairs, map_function, reduce_function):
        """Execute the full map -> shuffle -> reduce pipeline.

        Args:
            pairs: Input mapping handed to :meth:`map`.
            map_function: See :meth:`map`.
            reduce_function: See :meth:`reduce`.

        Yields:
            ``(key, reduced)`` tuples in ascending key order.
        """
        mapped = self.map(pairs, map_function)
        shuffled = self.shuffle(mapped)
        yield from self.reduce(shuffled, reduce_function)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment