Skip to content

Instantly share code, notes, and snippets.

@Xonxt
Last active January 30, 2023 15:11
Show Gist options
  • Save Xonxt/e9258afd264b7b618f99488dd3a4a0e0 to your computer and use it in GitHub Desktop.
Save Xonxt/e9258afd264b7b618f99488dd3a4a0e0 to your computer and use it in GitHub Desktop.
Python map-reduce
def flat_map(iterable: list, map_function=None):
"""Applies a `map_function` to every element of the `iterable` input list
and then flattens all the results into a new list
Args:
iterable (list): An input list
map_function (callable, optional): A function that is applied to every element of the input list. Defaults to None.
Returns:
list: A new list
Example:
Given a list of strings, split every string into a list of word tokens:
texts = ['hello hoW are yOu', 'heLLo hello HOW Hello', 'aRE HELLO YOu Are']
flat_map(texts, lambda t: t.split(" "))
>>> ['hello', 'hoW', 'are', 'yOu', 'heLLo', 'hello', 'HOW', 'Hello', 'aRE', 'HELLO', 'YOu', 'Are']
"""
assert callable(map_function), "Expecting a Map function"
ys = []
for x in iterable:
ys.extend(map_function(x))
return ys
def flat_map_recursive(iterable: list, map_function=None, flat_list=None):
"""Recursively applies a `map_function` to every element of the input list, that can have nested lists
Args:
iterable (list): An input list
map_function (callable, optional): A function that is applied to every element of the input list. Defaults to None.
Returns:
list: A new list
Example:
texts = ["111 2222 33333 4444", ["555 5 555", "666 66 66666"], ["777 77777777"], ["888 88888 8888888", ["999 9999999", "00000"]]]
flat_map_recursive(texts, lambda t: t.split(" "))
>>> ['111', '2222', '33333', '4444', '555', '5', '555', '666', '66', '66666', '777', '77777777', '888', '88888', '8888888', '999', '9999999', '00000']
"""
assert callable(map_function), "Expecting a Map function"
if flat_list is None:
flat_list = []
if not isinstance(iterable, list):
flat_list.extend(map_function(iterable))
else:
for x in iterable:
flat_map_recursive(x, map_function, flat_list)
return flat_list
def map_reduce(iterable: list, map_function, reduce_function=None):
"""
Perform a Map-Reduce operation on an iterable (List)
Args:
iterable (List): An input list
map_function (callable): A lambda function to map the input list into pairs of <Key, Value>
reduce_function (callable, optional): Recuction lambda function to reduce the lists of Values for same Keys. Defaults to None.
Example:
Given a list of word, count how many times each word (in lower case) appears in the list
input_list = ['hello', 'hoW', 'are', 'heLLo', 'yOu', 'hello', 'HOW', 'Hello', 'aRE', 'HELLO', 'YOu', 'Are']
result = map_reduce(input_list,
map_function=lambda x: (x.lower(), 1),
reduce_function=lambda a, b: a + b)
result
>>> {'hello': 5, 'how': 2, 'are': 3, 'you': 2}
map_reduce(input_list, lambda x: (x.lower(), 1), None)
>>> {'hello': [1, 1, 1, 1, 1], 'how': [1, 1], 'are': [1, 1, 1], 'you': [1, 1]}
Returns:
Dict: A dictionary of <Key, ReducedValue> or <Key, [V1, V2, ..., Vi]> if no `reduce_function` given
"""
assert callable(map_function), "Expecting a Map function"
if reduce_function is not None:
assert callable(reduce_function), "Expecting a proper Reduce function"
mapped = list(map(map_function, iterable))
shuffled = {x[0]: list([y[1] for y in mapped if y[0] == x[0]]) for x in mapped}
if callable(reduce_function):
from functools import reduce
reduced = {k: reduce(reduce_function, vlist) for k,vlist in shuffled.items()}
return reduced
else:
return shuffled
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment