Skip to content

Instantly share code, notes, and snippets.

@jjmalina
Last active October 24, 2015 22:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jjmalina/3c122c3a21a4f8e23f67 to your computer and use it in GitHub Desktop.
Save jjmalina/3c122c3a21a4f8e23f67 to your computer and use it in GitHub Desktop.
Transducers in python
# -*- coding: utf-8 -*-
"""
transducers
~~~~~~~~~~~
Lazy evaluation of chained transformations over a stream of (key, value)
pairs. In this case we count the words that start with the letter "t".
"""
import types
import operator
from collections import defaultdict
def identity(function=lambda x: x):
    """Build a pass-through pipeline stage.

    The ``function`` argument is accepted but never used by the stage.

    Returns a callable that takes an iterable of two-item entries and
    lazily yields each one back as a ``(key, value)`` tuple.
    """
    def apply_(iterable):
        for entry in iterable:
            # Unpacking enforces the two-item shape of every entry.
            entry_key, entry_value = entry
            yield entry_key, entry_value

    return apply_
def map_(fn):
    """Build a mapping pipeline stage around ``fn``.

    ``fn`` is called as ``fn(key, value)`` for every incoming pair. If it
    returns a generator, every ``(key, value)`` pair produced by that
    generator is emitted (one-to-many mapping). Otherwise the first two
    items of the result are emitted as a single pair.

    Returns a callable that takes an iterable of pairs and lazily yields
    the mapped pairs.
    """
    def apply_(iterable):
        for in_key, in_value in iterable:
            produced = fn(in_key, in_value)
            if not isinstance(produced, types.GeneratorType):
                # Plain result: treat it as one indexable (key, value) pair.
                yield produced[0], produced[1]
                continue
            # Generator result: flatten its pairs into the output stream.
            for out_key, out_value in produced:
                yield out_key, out_value

    return apply_
def filter_(fn):
    """Build a filtering pipeline stage around the predicate ``fn``.

    ``fn`` is called as ``fn(key, value)``; pairs for which it returns a
    truthy value are passed through, all others are dropped.

    Returns a callable that takes an iterable of pairs and lazily yields
    the pairs that pass the predicate.
    """
    def apply_(iterable):
        for pair_key, pair_value in iterable:
            if not fn(pair_key, pair_value):
                continue
            yield pair_key, pair_value

    return apply_
class Index(object):
    """Container for values grouped under their key."""

    def __init__(self):
        # Missing keys are created on first access as empty lists, so values
        # can be appended without checking for the key first.
        groups = defaultdict(list)
        self.keys = groups
def reduce_(fn):
    """Build a group-and-reduce pipeline stage around ``fn``.

    The stage consumes the whole input, groups the values by key, and then
    calls ``fn(key, values)`` once per key in ascending key order. ``fn``
    must return a two-item result, which is emitted as a ``(key, value)``
    pair.

    Returns a callable that takes an iterable of pairs and yields one
    reduced pair per distinct input key. The input is only read once the
    returned generator is first advanced.
    """
    def apply_(iterable):
        # The grouping state is local to each run. Sharing it across runs
        # would leak groups from an abandoned or interleaved generator into
        # the next one.
        groups = defaultdict(list)
        for key, value in iterable:
            groups[key].append(value)

        # .items() is available on both Python 2 and Python 3 dicts.
        ordered = sorted(groups.items(), key=operator.itemgetter(0))
        for key, values in ordered:
            k_, v_ = fn(key, values)
            yield k_, v_

    return apply_
def sentences():
    """Return the sample corpus: a list of four text passages."""
    # Each entry is one string; adjacent literals are concatenated by the
    # parser, so the trailing spaces inside the first fragments matter.
    corpus = [
        "This module implements a number of iterator building blocks inspired by constructs from APL, Haskell, and SML. "
        "Each has been recast in a form suitable for Python.",

        "The module standardizes a core set of fast, memory efficient tools that are useful by themselves or in combination. "
        "Together, they form an “iterator algebra” making it possible to construct specialized tools succinctly and efficiently in pure Python.",

        "For instance, SML provides a tabulation tool: tabulate(f) which produces a sequence f(0), f(1), .... "
        "The same effect can be achieved in Python by combining imap() and count() to form imap(f, count()).",

        "These tools and their built-in counterparts also work well with the high-speed functions in the operator module. "
        "For example, the multiplication operator can be mapped across two vectors to form an efficient dot-product: sum(imap(operator.mul, vector1, vector2)).",
    ]
    return corpus
def main():
    """Run the demo pipeline and print the result.

    The sample sentences are enumerated into ``(index, sentence)`` pairs and
    pushed through the stages below. The result is a single pair holding the
    number of words that start with "t" (case-insensitive); it is checked
    against the expected value and printed.
    """
    pipeline = [
        identity(),
        # Split every sentence into (word, word) pairs.
        map_(lambda key, sentence: ((word, word) for word in sentence.split(' '))),
        # startswith() instead of indexing [0]: split(' ') produces empty
        # tokens for consecutive spaces, and indexing those raises IndexError.
        filter_(lambda word, value: word.lower().startswith('t')),
        # We could just set the key to zero here and be done in the next
        # reduce step, but if you want a per-word frequency instead, just
        # comment out the last two steps.
        map_(lambda key, value: (key, 1)),
        reduce_(lambda word, values: (word, sum(values))),
        map_(lambda word, count: (0, count)),
        reduce_(lambda key, values: (key, sum(values))),
    ]

    # Chain the stages: each one wraps the iterator produced by the previous.
    iterator = enumerate(sentences())
    for transformation in pipeline:
        iterator = transformation(iterator)

    # Nothing is evaluated until the chained iterator is consumed here.
    results = list(iterator)
    assert results == [(0, 22)]
    print(results)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment