Skip to content

Instantly share code, notes, and snippets.

@cpdean
Last active December 19, 2015 03:38
Show Gist options
  • Save cpdean/5891444 to your computer and use it in GitHub Desktop.
Save cpdean/5891444 to your computer and use it in GitHub Desktop.
Comparing the performance between two interleave implementations.

When you want to combine iterables, the common way you write out list comprehensions and list operations wastes a lot of memory. For an easy performance speed up, try switching to generators/iterators.

They're data structures that are computed lazily as their elements are requested. That way you can chain together list operations like combining, interleaving, and slicing without needing to store the sequence in memory.

You lose flexibility, since you can't use subscripts or slice syntax, but your gains from avoiding python memory allocation can be pretty huge.

import itertools
def __interleave(first, second):
"""combine to iterables and return a list with elements from
the two given, alternating"""
return reduce(lambda x, y: x + y, [list(i) for i in zip(first, second)])
def interleave(first, second):
"""combine two iterables lazily"""
# has about a 10x speedup, tests below
return itertools.chain(*itertools.izip(first, second))
def iter_assert(first, second):
for i, j in itertools.izip(first, second):
assert i == j
def __top(first, second):
return __interleave(first, second)[:100]
def top(first, second):
# has over a 50x speedup from the fully-hydrated method
return itertools.islice(interleave(first, second), 100)
import time
a = range(1000)
b = [i * i for i in a]
def test_single(f):
aa = a[:]
bb = b[:]
output = []
start = time.time()
woven = f(aa, bb)
for i in woven:
output.append(i)
return time.time() - start
def test(f, n=100):
return sum(test_single(f) for i in range(n)) / n
iter_assert(__interleave(a, b), interleave(a, b))
print "general function"
heavy = test(__interleave)
light = test(interleave)
print "list-based", heavy # 0.0037
print "generators", light # 0.0003
print "speedup factor", heavy / light # about 10x speedup
print "getting just top of the list"
iter_assert(__top(a, b), top(a, b))
heavy = test(__top)
light = test(top)
print "list-based", heavy # 0.003338
print "generators", light # 0.000058
print "speedup factor", heavy / light # over 50x speedup
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment