Last active
January 4, 2021 04:19
-
-
Save juanarrivillaga/0ed8e599cbc8a8605abbe919df84c5fa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
import timeit | |
import pandas as pd | |
import itertools | |
def flatten(nested): | |
result = [] | |
for sub in nested: | |
for x in sub: | |
result.append(x) | |
return result | |
def bad_flatten(nested): | |
functools.reduce(list.__add__, nested) | |
def itertools_flatten(nested): | |
return list(itertools.chain.from_iterable(nested)) | |
b = [[(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]] | |
sizes = range(1, 1000, 10) | |
flat_ts = [ | |
timeit.timeit("flatten(b)", | |
"from __main__ import flatten, b; b = {}*b".format(i), | |
number=100) | |
for i in sizes | |
] | |
bad_flat_ts = [ | |
timeit.timeit("bad_flatten(b)", | |
"from __main__ import bad_flatten, b; b = {}*b".format(i), | |
number=100) | |
for i in sizes | |
] | |
it_flat_ts = [ | |
timeit.timeit("itertools_flatten(b)", | |
"from __main__ import itertools_flatten, b; b = {}*b".format(i), | |
number=100) | |
for i in sizes | |
] | |
pd.DataFrame(dict(naive_for_loop=flat_ts, chain_from_iterable=it_flat_ts, reduce_with_plus=bad_flat_ts), index=sizes).plot() | |
##################### EDITED TO ADD OPTIMIZED FOR-LOOP VERSION ###################################################### | |
def flatten_o(nested): | |
result = [] | |
extend = result.extend | |
for sub in nested: | |
extend(sub) | |
return result | |
flato_ts = [ | |
timeit.timeit("flatten_o(b)", | |
"from __main__ import flatten_o, b; b = {}*b".format(i), | |
number=100) | |
for i in sizes | |
] | |
pd.DataFrame(dict(naive_for_loop=flat_ts, chain_from_iterable=it_flat_ts, reduce_with_plus=bad_flat_ts, optimized_for_loop=flato_ts), index=sizes).plot() |
Thanks for making this - really cool. Would you mind explaining how, as this massive performance difference is surprising. I tried to lookup in the cpython source where the __add__
method is implemented for the list
object, but am at a loss (new to looking this kind of digging stuff up). If you could take a look that'd be great.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The results: