Last active
August 29, 2015 14:17
-
-
Save Cilyan/50b9ee3e2dad67bb8a6b to your computer and use it in GitHub Desktop.
Time of solutions on https://stackoverflow.com/questions/29044946/python-nested-for-loop-array-comparison-possibility-of-optimization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
import itertools | |
import operator | |
import random | |
import sys | |
import functools | |
import pprint | |
DEBUG = False | |
# For all tests, we suppose that "do logic" is printing the value of the duplicate into a file. | |
# For real tests, we use /dev/null as file so that it doesn't impact performance for | |
# disk rate reasons. | |
if DEBUG: | |
OUT = "test.log" | |
ITERS = 1 | |
NBDICTS = 5 | |
RAND = "abc" | |
else: | |
OUT = "/dev/null" | |
ITERS = 10000 | |
NBDICTS = 100 | |
RAND = "abcdefghijklmnopqrstuvwxyz" | |
set_of_pk_values = [] | |
for i in range(NBDICTS): | |
set_of_pk_values.append(dict( | |
someKey=random.choice(RAND), | |
someOtherKey1=1, | |
someOtherKey2="foo", | |
someOtherKey3=True, | |
someOtherKey4=None | |
)) | |
log = open(OUT, "w") | |
print(pprint.pformat(set_of_pk_values), file=log) | |
def solutionOP(): | |
for idx, val in enumerate(set_of_pk_values): | |
for idx_2, val_2 in enumerate(set_of_pk_values): | |
if (val['someKey'] == val_2['someKey'] and idx != idx_2): | |
print("duplicate", val['someKey'], file=log) # Do some logic | |
def solutionMarcusMueller(): | |
getter = operator.itemgetter("someKey") | |
#hits = [ ( getter(first_dic), list(filter(lambda dic: getter(dic) == getter(first_dic), set_of_pk_values[idx:-1]) )) for idx, first_dic in enumerate(set_of_pk_values[:-1])] | |
hits = [ ( getter(first_dic), filter(lambda dic: getter(dic) == getter(first_dic), set_of_pk_values[idx:-1]) ) for idx, first_dic in enumerate(set_of_pk_values[:-1])] | |
# How do I use hits now? | |
# Avoid use of for as it was the goal of this answer | |
def print_dup(ignore, item): | |
print("duplicate", item['someKey'], file=log) # Do some logic | |
functools.reduce( # Use reduce to consume iterators | |
print_dup, | |
itertools.chain.from_iterable(map(operator.itemgetter(1), hits)), | |
"" | |
) | |
def solutionArtOfWarfare(): | |
# I did not use any kind of enumeration because not needed. | |
# enumerate returns an iterator, so using it twice doesn't work as it | |
# will consume the items (and is anyway not subscriptable), unless | |
# creating a list which would be even worse as enumerating twice. | |
for idx, val in enumerate(set_of_pk_values): | |
for val_2 in set_of_pk_values[idx+1:]: # Note the slice and lack of enumerate | |
if (val['someKey'] == val_2['someKey']): | |
print("duplicate", val['someKey'], file=log) # Do some logic | |
def solutionSiebz0r(): | |
getter = operator.itemgetter('someKey') | |
for a, b in itertools.combinations(set_of_pk_values, 2): | |
if getter(a) == getter(b): | |
print("duplicate", a['someKey'], file=log) # Do some logic | |
def solutionFullFunctional(): | |
getter = operator.itemgetter('someKey') | |
def print_dup(ignore, item): | |
print("duplicate", item['someKey'], file=log) | |
functools.reduce( # Use reduce to consume iterators | |
print_dup, | |
map( | |
operator.itemgetter(0), | |
filter( | |
lambda x: getter(x[0]) == getter(x[1]), | |
itertools.combinations(set_of_pk_values, 2) | |
) | |
), | |
"" | |
) | |
print("---- solutionOP ----", file=log) | |
print( | |
"solutionOP:", | |
timeit.timeit( | |
'solutionOP()', | |
setup="from __main__ import solutionOP", | |
number=ITERS | |
) | |
) | |
print("---- solutionMarcusMueller ----", file=log) | |
print( | |
"solutionMarcusMueller:", | |
timeit.timeit( | |
'solutionMarcusMueller()', | |
setup="from __main__ import solutionMarcusMueller", | |
number=ITERS | |
) | |
) | |
print("---- solutionArtOfWarfare ----", file=log) | |
print( | |
"solutionArtOfWarfare:", | |
timeit.timeit( | |
'solutionArtOfWarfare()', | |
setup="from __main__ import solutionArtOfWarfare", | |
number=ITERS | |
) | |
) | |
print("---- solutionSiebz0r ----", file=log) | |
print( | |
"solutionSiebz0r:", | |
timeit.timeit( | |
'solutionSiebz0r()', | |
setup="from __main__ import solutionSiebz0r", | |
number=ITERS | |
) | |
) | |
print("---- solutionFullFunctional ----", file=log) | |
print( | |
"solutionFullFunctional:", | |
timeit.timeit( | |
'solutionFullFunctional()', | |
setup="from __main__ import solutionFullFunctional", | |
number=ITERS | |
) | |
) | |
log.close() | |
# solutionOP: 19.336026290000518 | |
# solutionMarcusMueller: 25.26055055299912 (Note: doesn't work, any comment welcome) | |
# solutionArtOfWarfare: 8.554233753000517 | |
# solutionSiebz0r: 13.361154315000022 | |
# solutionFullFunctional: 23.725179624001612 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{'someKey': 'b', | |
'someOtherKey1': 1, | |
'someOtherKey2': 'foo', | |
'someOtherKey3': True, | |
'someOtherKey4': None}, | |
{'someKey': 'c', | |
'someOtherKey1': 1, | |
'someOtherKey2': 'foo', | |
'someOtherKey3': True, | |
'someOtherKey4': None}, | |
{'someKey': 'a', | |
'someOtherKey1': 1, | |
'someOtherKey2': 'foo', | |
'someOtherKey3': True, | |
'someOtherKey4': None}, | |
{'someKey': 'c', | |
'someOtherKey1': 1, | |
'someOtherKey2': 'foo', | |
'someOtherKey3': True, | |
'someOtherKey4': None}, | |
{'someKey': 'a', | |
'someOtherKey1': 1, | |
'someOtherKey2': 'foo', | |
'someOtherKey3': True, | |
'someOtherKey4': None}] | |
---- solutionOP ---- | |
duplicate c | |
duplicate a | |
duplicate c | |
duplicate a | |
---- solutionMarcusMueller ---- | |
duplicate c | |
duplicate c | |
duplicate c | |
duplicate c | |
duplicate c | |
duplicate c | |
---- solutionArtOfWarfare ---- | |
duplicate c | |
duplicate a | |
---- solutionSiebz0r ---- | |
duplicate c | |
duplicate a | |
---- solutionFullFunctional ---- | |
duplicate c | |
duplicate a |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment