Skip to content

Instantly share code, notes, and snippets.

@Cilyan
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Cilyan/50b9ee3e2dad67bb8a6b to your computer and use it in GitHub Desktop.
Save Cilyan/50b9ee3e2dad67bb8a6b to your computer and use it in GitHub Desktop.
import timeit
import itertools
import operator
import random
import sys
import functools
import pprint
DEBUG = False
# For all tests, we suppose that "do logic" is printing the value of the duplicate into a file.
# For real tests, we use /dev/null as file so that it doesn't impact performance for
# disk rate reasons.
if DEBUG:
OUT = "test.log"
ITERS = 1
NBDICTS = 5
RAND = "abc"
else:
OUT = "/dev/null"
ITERS = 10000
NBDICTS = 100
RAND = "abcdefghijklmnopqrstuvwxyz"
set_of_pk_values = []
for i in range(NBDICTS):
set_of_pk_values.append(dict(
someKey=random.choice(RAND),
someOtherKey1=1,
someOtherKey2="foo",
someOtherKey3=True,
someOtherKey4=None
))
log = open(OUT, "w")
print(pprint.pformat(set_of_pk_values), file=log)
def solutionOP():
for idx, val in enumerate(set_of_pk_values):
for idx_2, val_2 in enumerate(set_of_pk_values):
if (val['someKey'] == val_2['someKey'] and idx != idx_2):
print("duplicate", val['someKey'], file=log) # Do some logic
def solutionMarcusMueller():
getter = operator.itemgetter("someKey")
#hits = [ ( getter(first_dic), list(filter(lambda dic: getter(dic) == getter(first_dic), set_of_pk_values[idx:-1]) )) for idx, first_dic in enumerate(set_of_pk_values[:-1])]
hits = [ ( getter(first_dic), filter(lambda dic: getter(dic) == getter(first_dic), set_of_pk_values[idx:-1]) ) for idx, first_dic in enumerate(set_of_pk_values[:-1])]
# How do I use hits now?
# Avoid use of for as it was the goal of this answer
def print_dup(ignore, item):
print("duplicate", item['someKey'], file=log) # Do some logic
functools.reduce( # Use reduce to consume iterators
print_dup,
itertools.chain.from_iterable(map(operator.itemgetter(1), hits)),
""
)
def solutionArtOfWarfare():
# I did not use any kind of enumeration because not needed.
# enumerate returns an iterator, so using it twice doesn't work as it
# will consume the items (and is anyway not subscriptable), unless
# creating a list which would be even worse as enumerating twice.
for idx, val in enumerate(set_of_pk_values):
for val_2 in set_of_pk_values[idx+1:]: # Note the slice and lack of enumerate
if (val['someKey'] == val_2['someKey']):
print("duplicate", val['someKey'], file=log) # Do some logic
def solutionSiebz0r():
getter = operator.itemgetter('someKey')
for a, b in itertools.combinations(set_of_pk_values, 2):
if getter(a) == getter(b):
print("duplicate", a['someKey'], file=log) # Do some logic
def solutionFullFunctional():
getter = operator.itemgetter('someKey')
def print_dup(ignore, item):
print("duplicate", item['someKey'], file=log)
functools.reduce( # Use reduce to consume iterators
print_dup,
map(
operator.itemgetter(0),
filter(
lambda x: getter(x[0]) == getter(x[1]),
itertools.combinations(set_of_pk_values, 2)
)
),
""
)
print("---- solutionOP ----", file=log)
print(
"solutionOP:",
timeit.timeit(
'solutionOP()',
setup="from __main__ import solutionOP",
number=ITERS
)
)
print("---- solutionMarcusMueller ----", file=log)
print(
"solutionMarcusMueller:",
timeit.timeit(
'solutionMarcusMueller()',
setup="from __main__ import solutionMarcusMueller",
number=ITERS
)
)
print("---- solutionArtOfWarfare ----", file=log)
print(
"solutionArtOfWarfare:",
timeit.timeit(
'solutionArtOfWarfare()',
setup="from __main__ import solutionArtOfWarfare",
number=ITERS
)
)
print("---- solutionSiebz0r ----", file=log)
print(
"solutionSiebz0r:",
timeit.timeit(
'solutionSiebz0r()',
setup="from __main__ import solutionSiebz0r",
number=ITERS
)
)
print("---- solutionFullFunctional ----", file=log)
print(
"solutionFullFunctional:",
timeit.timeit(
'solutionFullFunctional()',
setup="from __main__ import solutionFullFunctional",
number=ITERS
)
)
log.close()
# solutionOP: 19.336026290000518
# solutionMarcusMueller: 25.26055055299912 (Note: doesn't work, any comment welcome)
# solutionArtOfWarfare: 8.554233753000517
# solutionSiebz0r: 13.361154315000022
# solutionFullFunctional: 23.725179624001612
[{'someKey': 'b',
'someOtherKey1': 1,
'someOtherKey2': 'foo',
'someOtherKey3': True,
'someOtherKey4': None},
{'someKey': 'c',
'someOtherKey1': 1,
'someOtherKey2': 'foo',
'someOtherKey3': True,
'someOtherKey4': None},
{'someKey': 'a',
'someOtherKey1': 1,
'someOtherKey2': 'foo',
'someOtherKey3': True,
'someOtherKey4': None},
{'someKey': 'c',
'someOtherKey1': 1,
'someOtherKey2': 'foo',
'someOtherKey3': True,
'someOtherKey4': None},
{'someKey': 'a',
'someOtherKey1': 1,
'someOtherKey2': 'foo',
'someOtherKey3': True,
'someOtherKey4': None}]
---- solutionOP ----
duplicate c
duplicate a
duplicate c
duplicate a
---- solutionMarcusMueller ----
duplicate c
duplicate c
duplicate c
duplicate c
duplicate c
duplicate c
---- solutionArtOfWarfare ----
duplicate c
duplicate a
---- solutionSiebz0r ----
duplicate c
duplicate a
---- solutionFullFunctional ----
duplicate c
duplicate a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment