Skip to content

Instantly share code, notes, and snippets.

@pstoll
Last active October 29, 2017 20:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pstoll/ae73582763540051d321a4eb15304226 to your computer and use it in GitHub Desktop.
Save pstoll/ae73582763540051d321a4eb15304226 to your computer and use it in GitHub Desktop.
$ python ./test-seqs.py
timing 5 iterations with 1000 items:
first 5 ids: ['5aalphabetR84', '2aalphabet3aY', 'BmalphabetACg', 'JMalphabetunQ', 'Gqalphabet7le']
func dedup_orig: 0.00107002258301
func dedup_a1: 0.000247001647949
func dedup_a2: 0.000427961349487
timing 5 iterations with 10000 items:
first 5 ids: ['UPalphabetGuF', 'gqalphabet1rI', 'DGalphabetxbx', 'cHalphabetOYB', 'JYalphabetzW2']
func dedup_orig: 0.0133740901947
func dedup_a1: 0.00292015075684
func dedup_a2: 0.00455904006958
timing 5 iterations with 100000 items:
first 5 ids: ['NmalphabetLQj', 'uoalphabetd8l', 'ncalphabetkTm', 'tWalphabet77K', 'v6alphabetUBX']
func dedup_orig: 0.16601395607
func dedup_a1: 0.0943830013275
func dedup_a2: 0.103300094604
timing 5 iterations with 1000000 items:
first 5 ids: ['RCalphabetApz', 'ymalphabetxqn', '8OalphabetXmI', 'dlalphabetHma', 'YbalphabetCE8']
func dedup_orig: 2.21989107132
func dedup_a1: 1.22671294212
func dedup_a2: 1.61492204666
timing 5 iterations with 10000000 items:
first 5 ids: ['YPalphabetHQQ', 'LfalphabetUWA', 'S0alphabet9kW', 'HSalphabet8fM', 'lqalphabetzTV']
func dedup_orig: 22.2884390354
func dedup_a1: 13.5447628498
func dedup_a2: 17.1326370239
#!/usr/bin/env python
"""
Test the sequence dedup function in the article at
https://blog.algorithmia.com/deep-dive-into-object-detection-with-open-images-using-tensorflow/
vs some other basic implementations
This code is placed in the public domain.
Author: Perry A Stoll
"""
import string
import random
import timeit
def id_generator(size=6, chars=string.ascii_letters + string.digits):
    """Build a random string that is ``size`` characters long.

    Every character is picked independently with ``random.choice`` from
    ``chars``, which defaults to the ASCII letters followed by the digits.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def make_ids(n, s='alphabet'):
    """Return a list of ``n`` random ids that all share the middle text ``s``.

    Each id is two random characters, then ``s``, then three random
    characters, all produced by ``id_generator``.
    """
    # Random characters on both ends add some entropy and avoid possible odd
    # effects of lots of strings that share a similar prefix.
    generated = []
    for _ in range(n):
        head = id_generator(2)
        tail = id_generator(3)
        generated.append(head + s + tail)
    return generated
def wrapper(func, *args, **kwargs):
    """Bind ``func`` to its arguments and return a zero-argument callable.

    ``timeit.timeit`` is given the returned callable, so the positional and
    keyword arguments are captured here in a closure.

    Args:
        func: the callable to invoke.
        *args: positional arguments forwarded to ``func`` on every call.
        **kwargs: keyword arguments forwarded to ``func`` on every call.

    Returns:
        A function taking no arguments that calls ``func(*args, **kwargs)``
        and returns whatever ``func`` returns.
    """
    def wrapped():
        # Hand the result back instead of storing it in an unused local;
        # timeit ignores the return value, other callers can inspect it.
        return func(*args, **kwargs)
    return wrapped
def dedup_orig(seq):
    """Drop repeated items from ``seq``, keeping first occurrences in order.

    This is the article's approach: a single list comprehension that checks
    membership in a set and records each new item as a side effect of the
    filter condition. Returns a new list.
    """
    observed = set()
    remember = observed.add
    # set.add returns None, so ``not remember(item)`` is always true; it is
    # only evaluated (and the item recorded) when the item is new.
    return [item for item in seq
            if item not in observed and not remember(item)]
def dedup_a1(seq):
    """Return the distinct items of ``seq`` as a set.

    Use this when the caller only needs the unique values and does not
    need a list back.
    """
    distinct = set(seq)
    return distinct
def dedup_a2(seq):
    """Return the distinct items of ``seq`` as a list.

    The items go through a set first and the set is then converted to a
    list, for callers that really need a list.
    """
    distinct = set(seq)
    return list(distinct)
# Implementations to benchmark; test() times them in this order.
funcs = [
    dedup_orig,
    dedup_a1,
    dedup_a2,
]
def fname(f):
    """Return a printable name for the callable ``f``.

    Looks for ``func_name`` first (the attribute Python 2 functions carry)
    and falls back to ``__name__``.

    Args:
        f: the object whose name is wanted.

    Returns:
        The name string, or ``''`` when ``f`` has neither attribute.
    """
    try:
        return f.func_name
    except AttributeError:
        # Only a missing attribute is expected here; a bare except would
        # also hide unrelated errors. The default supplies the empty-string
        # fallback that the old, unreachable ``else`` branch tried to give.
        return getattr(f, '__name__', '')
def test():
    """Time every function in ``funcs`` on random id lists of growing size.

    For each list size a fresh list of ids is generated, a header and the
    first five ids are printed, and then each function is timed with
    ``timeit`` over a fixed number of iterations and its total is printed.
    """
    iters = 5
    sizes = [10**3, 10**4, 10**5, 10**6, 10**7]
    for item_count in sizes:
        ids = make_ids(item_count)
        print("timing {} iterations with {} items:".format(iters, item_count))
        print("first 5 ids: {}".format(ids[:5]))
        for func in funcs:
            # timeit needs a zero-argument callable, so bind the ids first.
            timing = timeit.timeit(wrapper(func, ids), number=iters)
            label = fname(func)
            print("\tfunc {}: {}".format(label, timing))
# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment