Skip to content

Instantly share code, notes, and snippets.

@storborg
Created April 15, 2009 04:43
Show Gist options
  • Save storborg/95604 to your computer and use it in GitHub Desktop.
Save storborg/95604 to your computer and use it in GitHub Desktop.
"""
what's the fastest way to test for order-insensitive equality of two lists of
integers?
running timing tests for compare_sets:
comparing equal shuffled lists takes 1.289612
comparing equal unshuffled lists takes 1.226031
comparing unequal lists takes 0.660738
running timing tests for compare_sort:
comparing equal shuffled lists takes 2.248559
comparing equal unshuffled lists takes 2.108789
comparing unequal lists takes 1.158039
running timing tests for compare_iter_set_dumb:
comparing equal shuffled lists takes 0.893579
comparing equal unshuffled lists takes 0.900293
comparing unequal lists takes 0.566193
running timing tests for compare_iter_set_smart:
comparing equal shuffled lists takes 0.916000
comparing equal unshuffled lists takes 0.923753
comparing unequal lists takes 0.056960
"""
__author__ = 'scott torborg (scotttorborg.com)'
import time, random
def compare_sort(foo, bar):
return sorted(foo) == sorted(bar)
def compare_sets(foo, bar):
return set(foo) == set(bar)
def compare_iter_list(foo, bar):
# don't even bother testing this, it's really really slow
# (and this impl is incorrect)
for el in bar:
if not el in foo:
return False
return True
def compare_iter_set_dumb(foo, bar):
# can return incorrect result for len(foo) > len(bar)
f = set(foo)
for el in bar:
if not el in f:
return False
return True
def compare_iter_set_smart(foo, bar):
# make sure foo is the shorter list
if len(foo) > len(bar):
foo, bar = bar, foo
f = set(foo)
for el in bar:
if not el in f:
return False
return True
def time_compare(func, num_trials=10000):
print "running timing tests for %s:" % func.__name__
def randlist(len=5):
return [random.randint(0, 65535) for _ in range(len)]
# Generate two random lists.
foo = randlist(344)
bar = randlist(54)
# Generate a third which is a copy of the first.
baz = list(foo)
# Generate a fourth which is a shuffled version of the first.
quux = list(foo)
random.shuffle(quux)
# Some quick tests.
assert foo != quux, "order-sensitive comp fails"
assert func(foo, quux), "order-insensitive comp fails"
assert not func(foo, bar), "order-insensitive comp returns false positive"
start = time.time()
for i in range(num_trials):
func(foo, quux)
print " comparing equal shuffled lists takes %f" % (time.time() - start)
start = time.time()
for i in range(num_trials):
func(foo, baz)
print " comparing equal unshuffled lists takes %f" % (time.time() - start)
start = time.time()
for i in range(num_trials):
func(foo, bar)
print " comparing unequal lists takes %f" % (time.time() - start)
time_compare(compare_sets)
time_compare(compare_sort)
time_compare(compare_iter_set_dumb)
time_compare(compare_iter_set_smart)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment