Skip to content

Instantly share code, notes, and snippets.



Last active Oct 12, 2018
What would you like to do?
Collection length checking before determining duplicate items
import timeit
'one', 'two', 'three', 'four', 'five', 'six',
'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve')
'one', 'two', 'three', 'four', 'five', 'six',
'seven', 'one', 'nine', 'two', 'eleven', 'three')
def check_length_first(collection):
if len(collection) != len(set(collection)):
return {x for x in collection if collection.count(x) > 1}
def immediate_duplicates(collection):
return {x for x in collection if collection.count(x) > 1}
def seen_set_duplicates(collection):
"""Builds a set of unique entries, returns items already contained. O(n)"""
seen = set()
return {x for x in collection if x in seen or seen.add(x)}
def call_latency(collection):
if __name__ == '__main__':
print 'Just the function calls: {}'.format(timeit.timeit(
setup='from __main__ import call_latency, NO_DUPLICATES'))
print '\nLength check first, no dupes: {}'.format(timeit.timeit(
setup='from __main__ import check_length_first, NO_DUPLICATES'))
print 'Length check first, dupes: {}'.format(timeit.timeit(
setup='from __main__ import check_length_first, WITH_DUPLICATES'))
print '\nNo length check, no dupes: {}'.format(timeit.timeit(
setup='from __main__ import immediate_duplicates, NO_DUPLICATES'))
print 'No Length check, dupes: {}'.format(timeit.timeit(
setup='from __main__ import immediate_duplicates, WITH_DUPLICATES'))
print '\nSeen-set based, no dupes: {}'.format(timeit.timeit(
setup='from __main__ import seen_set_duplicates, NO_DUPLICATES'))
print 'Seen-set based, dupes: {}'.format(timeit.timeit(
setup='from __main__ import seen_set_duplicates, WITH_DUPLICATES'))
# Example timings from a Core i5-6200U
# Just the function calls: 0.0743689537048
# Length check first, no dupes: 0.500941991806
# Length check first, dupes: 3.38291621208
# No length check, no dupes: 2.72766900063
# No Length check, dupes: 2.82286787033
# Seen-set based, no dupes: 1.60548996925
# Seen-set based, dupes: 1.44544911385
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.