Created
April 17, 2019 20:59
-
-
Save phette23/aa1a1c49a24d83bd6fc7ad9600785531 to your computer and use it in GitHub Desktop.
trying out a few methods of testing for inclusion in a large list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
I tested a few options for testing for inclusion in a very large (millions of items) set. Results if you're curious: | |
¿ python3 heapq-test.py | |
> List took 71.93408012390137s time | |
> Heap took 160.27557826042175s time | |
> Sorted Containers took 0.004794120788574219s time | |
sortedcontainers is _real_ fast. | |
""" | |
from heapq import * | |
import random | |
import time | |
from sortedcontainers import SortedList | |
# Number of random membership lookups timed against each container.
SAMPLE_COUNT = 1000


def load_lines(path):
    """Read the text file at *path* into a plain list and a heap.

    Args:
        path: Path of the text file to read, one item per line.

    Returns:
        A ``(items, heap)`` tuple. ``items`` holds the lines in file order;
        ``heap`` holds the same lines arranged by ``heappush``. Lines keep
        their trailing newline, exactly as read from the file.
    """
    items = []
    heap = []
    with open(path, 'r') as handle:
        for line in handle:
            heappush(heap, line)
            items.append(line)
    return items, heap


def pick_samples(population, count=SAMPLE_COUNT):
    """Return *count* items drawn at random (with replacement) from *population*.

    Args:
        population: Non-empty sequence to draw from.
        count: How many items to draw; defaults to ``SAMPLE_COUNT``.

    Returns:
        A list of exactly *count* items.

    Raises:
        IndexError: If *population* is empty and *count* is positive
            (raised by ``random.choice``).
    """
    return [random.choice(population) for _ in range(count)]


def time_membership(label, needles, haystack):
    """Time ``needle in haystack`` for every needle and print the result.

    Args:
        label: Name of the container, used as the prefix of the printed line.
        needles: Iterable of items to look up.
        haystack: Container supporting the ``in`` operator.

    Returns:
        Elapsed time in seconds as a float.
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump
    # if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    for needle in needles:
        if needle in haystack:
            pass
    elapsed = time.perf_counter() - start
    print('{} took {}s time'.format(label, elapsed))
    return elapsed


def main():
    """Benchmark membership tests on a list, a heap, and a SortedList."""
    items, heap = load_lines('dupes.txt')
    if not items:
        # Without this guard random.choice fails with an opaque IndexError.
        raise SystemExit('dupes.txt is empty; nothing to benchmark')
    sorted_items = SortedList(items)

    # The same samples are reused for every container so the timings are
    # directly comparable.
    samples = pick_samples(items)

    time_membership('List', samples, items)
    time_membership('Heap', samples, heap)
    time_membership('Sorted Containers', samples, sorted_items)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment