Created
March 14, 2017 20:21
-
-
Save mrmoje/30bacf757e400ef28c56fdaecfb0b2a8 to your computer and use it in GitHub Desktop.
A sample PyPy vs CPython wager script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import cProfile | |
import random | |
import string | |
from datetime import datetime | |
def find_duplicates(seq):
    """
    Return the items of ``seq`` that repeat an earlier item.

    The result is in encounter order and holds one entry per *repeat*
    occurrence, so an item seen three times contributes two entries.
    ``seq`` may be any iterable of hashable items; it is walked once.

    >>> find_duplicates([1, 1, 2, 3, 3])
    [1, 3]
    >>> find_duplicates([1, 1, 1])
    [1, 1]
    >>> find_duplicates([])
    []
    """
    seen = set()
    # Grow the result on demand: a fixed-size pre-allocated buffer would
    # raise IndexError as soon as the input held more repeats than slots.
    duplicates = []
    for item in seq:
        if item in seen:
            duplicates.append(item)
        else:
            seen.add(item)
    return duplicates
if __name__ == '__main__':
    # Benchmark driver: load the MSISDN list, then time only the
    # duplicate search so file I/O and parsing do not skew the result.
    print("Loading 5M msisdns... ")
    # The context manager closes the file even if parsing fails; blank
    # lines (e.g. a trailing newline at EOF) are skipped rather than
    # crashing int().
    with open('5m.csv') as source:
        msisdns = [int(line) for line in source if line.strip()]

    print("Finding duplicates...")
    t1 = datetime.now()
    duplicates = find_duplicates(msisdns)
    t2 = datetime.now()

    print("{0} duplicates found.".format(len(duplicates)))
    # total_seconds() yields a plain number of seconds; formatting the
    # timedelta itself would print "H:MM:SS.ffffff seconds elapsed."
    print("{0} seconds elapsed.".format((t2 - t1).total_seconds()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment