Skip to content

Instantly share code, notes, and snippets.

@mrmoje
Created March 14, 2017 20:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrmoje/30bacf757e400ef28c56fdaecfb0b2a8 to your computer and use it in GitHub Desktop.
Save mrmoje/30bacf757e400ef28c56fdaecfb0b2a8 to your computer and use it in GitHub Desktop.
A sample PyPy vs CPython wager script
from __future__ import print_function
import cProfile
import random
import string
from datetime import datetime
def find_duplicates(seq):
    """
    Return the repeated items of ``seq`` in the order they are encountered.

    An item is reported every time it is seen again after its first
    occurrence, so a value that appears three times is listed twice.
    The result list grows as needed; there is no upper bound on the
    number of duplicates that can be reported.

    :param seq: iterable of hashable items (e.g. integers).
    :returns: list of the repeat occurrences, in encounter order.

    >>> find_duplicates([1, 1, 2, 3, 3])
    [1, 3]
    >>> find_duplicates([7, 7, 7])
    [7, 7]
    >>> find_duplicates([])
    []
    """
    seen = set()
    duplicates = []
    for item in seq:
        if item in seen:
            # Already encountered earlier: record this repeat occurrence.
            duplicates.append(item)
        else:
            seen.add(item)
    return duplicates
if __name__ == '__main__':
    # Benchmark driver: load the numbers from '5m.csv' (one integer per
    # line), then time a single find_duplicates() pass over them.
    print("Loading 5M msisdns... ")
    # The context manager closes the file deterministically; this matters
    # on PyPy, where unreferenced files are not closed promptly.
    with open('5m.csv') as csv_file:
        # Skip blank lines (e.g. a trailing empty line) that int() rejects.
        msisdns = [int(line) for line in csv_file if line.strip()]
    print("Finding duplicates...")
    t1 = datetime.now()
    duplicates = find_duplicates(msisdns)
    t2 = datetime.now()
    print("{0} duplicates found.".format(len(duplicates)))
    # t2 - t1 is a timedelta; convert it so the value really is in seconds.
    print("{0} seconds elapsed.".format((t2 - t1).total_seconds()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment