Skip to content

Instantly share code, notes, and snippets.

@mdengler
Created September 14, 2012 20:26
Show Gist options
  • Save mdengler/3724536 to your computer and use it in GitHub Desktop.
Save mdengler/3724536 to your computer and use it in GitHub Desktop.
python exercise to find the intersection of two sets of integers
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Given two lists of N integers (32-bit signed), determine the integers
in both lists.
"""
import os
import sys
import time
def freq_bydict(v1, v2):
freqs = {}
for el in v1:
freqs.setdefault(el, [0, 0])[0] += 1
for el in v2:
freqs.setdefault(el, [0, 0])[1] += 1
common = set()
for el, (v1f, v2f) in freqs.iteritems():
if v1f > 0 and v2f > 0:
common.add(el)
return common
def freq_byarray(v1, v2):
minval = min([min(v1), min(v2)])
maxval = max([max(v1), max(v2)])
count = (maxval - minval) + 1
in_both = [None for k in range(count)]
def mark(v):
for el in v:
idx = el - count
status = in_both[idx]
if status is None:
in_both[idx] = False
else:
in_both[idx] = True
mark(v1)
mark(v2)
common = set()
for offset in range(count):
if in_both[offset]:
common.add(minval + offset)
return common
def freq_byset(v1, v2):
return set(v1).intersection(set(v2))
def timeit(func, iterations=10000):
t0 = time.time()
for i in range(iterations):
func()
t1 = time.time()
return t1 - t0
def test_raw(n=1000, iterations=1000):
v1 = range(n)
v2 = range(n / 2, int(n * 1.5))
return (iterations,
n,
timeit(lambda: freq_bydict (v1, v2), iterations=iterations),
timeit(lambda: freq_byarray(v1, v2), iterations=iterations),
timeit(lambda: freq_byset (v1, v2), iterations=iterations),
)
def test(n=1000, iterations=1000, suites=1, fh=None):
if fh is None:
fh = sys.stdout
fh.write("n,iterations,freq_bydict,freq_byarray,freq_byset%s" % os.linesep)
for i in range(suites):
fh.write(",".join(map(str, test_raw(n=n, iterations=iterations))))
fh.write(os.linesep)
def test_progressive():
log = open(__name__ + "-log.txt", "a")
log.write("n,iterations,freq_bydict,freq_byarray,freq_byset%s" % os.linesep)
for powerof10 in range(2, 7):
for powerof2 in range(5, 18):
test(n=10**powerof10,
iterations=2**powerof2,
suites=3,
fh=log)
log.flush()
if __name__ == "__main__":
test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment