Skip to content

Instantly share code, notes, and snippets.

@tysonmalchow
Created November 23, 2010 04:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tysonmalchow/711272 to your computer and use it in GitHub Desktop.
Save tysonmalchow/711272 to your computer and use it in GitHub Desktop.
Find the delta between two lists in Python
import time
import string
import random
def print_timing(func):
    """Decorate *func* so that each call prints how long it took.

    After the wrapped call returns, the wrapper prints
    ``'<function name> took <elapsed> ms'`` (wall-clock time measured with
    ``time.time()``) and then returns the wrapped function's result
    unchanged. If *func* raises, the exception propagates and nothing is
    printed.
    """
    # Imported locally so this block does not depend on a new file-level import.
    import functools

    @functools.wraps(func)  # keep the wrapped function's __name__/__doc__
    def wrapper(*args, **kwargs):
        t1 = time.time()
        res = func(*args, **kwargs)
        t2 = time.time()
        # A single parenthesised argument prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print('%s took %0.3f ms' % (func.__name__, (t2 - t1) * 1000.0))
        return res
    return wrapper
@print_timing
def findDelta1(today, yesterday):
    """Return ``{title: index in yesterday - index in today}``.

    *yesterday* is scanned lazily, at most once: each title of *today* is
    first looked up among the yesterday entries already scanned past, and
    only if it is not there does the scan of *yesterday* continue.

    Titles of *today* that never occur in *yesterday* are left out of the
    result. Titles that occur only in *yesterday* are left out as well, so
    every value in the returned dict is a real position delta.
    """
    results = {}
    # Titles of ``yesterday`` that were scanned past while looking for some
    # other title, mapped to their index. Kept apart from ``results`` so a
    # raw position can never be mistaken for a finished delta.
    pending = {}
    ypos = 0
    total = len(yesterday)
    for i, title in enumerate(today):
        if title in pending:
            results[title] = pending.pop(title) - i
            continue
        # Resume the scan where the previous one stopped. ``ypos`` always
        # moves past every inspected element, so nothing is inspected twice,
        # even when a scan runs off the end without finding ``title``.
        while ypos < total:
            candidate = yesterday[ypos]
            pos = ypos
            ypos += 1
            if candidate == title:
                results[title] = pos - i
                break
            pending[candidate] = pos
    return results
@print_timing
def findDelta2(today, yesterday):
    """Return ``{title: index in yesterday - index in today}``.

    Builds a complete title -> index table for *yesterday* first, then
    looks every title of *today* up in it.

    Raises:
        KeyError: if a title of *today* does not occur in *yesterday*.
    """
    yesterday_index = {title: pos for pos, title in enumerate(yesterday)}
    return {title: yesterday_index[title] - pos
            for pos, title in enumerate(today)}
class LookupOnce(object):
    """Lazy lookup over an iterable of ``(value, key)`` pairs.

    The iterable is consumed only as far as needed to answer each request.
    Pairs that are read while searching for a different key are parked in
    ``cache`` until they are asked for. Each pair can be retrieved once:
    returning it removes it from the cache or consumes it from the stream.
    """

    def __init__(self, seq):
        # key -> value for pairs already read from ``seq`` but not yet requested.
        self.cache = {}
        # A single shared iterator, so successive ``get`` calls resume the
        # scan where the previous one stopped.
        self.seq = iter(seq)

    def get(self, key):
        """Return the value paired with *key* and forget that pair.

        Raises:
            KeyError: if *key* is neither parked in the cache nor found in
                the remainder of the stream. The stream is exhausted by
                then and every pair read on the way stays in the cache.
        """
        if key in self.cache:
            return self.cache.pop(key)
        for v, k in self.seq:
            if k == key:
                return v
            self.cache[k] = v
        # Name the missing key so the failure can be diagnosed.
        raise KeyError(key)
@print_timing
def findDelta3(a, b):
    """Return ``{item: index in b - index in a}`` for every item of *b*.

    *b* is walked front to back. The index of each item in *a* is obtained
    from a ``LookupOnce`` over ``enumerate(a)``, which reads *a* only as far
    as it has to.

    Raises:
        KeyError: propagated from ``LookupOnce.get`` when an item of *b*
            cannot be found in *a*.
    """
    rank_a = LookupOnce(enumerate(a))
    result = {}
    for i, k in enumerate(b):
        result[k] = i - rank_a.get(k)
    return result
def createRandomString(l):
    """Return a string of *l* characters drawn at random from the ASCII letters.

    Uses the module-level ``random`` generator, so the output is
    reproducible after ``random.seed(...)``.
    """
    # ``string.ascii_letters`` exists on Python 2 and 3; ``string.letters``
    # exists only on Python 2.
    return "".join(random.choice(string.ascii_letters) for _ in range(l))
def createRandomList(l, sl):
    """Return a list of *l* random strings, each *sl* characters long.

    Every element is produced by ``createRandomString(sl)``.
    """
    return [createRandomString(sl) for _ in range(l)]
def agitateList(l):
    """Return a shuffled copy of *l*; the argument itself is left untouched."""
    # Copy first: shuffling in place would also reorder the caller's list.
    r = list(l)
    # No explicit random source is passed. ``random.random`` was already the
    # source used, and Python 3.11 removed that second parameter.
    random.shuffle(r)
    return r
# Benchmark driver: time the three delta implementations on identical input.
# The seed is fixed so every run works on the same generated titles.
random.seed(99)
today = createRandomList(100000, 20)
# For a quick smoke run, use a small fixture instead:
#today = createRandomList(10,10)

# Case 1: both lists are the same object, so every delta is zero.
print('no change')
yesterday = today
findDelta1(today, yesterday)
findDelta2(today, yesterday)
findDelta3(today, yesterday)

# Case 2: yesterday is an independently shuffled copy of today.
print('fully shuffled')
yesterday = today[:]
random.shuffle(yesterday)
findDelta1(today, yesterday)
findDelta2(today, yesterday)
findDelta3(today, yesterday)
# Sample timings from one run on the author's machine; run the script yourself to compare.
# no change
# findDelta1 took 339.984 ms
# findDelta2 took 803.962 ms
# findDelta3 took 563.973 ms
# fully shuffled
# findDelta1 took 479.977 ms
# findDelta2 took 1047.950 ms
# findDelta3 took 467.977 ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment