Skip to content

Instantly share code, notes, and snippets.

@jokester
Last active December 17, 2015 20:29
Show Gist options
  • Save jokester/5668025 to your computer and use it in GitHub Desktop.
Save jokester/5668025 to your computer and use it in GitHub Desktop.
test correctness and speed of linear search and bisect search in list
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import random
from array import array
from bisect import bisect_left, bisect_right
from timeit import repeat
class Distribution:
"""Generate random integers according to a specific function.
-- self.values: array.array('f')
-- self.start: int
Lower bound of definition interval.
-- self.nbVal: int
Number of values to be stored = len(self.values)
"""
def __init__(self, function, start, end):
"""Initializer.
-- function: function
1-argument function. <function>(x) will be proportional to the
probability that the integer x be chosen.
-- start: int
= self.start
-- end: int
Upper bound of definition interval.
"""
values = [function(x) for x in xrange(start, end + 1)]
fact = 1. / sum(values)
s = 0.
for i, v in enumerate(values):
s += fact * v
values[i] = s
self.values = array('f', values)
self.start = start
self.nbVal = end + 1 - start
def next_bisect(self, r):
"""Return new random integer, according to distribution."""
values = self.values
i = bisect_right( values, r )
# still "first i such that r < values[i]"
return self.start + i
def next_linear(self, r):
"""Return new random integer, according to distribution."""
values = self.values
i = -1
for i in xrange(self.nbVal):
if r < values[i]:
return self.start + i
return self.start + i # We should never reach this line
def assert_correctness():
prob_list = [ random.random() for i in range(0,500001) ]
sum_prob_list = sum(prob_list)
dist = Distribution( prob_list.__getitem__, 0, 500000 )
# random number : when r is new random number
for i in range(50000):
r = random.random()
n1 = dist.next_linear(r)
n2 = dist.next_linear(r)
assert( n1==n2 )
# marginal condition : when r equals to some item in cumulative list
for j in range(0,2000):
r = sum( prob_list[:j] ) / sum_prob_list
n1 = dist.next_linear(r)
n2 = dist.next_linear(r)
assert( n1==n2 )
print( "correctness verified" )
def benchmark():
prob_list = [ random.random() for i in range(0,500001) ]
dist = Distribution( prob_list.__getitem__, 0, 500000 )
time_linear = repeat(
lambda: dist.next_linear(random.random()),
number = 10000,
repeat = 3
)
time_bisect = repeat(
lambda: dist.next_bisect(random.random()),
number = 10000,
repeat = 3
)
print( "average run time of linear search: %e " %( sum(time_linear )/10000/3 ) )
print( "average run time of bisect search: %e " %( sum(time_bisect )/10000/3 ) )
print( "ratio of average run time: %e " %( 1.0 * sum(time_linear) / sum(time_bisect) ) )
def assert_correctness__with_0_prob():
prob_list = [
0, # <= 0
1, # <= 1
0, # <= 2
0, # <= 3
0, # <= 4
1, # <= 5
0, # <= 6
]
dist = Distribution( prob_list.__getitem__, 0, 6 )
for r,expected in {
( 0.0 ) : 1 ,
( 0.1 ) : 1 ,
( 0.4 ) : 1 ,
( 0.5-1e-16 ) : 1 ,
( 0.5 ) : 5 ,
( 0.5+1e-16 ) : 5 ,
( 0.6 ) : 5 ,
( 0.7 ) : 5 ,
( 0.99999 ) : 5 ,
( 1-1e-16 ) : 5 ,
# ( 1.0 ) : 5 , ### NOTE not required. random.random() ranges in [0,1)
}.iteritems():
actual = dist.next_bisect(r)
try:
assert( actual == expected )
except:
print("next_bisect(%1.20f) returned %i , not %i" % (r, actual, expected) )
assert_correctness()
benchmark()
assert_correctness__with_0_prob()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment