Last active
December 17, 2015 20:29
-
-
Save jokester/5668025 to your computer and use it in GitHub Desktop.
test correctness and speed of linear search and bisect search in list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# -*- coding: utf-8 -*- | |
import random | |
from array import array | |
from bisect import bisect_left, bisect_right | |
from timeit import repeat | |
class Distribution: | |
"""Generate random integers according to a specific function. | |
-- self.values: array.array('f') | |
-- self.start: int | |
Lower bound of definition interval. | |
-- self.nbVal: int | |
Number of values to be stored = len(self.values) | |
""" | |
def __init__(self, function, start, end): | |
"""Initializer. | |
-- function: function | |
1-argument function. <function>(x) will be proportional to the | |
probability that the integer x be chosen. | |
-- start: int | |
= self.start | |
-- end: int | |
Upper bound of definition interval. | |
""" | |
values = [function(x) for x in xrange(start, end + 1)] | |
fact = 1. / sum(values) | |
s = 0. | |
for i, v in enumerate(values): | |
s += fact * v | |
values[i] = s | |
self.values = array('f', values) | |
self.start = start | |
self.nbVal = end + 1 - start | |
def next_bisect(self, r): | |
"""Return new random integer, according to distribution.""" | |
values = self.values | |
i = bisect_right( values, r ) | |
# still "first i such that r < values[i]" | |
return self.start + i | |
def next_linear(self, r): | |
"""Return new random integer, according to distribution.""" | |
values = self.values | |
i = -1 | |
for i in xrange(self.nbVal): | |
if r < values[i]: | |
return self.start + i | |
return self.start + i # We should never reach this line | |
def assert_correctness(): | |
prob_list = [ random.random() for i in range(0,500001) ] | |
sum_prob_list = sum(prob_list) | |
dist = Distribution( prob_list.__getitem__, 0, 500000 ) | |
# random number : when r is new random number | |
for i in range(50000): | |
r = random.random() | |
n1 = dist.next_linear(r) | |
n2 = dist.next_linear(r) | |
assert( n1==n2 ) | |
# marginal condition : when r equals to some item in cumulative list | |
for j in range(0,2000): | |
r = sum( prob_list[:j] ) / sum_prob_list | |
n1 = dist.next_linear(r) | |
n2 = dist.next_linear(r) | |
assert( n1==n2 ) | |
print( "correctness verified" ) | |
def benchmark(): | |
prob_list = [ random.random() for i in range(0,500001) ] | |
dist = Distribution( prob_list.__getitem__, 0, 500000 ) | |
time_linear = repeat( | |
lambda: dist.next_linear(random.random()), | |
number = 10000, | |
repeat = 3 | |
) | |
time_bisect = repeat( | |
lambda: dist.next_bisect(random.random()), | |
number = 10000, | |
repeat = 3 | |
) | |
print( "average run time of linear search: %e " %( sum(time_linear )/10000/3 ) ) | |
print( "average run time of bisect search: %e " %( sum(time_bisect )/10000/3 ) ) | |
print( "ratio of average run time: %e " %( 1.0 * sum(time_linear) / sum(time_bisect) ) ) | |
def assert_correctness__with_0_prob(): | |
prob_list = [ | |
0, # <= 0 | |
1, # <= 1 | |
0, # <= 2 | |
0, # <= 3 | |
0, # <= 4 | |
1, # <= 5 | |
0, # <= 6 | |
] | |
dist = Distribution( prob_list.__getitem__, 0, 6 ) | |
for r,expected in { | |
( 0.0 ) : 1 , | |
( 0.1 ) : 1 , | |
( 0.4 ) : 1 , | |
( 0.5-1e-16 ) : 1 , | |
( 0.5 ) : 5 , | |
( 0.5+1e-16 ) : 5 , | |
( 0.6 ) : 5 , | |
( 0.7 ) : 5 , | |
( 0.99999 ) : 5 , | |
( 1-1e-16 ) : 5 , | |
# ( 1.0 ) : 5 , ### NOTE not required. random.random() ranges in [0,1) | |
}.iteritems(): | |
actual = dist.next_bisect(r) | |
try: | |
assert( actual == expected ) | |
except: | |
print("next_bisect(%1.20f) returned %i , not %i" % (r, actual, expected) ) | |
assert_correctness() | |
benchmark() | |
assert_correctness__with_0_prob() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment