jokester/test_bisect.py

## test_bisect.py
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

import random
from array import array
from bisect import bisect_left, bisect_right
from timeit import repeat

class Distribution:
    """Generate random integers according to a specific function.

    -- self.values: array.array('f')
    -- self.start: int
        Lower bound of definition interval.
    -- self.nbVal: int
        Number of values to be stored = len(self.values)
    """

    def __init__(self, function, start, end):
        """Initializer.

        -- function: function
            1-argument function. <function>(x) will be proportional to the
            probability that the integer x be chosen.
        -- start: int
            = self.start
        -- end: int
            Upper bound of definition interval.
        """
        values = [function(x) for x in xrange(start, end + 1)]
        fact = 1. / sum(values)
        s = 0.
        for i, v in enumerate(values):
            s += fact * v
            values[i] = s
        self.values = array('f', values)
        self.start = start
        self.nbVal = end + 1 - start

    def next_bisect(self, r):
        """Return new random integer, according to distribution."""
        values = self.values
        i = bisect_right( values, r  )
        # still "first i such that r < values[i]"
        return self.start + i

    def next_linear(self, r):
        """Return new random integer, according to distribution."""
        values = self.values
        i = -1
        for i in xrange(self.nbVal):
            if r < values[i]:
                return self.start + i
        return self.start + i    # We should never reach this line

def assert_correctness():
    prob_list = [ random.random() for i in range(0,500001) ]
    sum_prob_list = sum(prob_list)
    dist = Distribution( prob_list.__getitem__, 0, 500000 )

    # random number : when r is new random number
    for i in range(50000):
        r = random.random()
        n1 = dist.next_linear(r)
        n2 = dist.next_linear(r)
        assert( n1==n2 )

    # marginal condition : when r equals to some item in cumulative list
    for j in range(0,2000):
        r = sum( prob_list[:j] ) / sum_prob_list
        n1 = dist.next_linear(r)
        n2 = dist.next_linear(r)
        assert( n1==n2 )
    print( "correctness verified" )

def benchmark():
    prob_list = [ random.random() for i in range(0,500001) ]
    dist = Distribution( prob_list.__getitem__, 0, 500000 )

    time_linear = repeat(
        lambda: dist.next_linear(random.random()),
        number = 10000,
        repeat = 3
        )

    time_bisect = repeat(
        lambda: dist.next_bisect(random.random()),
        number = 10000,
        repeat = 3
        )

    print( "average run time of linear search: %e " %( sum(time_linear )/10000/3 ) )
    print( "average run time of bisect search: %e " %( sum(time_bisect )/10000/3 ) )
    print( "ratio of average run time: %e " %( 1.0 * sum(time_linear) / sum(time_bisect) ) )

def assert_correctness__with_0_prob():
    prob_list = [
            0, # <= 0
            1, # <= 1
            0, # <= 2
            0, # <= 3
            0, # <= 4
            1, # <= 5
            0, # <= 6
            ]
    dist = Distribution( prob_list.__getitem__, 0, 6 )
    for r,expected in {
            ( 0.0 ) : 1 ,
            ( 0.1 ) : 1 ,
            ( 0.4 ) : 1 ,
            ( 0.5-1e-16 ) : 1 ,
            ( 0.5 ) : 5 ,
            ( 0.5+1e-16 ) : 5 ,
            ( 0.6 ) : 5 ,
            ( 0.7 ) : 5 ,
            ( 0.99999 ) : 5 ,
            ( 1-1e-16 ) : 5 ,
            # ( 1.0 ) : 5 , ### NOTE not required. random.random() ranges in [0,1)
            }.iteritems():
        actual = dist.next_bisect(r)
        try:
            assert( actual == expected )
        except:
            print("next_bisect(%1.20f) returned %i , not %i" % (r, actual, expected) )

assert_correctness()
benchmark()
assert_correctness__with_0_prob()
	#!/usr/bin/env python2
	# -- coding: utf-8 --

	import random
	from array import array
	from bisect import bisect_left, bisect_right
	from timeit import repeat

	class Distribution:
	"""Generate random integers according to a specific function.

	-- self.values: array.array('f')
	-- self.start: int
	Lower bound of definition interval.
	-- self.nbVal: int
	Number of values to be stored = len(self.values)
	"""

	def __init__(self, function, start, end):
	"""Initializer.

	-- function: function
	1-argument function. <function>(x) will be proportional to the
	probability that the integer x be chosen.
	-- start: int
	= self.start
	-- end: int
	Upper bound of definition interval.
	"""
	values = [function(x) for x in xrange(start, end + 1)]
	fact = 1. / sum(values)
	s = 0.
	for i, v in enumerate(values):
	s += fact * v
	values[i] = s
	self.values = array('f', values)
	self.start = start
	self.nbVal = end + 1 - start

	def next_bisect(self, r):
	"""Return new random integer, according to distribution."""
	values = self.values
	i = bisect_right( values, r )
	# still "first i such that r < values[i]"
	return self.start + i

	def next_linear(self, r):
	"""Return new random integer, according to distribution."""
	values = self.values
	i = -1
	for i in xrange(self.nbVal):
	if r < values[i]:
	return self.start + i
	return self.start + i # We should never reach this line

	def assert_correctness():
	prob_list = [ random.random() for i in range(0,500001) ]
	sum_prob_list = sum(prob_list)
	dist = Distribution( prob_list.__getitem__, 0, 500000 )

	# random number : when r is new random number
	for i in range(50000):
	r = random.random()
	n1 = dist.next_linear(r)
	n2 = dist.next_linear(r)
	assert( n1==n2 )

	# marginal condition : when r equals to some item in cumulative list
	for j in range(0,2000):
	r = sum( prob_list[:j] ) / sum_prob_list
	n1 = dist.next_linear(r)
	n2 = dist.next_linear(r)
	assert( n1==n2 )
	print( "correctness verified" )

	def benchmark():
	prob_list = [ random.random() for i in range(0,500001) ]
	dist = Distribution( prob_list.__getitem__, 0, 500000 )

	time_linear = repeat(
	lambda: dist.next_linear(random.random()),
	number = 10000,
	repeat = 3
	)

	time_bisect = repeat(
	lambda: dist.next_bisect(random.random()),
	number = 10000,
	repeat = 3
	)

	print( "average run time of linear search: %e " %( sum(time_linear )/10000/3 ) )
	print( "average run time of bisect search: %e " %( sum(time_bisect )/10000/3 ) )
	print( "ratio of average run time: %e " %( 1.0 * sum(time_linear) / sum(time_bisect) ) )

	def assert_correctness__with_0_prob():
	prob_list = [
	0, # <= 0
	1, # <= 1
	0, # <= 2
	0, # <= 3
	0, # <= 4
	1, # <= 5
	0, # <= 6
	]
	dist = Distribution( prob_list.__getitem__, 0, 6 )
	for r,expected in {
	( 0.0 ) : 1 ,
	( 0.1 ) : 1 ,
	( 0.4 ) : 1 ,
	( 0.5-1e-16 ) : 1 ,
	( 0.5 ) : 5 ,
	( 0.5+1e-16 ) : 5 ,
	( 0.6 ) : 5 ,
	( 0.7 ) : 5 ,
	( 0.99999 ) : 5 ,
	( 1-1e-16 ) : 5 ,
	# ( 1.0 ) : 5 , ### NOTE not required. random.random() ranges in [0,1)
	}.iteritems():
	actual = dist.next_bisect(r)
	try:
	assert( actual == expected )
	except:
	print("next_bisect(%1.20f) returned %i , not %i" % (r, actual, expected) )

	assert_correctness()
	benchmark()
	assert_correctness__with_0_prob()