py-in-the-sky/bisect_and_binary_search.py

## bisect_and_binary_search.py
"""
My variation of the Python bisect functions, inspired by my reading
of chapter 4 of Beautiful Code.

Studying the source code of Python's bisect module has been
very enlightening for me, both about binary search and about
setting up and reasoning about loop invariants in order to
ensure the correctness of a program.

Chapter 4 of Beautiful Code introduced me to slightly different
starting conditions and loop invariants for binary search, which
I found to be more intuitive than those inherent in the Python
bisect module.

One such change that made the code more natural and easy to follow
was how the 'lo' variable is used and updated. In bisect, the predicate
for the while loop is 'while lo < hi' and lo is updated as
'lo = mid + 1'. The 'mid + 1' calculation is necessary for correctness
and for avoiding infinite loops. In the binary-search pattern below, taken
from Beautiful Code, the predicate of the while loop is 'while hi - lo > 1'
and lo and hi are updated similarly: 'lo = mid' and 'hi = mid'. The docstrings
for the bisect_right and bisect_left functions below go into further detail
about loop invariants and reasoning about the correctness of the algorithms.

Another change I've made that I believe makes this code useful to study
is that binary search is implemented only once, in _bisect, rather than
in more than one place and in slightly different ways, as in bisect. It
makes visible the fact that the choice of bisect_right or bisect_left
as the mode of binary search creates only one small difference in the code:
the difference is simply the choice of '>' versus '>=' in the predicate for
updating hi.

Source for Python bisect: https://github.com/python/cpython/blob/2.7/Lib/bisect.py
Beautiful Code: http://shop.oreilly.com/product/9780596510046.do
"""


BISECT_LEFT, BISECT_RIGHT = True, False


def bisect_left(A, target):
    """
    Returns i such that A[i] is the leftmost value in A that
    is greater than or equal to target, if such a value exists
    in A. Otherwise, returns len(A).

    [1, 2, 2, 3]
        ^

    Loop invariants:
        * target <= A[hi] or hi == len(A)
        * A[lo] < target  or lo == -1

    It follows from the predicate of the while loop that in
    the end, hi == lo + 1. It follows from this and the loop
    invariants that in the end, hi == len(A) or A[hi] is the
    leftmost value in A that is greater than or equal to target
    (and A[lo] is the rightmost value in A that is less than target,
    or lo == -1).
    """
    return _bisect(A, target, BISECT_LEFT)


def bisect_right(A, target):
    """
    Returns i such that A[i] is the leftmost value in A that
    is greater than target, if such a value exists in A. Otherwise,
    returns len(A).

    [1, 2, 2, 3]
              ^

    Loop invariants:
        * target < A[hi]  or hi == len(A)
        * A[lo] <= target or lo == -1

    It follows from the predicate of the while loop that in
    the end, hi == lo + 1. It follows from this and the loop
    invariants that in the end, hi == len(A) or A[hi] is the
    leftmost value in A that is greater than target (and A[lo]
    is the rightmost value in A that is less than or equal to
    target, or lo == -1).
    """
    return _bisect(A, target, BISECT_RIGHT)


def _bisect(A, target, mode):
    """
    Does this algorithm always end? In other words, does it ever
    get caught in an infinite loop? Answer: it never gets caught
    in an infinite loop and therefore it always ends. The reason:
    As long as hi - lo > 1 (the predicate on the while loop), then
    it is the case that lo < lo + (hi - lo) // 2 < hi. This means
    that for every loop, lo < mid < hi, and therefore the value of
    hi - lo will decrease with every iteration of the while loop,
    which in turn means that we'll get to hi - lo <= 1 in a finite
    number of iterations.
    """
    assert mode in (BISECT_LEFT, BISECT_RIGHT)

    b_right = mode is BISECT_RIGHT
    lo, hi = -1, len(A)

    while hi - lo > 1:
        mid = lo + (hi - lo) // 2  # Integer overflow doesn't happen in Python, but I felt this was instructive anyway.

        if (A[mid] > target if b_right else A[mid] >= target):
            hi = mid
        else:
            lo = mid

    return hi


def binary_search(A, target):
    "Returns index of target in A, if target is in A. Otherwise, returns None."
    i = bisect_left(A, target)

    if i < len(A) and A[i] == target:
        return i
    else:
        return None


def tests():
    import bisect
    import random

    hardcoded_cases = [
        [([], 5), None],
        [([1], 5), None],
        [([6], 5), None],
        [([1, 5, 9], 5), 1],
    ]

    for inputs,output in hardcoded_cases:
        assert binary_search(*inputs) == output
        assert bisect.bisect_left(*inputs) == bisect_left(*inputs)
        assert bisect.bisect_right(*inputs) == bisect_right(*inputs)

    N = 1000
    random_sorted_array = lambda: sorted(random.randint(0, N) for _ in xrange(random.randint(0, N)))
    random_cases = [(random_sorted_array(), random.randint(0, N)) for _ in xrange(N)]

    for inputs in random_cases:
        assert bisect.bisect_left(*inputs) == bisect_left(*inputs)
        assert bisect.bisect_right(*inputs) == bisect_right(*inputs)

    print 'Tests pass!'
	"""
	My variation of the Python bisect functions, inspired by my reading
	of chapter 4 of Beautiful Code.

	Studying the source code of Python's bisect module has been
	very enlightening for me, both about binary search and about
	setting up and reasoning about loop invariants in order to
	ensure the correctness of a program.

	Chapter 4 of Beautiful Code introduced me to slightly different
	starting conditions and loop invariants for binary search, which
	I found to be more intuitive than those inherent in the Python
	bisect module.

	One such change that made the code more natural and easy to follow
	was how the 'lo' variable is used and updated. In bisect, the predicate
	for the while loop is 'while lo < hi' and lo is updated as
	'lo = mid + 1'. The 'mid + 1' calculation is necessary for correctness
	and for avoiding infinite loops. In the binary-search pattern below, taken
	from Beautiful Code, the predicate of the while loop is 'while hi - lo > 1'
	and lo and hi are updated similarly: 'lo = mid' and 'hi = mid'. The docstrings
	for the bisect_right and bisect_left functions below go into further detail
	about loop invariants and reasoning about the correctness of the algorithms.

	Another change I've made that I believe makes this code useful to study
	is that binary search is implemented only once, in _bisect, rather than
	in more than one place and in slightly different ways, as in bisect. It
	makes visible the fact that the choice of bisect_right or bisect_left
	as the mode of binary search creates only one small difference in the code:
	the difference is simply the choice of '>' versus '>=' in the predicate for
	updating hi.

	Source for Python bisect: https://github.com/python/cpython/blob/2.7/Lib/bisect.py
	Beautiful Code: http://shop.oreilly.com/product/9780596510046.do
	"""


	BISECT_LEFT, BISECT_RIGHT = True, False


	def bisect_left(A, target):
	"""
	Returns i such that A[i] is the leftmost value in A that
	is greater than or equal to target, if such a value exists
	in A. Otherwise, returns len(A).

	[1, 2, 2, 3]
	^

	Loop invariants:
	* target <= A[hi] or hi == len(A)
	* A[lo] < target or lo == -1

	It follows from the predicate of the while loop that in
	the end, hi == lo + 1. It follows from this and the loop
	invariants that in the end, hi == len(A) or A[hi] is the
	leftmost value in A that is greater than or equal to target
	(and A[lo] is the rightmost value in A that is less than target,
	or lo == -1).
	"""
	return _bisect(A, target, BISECT_LEFT)


	def bisect_right(A, target):
	"""
	Returns i such that A[i] is the leftmost value in A that
	is greater than target, if such a value exists in A. Otherwise,
	returns len(A).

	[1, 2, 2, 3]
	^

	Loop invariants:
	* target < A[hi] or hi == len(A)
	* A[lo] <= target or lo == -1

	It follows from the predicate of the while loop that in
	the end, hi == lo + 1. It follows from this and the loop
	invariants that in the end, hi == len(A) or A[hi] is the
	leftmost value in A that is greater than target (and A[lo]
	is the rightmost value in A that is less than or equal to
	target, or lo == -1).
	"""
	return _bisect(A, target, BISECT_RIGHT)


	def _bisect(A, target, mode):
	"""
	Does this algorithm always end? In other words, does it ever
	get caught in an infinite loop? Answer: it never gets caught
	in an infinite loop and therefore it always ends. The reason:
	As long as hi - lo > 1 (the predicate on the while loop), then
	it is the case that lo < lo + (hi - lo) // 2 < hi. This means
	that for every loop, lo < mid < hi, and therefore the value of
	hi - lo will decrease with every iteration of the while loop,
	which in turn means that we'll get to hi - lo <= 1 in a finite
	number of iterations.
	"""
	assert mode in (BISECT_LEFT, BISECT_RIGHT)

	b_right = mode is BISECT_RIGHT
	lo, hi = -1, len(A)

	while hi - lo > 1:
	mid = lo + (hi - lo) // 2 # Integer overflow doesn't happen in Python, but I felt this was instructive anyway.

	if (A[mid] > target if b_right else A[mid] >= target):
	hi = mid
	else:
	lo = mid

	return hi


	def binary_search(A, target):
	"Returns index of target in A, if target is in A. Otherwise, returns None."
	i = bisect_left(A, target)

	if i < len(A) and A[i] == target:
	return i
	else:
	return None


	def tests():
	import bisect
	import random

	hardcoded_cases = [
	[([], 5), None],
	[([1], 5), None],
	[([6], 5), None],
	[([1, 5, 9], 5), 1],
	]

	for inputs,output in hardcoded_cases:
	assert binary_search(*inputs) == output
	assert bisect.bisect_left(inputs) == bisect_left(inputs)
	assert bisect.bisect_right(inputs) == bisect_right(inputs)

	N = 1000
	random_sorted_array = lambda: sorted(random.randint(0, N) for _ in xrange(random.randint(0, N)))
	random_cases = [(random_sorted_array(), random.randint(0, N)) for _ in xrange(N)]

	for inputs in random_cases:
	assert bisect.bisect_left(inputs) == bisect_left(inputs)
	assert bisect.bisect_right(inputs) == bisect_right(inputs)

	print 'Tests pass!'