edelooff/merge_benchmark.py

## merge_benchmark.py
"""Merging two sorted lists in Python in four ways.

1. The "stupid naive" way of adding them together and sorting the whole thing.
   This should be pretty fast as timsort is designed to be fast on partially
   sorted lists. However, it uses more memory and might not scale very well?
2. The "picking an algorithm designed for this" way of using heapq.merge. This
   provides a generator that walks the two lists without additional use of
   memory.
3. The "C-way" of indexing our way through two lists. This being Python,
   we will be yielding the values (wrapping the resulting generator in
   list() is actually faster than returning a list.) We'd expect this to be
   pretty fast, though its implementation in Python might not make it as fast
   as the sorting. It should beat the heapq.merge though.
4. The Python way of successively iterating over two potentially infinite
   iterables. Hopefully Raymond will be proud of us for taking iterators real
   serious. We'd expect this to do pretty well, but maybe not quite as fast as
   doing it the C-way.

You might be surprised how these compare.
"""

from heapq import merge
from random import sample
import timeit


def merge_naive(left, right):
    """Merge two sorted, indexable sequences, yielding items in order.

    Both inputs are walked with explicit indices for as long as each of them
    still has items; whatever remains of either one is then yielded from a
    slice. When the two current items compare equal, the item from ``right``
    is yielded first.
    """
    left_pos, right_pos = 0, 0
    left_end, right_end = len(left), len(right)
    while left_pos < left_end and right_pos < right_end:
        left_item = left[left_pos]
        right_item = right[right_pos]
        if left_item < right_item:
            yield left_item
            left_pos += 1
            continue
        yield right_item
        right_pos += 1
    # The loop ends as soon as one index reaches its end, so at most one of
    # these two slices still holds items.
    yield from left[left_pos:]
    yield from right[right_pos:]


def merge_walk(left, right):
    """Yield the items of two sorted lists in overall sorted order.

    The loop keeps the current item of each list in a local variable and only
    touches a list again when its index moves forward. Exhaustion is not
    tested on every cycle; instead, the IndexError raised by reading past the
    end of one list signals that the remainder of the other list can be
    yielded as-is. That exception is caught at most once per merge.

    If either input is empty, both inputs are yielded directly and the loop
    is skipped. Items that compare equal are yielded from ``right`` first.
    """
    if not (left and right):
        yield from left
        yield from right
        return
    left_pos = right_pos = 0
    left_head = left[0]
    right_head = right[0]
    while True:
        if left_head < right_head:
            yield left_head
            left_pos += 1
            try:
                left_head = left[left_pos]
            except IndexError:
                # left is used up; right_head is still pending at right_pos.
                yield from right[right_pos:]
                return
        else:
            yield right_head
            right_pos += 1
            try:
                right_head = right[right_pos]
            except IndexError:
                # right is used up; left_head is still pending at left_pos.
                yield from left[left_pos:]
                return


def merge_iter(left, right):
    """Yield values from two sorted iterables in overall sorted order.

    Both arguments are wrapped in iter() and consumed lazily, one value at a
    time, so the inputs may be generators or other one-shot (even infinite)
    iterables. Values that compare equal are yielded from ``right`` first.

    Almost the whole body sits in a single try/except: the hot loop only
    compares, yields and calls next(), and the StopIteration raised by
    whichever iterator runs dry first moves control to the cold clean-up
    code at the bottom.
    """
    left = iter(left)
    right = iter(right)
    # Private marker for "no value fetched yet". Unlike None it can never be
    # confused with an input item, and it avoids probing locals() to find out
    # which variables have been bound.
    unset = object()
    left_val = right_val = unset
    try:
        left_val = next(left)
        right_val = next(right)
        while True:
            if left_val < right_val:
                yield left_val
                left_val = next(left)
            else:
                yield right_val
                right_val = next(right)
    except StopIteration:
        if left_val is not unset:
            if right_val is not unset:
                # A failed next() leaves the exhausted side's variable holding
                # a value that was already yielded. The other side's current
                # value is still pending and is never the smaller of the two,
                # so max() selects it (on a tie max() returns left_val, which
                # is the pending one in that case).
                yield max(left_val, right_val)
            else:
                # right was empty from the start: only left_val is pending.
                yield left_val
        # At most one of the iterators still has values at this point.
        yield from left
        yield from right


# Each entry pairs a display name with a factory. The factory takes the two
# sorted input lists and returns a zero-argument callable that performs one
# complete merge into a list, which is the shape timeit expects.
COMBINERS = [
    (
        'combine-and-sort',
        lambda la, lb: (lambda: sorted(la + lb)),
    ),
    (
        'heap-merge',
        lambda la, lb: (lambda: list(merge(la, lb))),
    ),
    (
        'merge-naive',
        lambda la, lb: (lambda: list(merge_naive(la, lb))),
    ),
    (
        'merge-walk',
        lambda la, lb: (lambda: list(merge_walk(la, lb))),
    ),
    (
        'merge-iter',
        lambda la, lb: (lambda: list(merge_iter(la, lb))),
    ),
]

# Correctness cases as (left input, right input, expected merged output).
TESTS = [
    ([1, 2], [3, 4], [1, 2, 3, 4]),
    ([1, 3], [2, 4], [1, 2, 3, 4]),
    ([], [1, 3], [1, 3]),
    ([1, 3], [], [1, 3]),
]

# Approximate input length -> number of timed executions per repeat.
BENCHMARKS = {
    1: 1_000_000,
    10: 200_000,
    100: 20_000,
    1_000: 2_000,
    10_000: 400,
    100_000: 80,
    1_000_000: 16,
    10_000_000: 3,
}


def main():
    """Check every combiner for correctness, then benchmark each of them.

    First, every entry of COMBINERS is run against every case in TESTS and
    its result is asserted to equal the expected list. Then, for each input
    length in BENCHMARKS (in ascending order), two sorted lists of distinct
    random integers are generated, each combiner is checked against
    ``sorted(run_a + run_b)``, and the best of three timeit repeats is
    printed as the time per single merge, in microseconds or milliseconds.
    """
    for first, second, wanted in TESTS:
        for _label, make_runner in COMBINERS:
            runner = make_runner(first, second)
            assert runner() == wanted

    for length, iterations in sorted(BENCHMARKS.items()):
        # The two runs hold roughly 0.8x and 1.2x `length` items, each drawn
        # without replacement from range(length * 2).
        short_count = round(length * 0.8)
        long_count = round(length * 1.2)
        run_a = sorted(sample(range(length * 2), short_count))
        run_b = sorted(sample(range(length * 2), long_count))
        print(f'\nMerging two sorted lists of approximate length {length}')
        for name, make_runner in COMBINERS:
            runner = make_runner(run_a, run_b)
            assert runner() == sorted(run_a + run_b)
            timings = timeit.repeat(runner, number=iterations, repeat=3)
            # Each timing covers `iterations` merges; convert the best one to
            # microseconds per merge.
            micros = min(timings) * 1e6 / iterations
            if micros > 1000:
                print(f'  * [{name}]: {micros / 1000:.1f}ms')
            else:
                print(f'  * [{name}]: {micros:.1f}μs')


# Run the correctness checks and benchmarks only when this file is executed
# as a script, not when it is imported.
if __name__ == '__main__':
    main()
	"""Merging two sorted lists in Python in four ways.

	1. The "stupid naive" way of adding them together and sorting the whole thing.
	This should be pretty fast as timsort is designed to be fast on partially
	sorted lists. However, it uses more memory and might not scale very well?
	2. The "picking an algorithm designed for this" way of using heapq.merge. This
	provides a generator that walks the two lists without additional use of
	memory.
	3. The "C-way" of indexing our way through two lists. This being Python,
	we will be yielding the values (wrapping the resulting generator in
	list() is actually faster than returning a list.) We'd expect this to be
	pretty fast, though its implementation in Python might not make it as fast
	as the sorting. It should beat the heapq.merge though.
	4. The Python way of successively iterating over two potentially infinite
	iterables. Hopefully Raymond will be proud of us for taking iterators real
	serious. We'd expect this to do pretty well, but maybe not quite as fast as
	doing it the C-way.

	You might be surprised how these compare.
	"""

	from heapq import merge
	from random import sample
	import timeit


	def merge_naive(left, right):
	    """Naive generator for merging two sorted lists.

	    Walks both lists with explicit indices while each still has items,
	    yielding the smaller current item on every step (the item from
	    ``right`` when the two compare equal), then yields whatever remains
	    of either list from a slice.
	    """
	    i, max_i = 0, len(left)
	    j, max_j = 0, len(right)
	    while i < max_i and j < max_j:
	        if left[i] < right[j]:
	            yield left[i]
	            i += 1
	        else:
	            yield right[j]
	            j += 1
	    # The loop stops once one list is used up, so at most one of these
	    # slices still holds items.
	    yield from left[i:]
	    yield from right[j:]


	def merge_walk(left, right):
	    """Yield values from two sorted lists in overall sorted order.

	    Inside the main loop the current value of each list is kept in a
	    local variable, and a list is only indexed again when its position
	    moves forward. Exhaustion is not tested on each loop cycle; the
	    IndexError raised by reading past the end of one list signals that
	    the rest of the other list can be yielded as-is. That exception is
	    caught at most once per merge.

	    If either input is empty, both inputs are yielded directly. Values
	    that compare equal are yielded from ``right`` first.
	    """
	    if not left or not right:
	        yield from left
	        yield from right
	        return
	    i, left_val = 0, left[0]
	    j, right_val = 0, right[0]
	    while True:
	        if left_val < right_val:
	            yield left_val
	            try:
	                i += 1
	                left_val = left[i]
	            except IndexError:
	                # left is used up; right_val is still pending at index j.
	                yield from right[j:]
	                return
	        else:
	            yield right_val
	            try:
	                j += 1
	                right_val = right[j]
	            except IndexError:
	                # right is used up; left_val is still pending at index i.
	                yield from left[i:]
	                return


	def merge_iter(left, right):
	    """Yield values from two sorted iterables in overall sorted order.

	    Both arguments are wrapped in iter() and consumed lazily, one value
	    at a time, so the inputs may be generators or other one-shot (even
	    infinite) iterables. Values that compare equal are yielded from
	    ``right`` first.

	    Almost the whole body sits in a single try/except: the hot loop only
	    compares, yields and calls next(), and the StopIteration raised by
	    whichever iterator runs dry first moves control to the cold clean-up
	    code at the bottom.
	    """
	    left = iter(left)
	    right = iter(right)
	    # Private marker for "no value fetched yet". Unlike None it can never
	    # be confused with an input item, and it avoids probing locals() to
	    # find out which variables have been bound.
	    unset = object()
	    left_val = right_val = unset
	    try:
	        left_val = next(left)
	        right_val = next(right)
	        while True:
	            if left_val < right_val:
	                yield left_val
	                left_val = next(left)
	            else:
	                yield right_val
	                right_val = next(right)
	    except StopIteration:
	        if left_val is not unset:
	            if right_val is not unset:
	                # A failed next() leaves the exhausted side's variable
	                # holding a value that was already yielded. The other
	                # side's current value is still pending and is never the
	                # smaller of the two, so max() selects it (on a tie max()
	                # returns left_val, the pending one in that case).
	                yield max(left_val, right_val)
	            else:
	                # right was empty from the start: only left_val is pending.
	                yield left_val
	        # At most one of the iterators still has values at this point.
	        yield from left
	        yield from right


	# Each entry pairs a display name with a factory. The factory takes the two
	# sorted input lists and returns a zero-argument callable that performs one
	# complete merge into a list, which is the shape timeit expects.
	COMBINERS = [
	    (
	        'combine-and-sort',
	        lambda la, lb: (lambda: sorted(la + lb)),
	    ),
	    (
	        'heap-merge',
	        lambda la, lb: (lambda: list(merge(la, lb))),
	    ),
	    (
	        'merge-naive',
	        lambda la, lb: (lambda: list(merge_naive(la, lb))),
	    ),
	    (
	        'merge-walk',
	        lambda la, lb: (lambda: list(merge_walk(la, lb))),
	    ),
	    (
	        'merge-iter',
	        lambda la, lb: (lambda: list(merge_iter(la, lb))),
	    ),
	]

	# Correctness cases as (left input, right input, expected merged output).
	TESTS = [
	    ([1, 2], [3, 4], [1, 2, 3, 4]),
	    ([1, 3], [2, 4], [1, 2, 3, 4]),
	    ([], [1, 3], [1, 3]),
	    ([1, 3], [], [1, 3]),
	]

	# Approximate input length -> number of timed executions per repeat.
	BENCHMARKS = {
	    1: 1_000_000,
	    10: 200_000,
	    100: 20_000,
	    1_000: 2_000,
	    10_000: 400,
	    100_000: 80,
	    1_000_000: 16,
	    10_000_000: 3,
	}


	def main():
	    """Check every combiner for correctness, then benchmark each of them.

	    First, every entry of COMBINERS is run against every case in TESTS
	    and its result is asserted to equal the expected list. Then, for each
	    input length in BENCHMARKS (in ascending order), two sorted lists of
	    distinct random integers are generated, each combiner is checked
	    against ``sorted(run_a + run_b)``, and the best of three timeit
	    repeats is printed as the time per single merge.
	    """
	    for run_a, run_b, expected in TESTS:
	        for _name, test_func_creator in COMBINERS:
	            test_func = test_func_creator(run_a, run_b)
	            assert test_func() == expected

	    for length, tests in sorted(BENCHMARKS.items()):
	        # The two runs hold roughly 0.8x and 1.2x `length` items, each
	        # drawn without replacement from range(length * 2).
	        run_a = sorted(sample(range(length * 2), round(length * 0.8)))
	        run_b = sorted(sample(range(length * 2), round(length * 1.2)))
	        print(f'\nMerging two sorted lists of approximate length {length}')
	        for name, test_func_creator in COMBINERS:
	            test_func = test_func_creator(run_a, run_b)
	            assert test_func() == sorted(run_a + run_b)
	            best = min(timeit.repeat(test_func, number=tests, repeat=3))
	            # `best` covers `tests` merges; convert it to microseconds
	            # per merge.
	            micros = best * 1e6 / tests
	            if micros > 1000:
	                print(f' * [{name}]: {micros / 1000:.1f}ms')
	            else:
	                print(f' * [{name}]: {micros:.1f}μs')


	# Run the correctness checks and benchmarks only when this file is
	# executed as a script, not when it is imported.
	if __name__ == '__main__':
	    main()