ilknarf/01 patience.py

## 01 patience.py
"""
Rough implementations of patience sorts (p3 sort) as shown in
https://www.microsoft.com/en-us/research/publication/patience-is-a-virtue-revisiting-merge-and-sort-on-modern-processors/.
Note that performance benchmarks won't be valid, since the data structures used (e.g. deque) don't take advantage of memory locality
in the same way that an implementation of cache-local linked memory blocks would.
"""

from collections import deque
from binary_search import binary_search

def patience_sort(l, iter_t=list):
    """
    patience_sort creates sorted runs by adding incrementing items
    to a previous run, and creating new runs from smaller items
    """
    runs = []
    i = 0

    # while elements still remain, keep adding to runs
    while i < len(l):
        n = l[i]
        for run in runs:
            if run[-1] <= n:
                run.append(n)
                break
        else:
            # new run
            next_run = iter_t()
            next_run.append(n)
            runs.append(next_run)


        i += 1

    return runs


def patience_star_sort(l):
    """
    patience_star_sort performs a modified patience sort where
    decrementing items can be appended to the front of a previous run,
    as well as added to the end of the first run
    """
    runs = []
    i = 0

    # while elements still remain, keep adding to runs
    while i < len(l):
        n = l[i]

        for j in range(len(runs)):
            run = runs[j]
            if run[-1] <= n:
                run.append(n)
                if j == 0:
                    while i + 1 < len(l) and l[i + 1] >= n:
                        i += 1
                        n = l[i]
                        run.append(n)
                break
        else:
            index = binary_search(runs, n)
            if index != len(runs):
                # append to run
                runs[index].appendleft(n)
            else:
                # new run
                next_run = deque()
                next_run.append(n)

                runs.append(next_run)

        i += 1

    return runs

## 02 merge.py
"""
Rough Implementations of the merge methods highlighted in
https://www.microsoft.com/en-us/research/publication/patience-is-a-virtue-revisiting-merge-and-sort-on-modern-processors/.
In the paper, balanced and unbalanced ping-pong merges were shown to have significant performance increases
"""

import heapq
from collections import deque

def k_way_merge(runs):
    """
    k_way_merge performs a k-way merge by placing lists
    in a heap.
    """
    res = []

    # add lists to heap
    heapq.heapify(runs)

    # while runs remain
    while runs:
        next_list = heapq.heappop(runs)
        # append first item to result
        res.append(next_list.popleft())

        # if not empty, push to heap
        if next_list:
            heapq.heappush(runs, next_list)

    return res


def binary_merge(dest, src1, src2, dest_start, r1_start, r1_length, r2_start, r2_length):
    """
    binary_merge performs a binary merge of two sorted lists into destß
    """
    # copy src if dest is the same as src
    if src1 is not dest:
        i1, end1 = r1_start, r1_start + r1_length
    else:
        src1 = src1[r1_start:r1_start + r1_length]
        i1, end1 = 0, r1_length

    if src2 is not dest:
        i2, end2 = r2_start, r2_start + r2_length
    else:
        src2 = src2[r2_start:r2_start + r2_length]
        i2, end2 = 0, r2_length

    combined_end = dest_start + r1_length + r2_length

    dest_i = dest_start

    while i1 < end1 and i2 < end2:
        n1 = src1[i1]
        n2 = src2[i2]

        # determine next value
        if n1 < n2:
            n = n1
            i1 += 1
        else:
            n = n2
            i2 += 1

        dest[dest_i] = n
        dest_i += 1

    # merge remaining items, if exists
    if i1 < end1:
        dest[dest_i:combined_end] = src1[i1:end1]

    if i2 < end2:
        dest[dest_i:combined_end] = src2[i2:end2]


def ping_pong_balanced(runs):
    """
    ping_pong_balanced performs a balanced ping_pong merge, as
    laid out in the SIGMOD paper
    """
    runs.sort(key=len)

    # create two lists to 'ping pong'
    l1 = [item for run in runs for item in run]
    l2 = [0] * len(l1)

    current_run = 1
    run_lengths = [len(l) for l in runs]

    while len(run_lengths) > 1:
        new_run_lengths = []

        start_index = 0
        for i in range(len(run_lengths) // 2):
            # get run lengths
            r1_length = run_lengths[i * 2]
            r2_length = run_lengths[i * 2 + 1]

            # get slice of next list
            combined_length = r1_length + r2_length
            merge_start = start_index

            # get run start indices
            r1_start = start_index
            start_index += r1_length

            r2_start = start_index
            start_index += r2_length

            # perform binary merge
            binary_merge(l2, l1, l1, merge_start, r1_start, r1_length, r2_start, r2_length)

            # add length to new_run_lengths
            new_run_lengths.append(combined_length)

        # if item remaining, add to end of merges
        if len(run_lengths) % 2 != 0:
            # copy remaining run
            l2[start_index:] = l1[start_index:]
            # add length to run_lengths
            new_run_lengths.append(run_lengths[-1])

        # swap l1 and l2
        l1, l2 = l2, l1
        run_lengths = new_run_lengths

    return l1


def ping_pong_unbalanced(runs):
    """
    ping_pong_unbalanced performs an unbalanced ping-pong merge, as laid out
    in the SIGMOD paper
    """
    # sort by length
    runs.sort(key=len)

    # flatten runs, then prepare second array
    l1 = [item for run in runs for item in run]
    l2 = [0] * len(l1)

    # store tuple of (list_1?, run_length), where list_1? indicates if a list is l1
    elems = deque((True, len(run)) for run in runs)

    i = 0

    # check if merging first pair cheaper than current pair

    # increment start index across loops to determine run starts
    start_index = 0
    while len(elems) > 1:
        # if no next pair exists, reset iteration
        if i + 1 >= len(elems):
            i = 0
            start_index = 0

        # if first pair is cheaper to merge, reset iteration
        elif elems[0][1] + elems[1][1] < elems[i][1] + elems[i + 1][1]:
            i = 0
            start_index = 0

        # get run info from elems
        r1_list, r1_length = elems[i]
        r2_list, r2_length = elems[i + 1]

        # set dest start index for merge
        dest_start = start_index

        # get start index of runs, and increment for next runs
        r1_start = start_index
        start_index += r1_length

        r2_start = start_index
        start_index += r2_length

        # set sources and dest for merge
        if r1_list:
            dest = l2
            src1 = l1
        else:
            dest = l1
            src1 = l2

        src2 = l1 if r2_list else l2

        # merge runs
        binary_merge(dest, src1, src2, dest_start, r1_start, r1_length, r2_start, r2_length)

        # update elems
        elems[i] = not elems[i][0], r1_length + r2_length
        del elems[i + 1]

        # increment i
        i += 1

    # return merged list (e.g, the remaining element)
    if elems[0][0]:
        return l1

    return l2

## 03 binary_search.py
def binary_search(l, v):
    """
    binary search of lists, using the first element of the list for comparison
    """
    lo, hi = 0, len(l)
    while lo < hi:
        mid = lo + (hi - lo) // 2

        if l[mid][0] >= v:
            hi = mid
        else:
            lo = mid + 1

    return lo
	"""
	Rough implementations of patience sorts (p3 sort) as shown in
	https://www.microsoft.com/en-us/research/publication/patience-is-a-virtue-revisiting-merge-and-sort-on-modern-processors/.
	Note that performance benchmarks won't be valid, since the data structures used (e.g. deque) don't take advantage of memory locality
	in the same way that an implementation of cache-local linked memory blocks would.
	"""

	from collections import deque
	from binary_search import binary_search

	def patience_sort(l, iter_t=list):
	"""
	patience_sort creates sorted runs by adding incrementing items
	to a previous run, and creating new runs from smaller items
	"""
	runs = []
	i = 0

	# while elements still remain, keep adding to runs
	while i < len(l):
	n = l[i]
	for run in runs:
	if run[-1] <= n:
	run.append(n)
	break
	else:
	# new run
	next_run = iter_t()
	next_run.append(n)
	runs.append(next_run)


	i += 1

	return runs


	def patience_star_sort(l):
	"""
	patience_star_sort performs a modified patience sort where
	decrementing items can be appended to the front of a previous run,
	as well as added to the end of the first run
	"""
	runs = []
	i = 0

	# while elements still remain, keep adding to runs
	while i < len(l):
	n = l[i]

	for j in range(len(runs)):
	run = runs[j]
	if run[-1] <= n:
	run.append(n)
	if j == 0:
	while i + 1 < len(l) and l[i + 1] >= n:
	i += 1
	n = l[i]
	run.append(n)
	break
	else:
	index = binary_search(runs, n)
	if index != len(runs):
	# append to run
	runs[index].appendleft(n)
	else:
	# new run
	next_run = deque()
	next_run.append(n)

	runs.append(next_run)

	i += 1

	return runs
	"""
	Rough Implementations of the merge methods highlighted in
	https://www.microsoft.com/en-us/research/publication/patience-is-a-virtue-revisiting-merge-and-sort-on-modern-processors/.
	In the paper, balanced and unbalanced ping-pong merges were shown to have significant performance increases
	"""

	import heapq
	from collections import deque

	def k_way_merge(runs):
	"""
	k_way_merge performs a k-way merge by placing lists
	in a heap.
	"""
	res = []

	# add lists to heap
	heapq.heapify(runs)

	# while runs remain
	while runs:
	next_list = heapq.heappop(runs)
	# append first item to result
	res.append(next_list.popleft())

	# if not empty, push to heap
	if next_list:
	heapq.heappush(runs, next_list)

	return res


	def binary_merge(dest, src1, src2, dest_start, r1_start, r1_length, r2_start, r2_length):
	"""
	binary_merge performs a binary merge of two sorted lists into destß
	"""
	# copy src if dest is the same as src
	if src1 is not dest:
	i1, end1 = r1_start, r1_start + r1_length
	else:
	src1 = src1[r1_start:r1_start + r1_length]
	i1, end1 = 0, r1_length

	if src2 is not dest:
	i2, end2 = r2_start, r2_start + r2_length
	else:
	src2 = src2[r2_start:r2_start + r2_length]
	i2, end2 = 0, r2_length

	combined_end = dest_start + r1_length + r2_length

	dest_i = dest_start

	while i1 < end1 and i2 < end2:
	n1 = src1[i1]
	n2 = src2[i2]

	# determine next value
	if n1 < n2:
	n = n1
	i1 += 1
	else:
	n = n2
	i2 += 1

	dest[dest_i] = n
	dest_i += 1

	# merge remaining items, if exists
	if i1 < end1:
	dest[dest_i:combined_end] = src1[i1:end1]

	if i2 < end2:
	dest[dest_i:combined_end] = src2[i2:end2]


	def ping_pong_balanced(runs):
	"""
	ping_pong_balanced performs a balanced ping_pong merge, as
	laid out in the SIGMOD paper
	"""
	runs.sort(key=len)

	# create two lists to 'ping pong'
	l1 = [item for run in runs for item in run]
	l2 = [0] * len(l1)

	current_run = 1
	run_lengths = [len(l) for l in runs]

	while len(run_lengths) > 1:
	new_run_lengths = []

	start_index = 0
	for i in range(len(run_lengths) // 2):
	# get run lengths
	r1_length = run_lengths[i * 2]
	r2_length = run_lengths[i * 2 + 1]

	# get slice of next list
	combined_length = r1_length + r2_length
	merge_start = start_index

	# get run start indices
	r1_start = start_index
	start_index += r1_length

	r2_start = start_index
	start_index += r2_length

	# perform binary merge
	binary_merge(l2, l1, l1, merge_start, r1_start, r1_length, r2_start, r2_length)

	# add length to new_run_lengths
	new_run_lengths.append(combined_length)

	# if item remaining, add to end of merges
	if len(run_lengths) % 2 != 0:
	# copy remaining run
	l2[start_index:] = l1[start_index:]
	# add length to run_lengths
	new_run_lengths.append(run_lengths[-1])

	# swap l1 and l2
	l1, l2 = l2, l1
	run_lengths = new_run_lengths

	return l1


	def ping_pong_unbalanced(runs):
	"""
	ping_pong_unbalanced performs an unbalanced ping-pong merge, as laid out
	in the SIGMOD paper
	"""
	# sort by length
	runs.sort(key=len)

	# flatten runs, then prepare second array
	l1 = [item for run in runs for item in run]
	l2 = [0] * len(l1)

	# store tuple of (list_1?, run_length), where list_1? indicates if a list is l1
	elems = deque((True, len(run)) for run in runs)

	i = 0

	# check if merging first pair cheaper than current pair

	# increment start index across loops to determine run starts
	start_index = 0
	while len(elems) > 1:
	# if no next pair exists, reset iteration
	if i + 1 >= len(elems):
	i = 0
	start_index = 0

	# if first pair is cheaper to merge, reset iteration
	elif elems[0][1] + elems[1][1] < elems[i][1] + elems[i + 1][1]:
	i = 0
	start_index = 0

	# get run info from elems
	r1_list, r1_length = elems[i]
	r2_list, r2_length = elems[i + 1]

	# set dest start index for merge
	dest_start = start_index

	# get start index of runs, and increment for next runs
	r1_start = start_index
	start_index += r1_length

	r2_start = start_index
	start_index += r2_length

	# set sources and dest for merge
	if r1_list:
	dest = l2
	src1 = l1
	else:
	dest = l1
	src1 = l2

	src2 = l1 if r2_list else l2

	# merge runs
	binary_merge(dest, src1, src2, dest_start, r1_start, r1_length, r2_start, r2_length)

	# update elems
	elems[i] = not elems[i][0], r1_length + r2_length
	del elems[i + 1]

	# increment i
	i += 1

	# return merged list (e.g, the remaining element)
	if elems[0][0]:
	return l1

	return l2
	def binary_search(l, v):
	"""
	binary search of lists, using the first element of the list for comparison
	"""
	lo, hi = 0, len(l)
	while lo < hi:
	mid = lo + (hi - lo) // 2

	if l[mid][0] >= v:
	hi = mid
	else:
	lo = mid + 1

	return lo