Skip to content

Instantly share code, notes, and snippets.

@Reimirno
Created May 5, 2022 20:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Reimirno/cf4759f28f36a8cfae3e06168993ed83 to your computer and use it in GitHub Desktop.
class Sorting:
    """Classic sorting algorithms, implemented as static methods.

    All sorts mutate ``arr`` in place and return ``None``. Comparison
    sorts take a ``comparator(a, b)`` that returns a negative number,
    zero, or a positive number when ``a`` sorts before, equal to, or
    after ``b`` (C ``strcmp`` convention).
    """

    ### Comparison Sorting ###

    # Insertion sort
    # without swapping in every step
    # θ(n^2), but gets better if data is already somewhat sorted
    # more precisely, θ(nk), k is the max amount of data displaced from sorted position
    @staticmethod
    def insertion_sort(arr, comparator):
        """Stable in-place insertion sort."""
        for i in range(1, len(arr)):
            x = arr[i]  # keep a copy of arr[i] first
            j = i - 1
            # Shift larger elements one slot right; stop at the first
            # element <= x (stable). A while loop is used so that when
            # everything shifts, x correctly lands at index 0.
            while j >= 0 and comparator(arr[j], x) > 0:
                arr[j + 1] = arr[j]  # don't swap yet; copy over only
                j -= 1
            arr[j + 1] = x  # now put back x

    # Shell sort
    # an improvement on insertion sort, reducing number of inversions fast
    # by sorting distant elements first; the last round (interval == 1) is
    # a vanilla insertion sort, but with most inversions gone
    # θ(n^3/2) for this halving gap sequence
    @staticmethod
    def shell_sort(arr, comparator):
        """In-place shell sort with a halving gap sequence."""
        n = len(arr)
        interval = n // 2
        while interval:
            # gapped insertion sort: walk the chain i, i-interval, i-2*interval, ...
            for i in range(interval, n):
                x = arr[i]
                j = i - interval
                while j >= 0 and comparator(arr[j], x) > 0:
                    arr[j + interval] = arr[j]
                    j -= interval
                arr[j + interval] = x  # now put back x
            interval //= 2

    # Selection sort
    # each round, put the max of the remaining elements at the back
    # θ(n^2), regardless how sorted the data originally were
    # If we use a priority queue (Heap sort) or a balanced tree structure
    # (Tree sort), then we can have θ(nlogn):
    # For heap: the initial heapify operation can be O(n), but the n removals each need O(logn)
    # For tree: initial n insertions each need O(logn), and a traversal O(n), overall O(nlogn)
    @staticmethod
    def selection_sort(arr, comparator):
        """In-place selection sort (not stable)."""
        for i in reversed(range(1, len(arr))):
            max_idx = 0
            # scan arr[0..i] inclusive for the largest element
            for j in range(1, i + 1):
                if comparator(arr[j], arr[max_idx]) > 0:
                    max_idx = j
            # move the round's max to its final position at index i
            arr[i], arr[max_idx] = arr[max_idx], arr[i]

    ## From this point onwards I will ignore the comparator syntax and focus on the sorting itself

    # Merge sort
    # Implemented most easily with recursion
    # θ(nlogn) time, O(n) space in those array copies
    @staticmethod
    def merge_sort(arr):
        """Stable in-place (interface-wise) merge sort."""
        if len(arr) < 2:
            return
        Sorting.__split(arr, 0, len(arr) - 1)

    @staticmethod
    def __split(arr, start: int, end: int):
        """Recursively sort arr[start..end] (inclusive bounds)."""
        if end > start:
            mid = start + (end - start) // 2
            Sorting.__split(arr, start, mid)
            # right half starts at mid + 1, otherwise the recursion
            # never shrinks for two-element ranges
            Sorting.__split(arr, mid + 1, end)
            Sorting.__merge(arr, start, mid, end)

    @staticmethod
    def __merge(arr, start: int, mid: int, end: int):
        """Merge the sorted runs arr[start..mid] and arr[mid+1..end]."""
        # copy both runs; bounds are inclusive, hence the +1s
        arr1, arr2 = arr[start:mid + 1], arr[mid + 1:end + 1]
        p1, p2, pm = 0, 0, start
        while p1 < len(arr1) and p2 < len(arr2):
            # <= keeps equal elements in their original order (stable)
            if arr1[p1] <= arr2[p2]:
                arr[pm] = arr1[p1]
                p1 += 1
            else:
                arr[pm] = arr2[p2]
                p2 += 1
            pm += 1
        # copy whichever run has leftovers (at most one of these fires)
        if p1 < len(arr1):
            arr[pm:pm + len(arr1) - p1] = arr1[p1:]
        elif p2 < len(arr2):
            arr[pm:pm + len(arr2) - p2] = arr2[p2:]

    # Quick sort
    # best θ(nlogn), worst O(n^2)
    # space O(logn) due to recursive calls (O(n) worst case)
    # not stable
    @staticmethod
    def quick_sort(arr):
        """In-place quicksort (Lomuto partition, last-element pivot)."""
        Sorting.__quick(arr, 0, len(arr) - 1)

    @staticmethod
    def __quick(arr, start: int, end: int):
        """Recursively sort arr[start..end] (inclusive bounds)."""
        if start < end:
            pivot = Sorting.__partition(arr, start, end)
            # the pivot is already in its final spot; exclude it
            Sorting.__quick(arr, start, pivot - 1)
            Sorting.__quick(arr, pivot + 1, end)

    # I think this (Lomuto) partition strategy is better than Hoare's partition
    # It is much easier to understand
    @staticmethod
    def __partition(arr, start: int, end: int) -> int:
        """Partition arr[start..end] around a pivot; return its final index."""
        # get pivot first and park it at the end
        pivot_index = Sorting.__pivot(arr, start, end)
        arr[end], arr[pivot_index] = arr[pivot_index], arr[end]
        pivot, divide = arr[end], start - 1
        # whenever you get a smaller-or-equal element put it on the left
        for i in range(start, end):
            if arr[i] <= pivot:
                divide += 1
                arr[divide], arr[i] = arr[i], arr[divide]
        # place the pivot just after the last small element
        arr[divide + 1], arr[end] = arr[end], arr[divide + 1]
        return divide + 1

    # put whatever strategy of choosing pivot here
    # this is a naive way to do it by always choosing the last element
    @staticmethod
    def __pivot(arr, start: int, end: int) -> int:
        """Return the index of the chosen pivot within [start, end]."""
        return end

    ### Distribution Sorting ###

    # Counting sort (for int keys)
    # θ(n + m), n is number of elements, m is max key value
    # we are just traversing the frequency list and the original array
    # good for external sorting
    @staticmethod
    def counting_sort(arr, selector=lambda x: x):
        """Stable in-place counting sort.

        ``selector`` maps each element to a non-negative integer key.
        """
        if not arr:
            return  # nothing to do, and max() would raise on empty input
        # make a cumulative-sum frequency list sized max key + 1
        freq = [0] * (max(selector(x) for x in arr) + 1)
        for x in arr:
            freq[selector(x)] += 1  # freq list
        for i in range(1, len(freq)):
            freq[i] += freq[i - 1]  # cumulative freq list
        # fill in the sorted array and copy over
        out = [None] * len(arr)
        # key insight: after decrementing, freq[key] is the element's
        # sorted index; iterating in reverse keeps the sort stable
        for x in reversed(arr):
            freq[selector(x)] -= 1
            out[freq[selector(x)]] = x
        arr[:] = out

    # Radix sort (for string list with a finite alphabet because
    # we need counting_sort as a subroutine)
    # working from right to left (LSD radix)
    # θ(p(m + n)), since we are just running Counting sort p times.
    # p is the max number of characters among strings
    # m is the size of alphabet (for numbers it will be 0 - 9, so m = 10)
    # n is the size of arr
    # p can also be written as logk, where k is the max value of the string/integers
    # or θ(B), B is the total size of data (number of characters)
    # good for external sorting
    @staticmethod
    def lsd_radix_sort(arr):
        """Sort a list of strings lexicographically via LSD radix sort."""
        if not arr:
            return
        n_passes = max(len(s) for s in arr)
        # LSD: process character positions from rightmost to leftmost
        for i in reversed(range(n_passes)):
            # keys are shifted by +1 so a missing character (short string)
            # gets key 0, sorting before every real character
            Sorting.counting_sort(
                arr,
                lambda x: ord(x[i]) + 1 if i < len(x) else 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment