Skip to content

Instantly share code, notes, and snippets.

@Reimirno
Created May 5, 2022 20:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Reimirno/cf4759f28f36a8cfae3e06168993ed83 to your computer and use it in GitHub Desktop.
class Sorting:
    """Classic sorting algorithms, implemented as static methods.

    All sorts mutate ``arr`` in place and return ``None``. Comparison
    sorts take a ``comparator(a, b)`` that returns a negative number,
    zero, or a positive number when ``a`` sorts before, equal to, or
    after ``b`` (C ``strcmp`` convention).
    """

    ### Comparison Sorting ###

    # Insertion sort
    # without swapping in every step
    # θ(n^2), but gets better if data is already somewhat sorted
    # more precisely, θ(nk), k is the max amount of data displaced from sorted position
    @staticmethod
    def insertion_sort(arr, comparator):
        """Stable in-place insertion sort."""
        for i in range(1, len(arr)):
            x = arr[i]  # keep a copy of arr[i] first
            j = i - 1
            # Shift larger elements one slot right; stop at the first
            # element <= x (stable). A while loop is used so that when
            # everything shifts, x correctly lands at index 0.
            while j >= 0 and comparator(arr[j], x) > 0:
                arr[j + 1] = arr[j]  # don't swap yet; copy over only
                j -= 1
            arr[j + 1] = x  # now put back x

    # Shell sort
    # an improvement on insertion sort, reducing number of inversions fast
    # by sorting distant elements first; the last round (interval == 1) is
    # a vanilla insertion sort, but with most inversions gone
    # θ(n^3/2) for this halving gap sequence
    @staticmethod
    def shell_sort(arr, comparator):
        """In-place shell sort with a halving gap sequence."""
        n = len(arr)
        interval = n // 2
        while interval:
            # gapped insertion sort: walk the chain i, i-interval, i-2*interval, ...
            for i in range(interval, n):
                x = arr[i]
                j = i - interval
                while j >= 0 and comparator(arr[j], x) > 0:
                    arr[j + interval] = arr[j]
                    j -= interval
                arr[j + interval] = x  # now put back x
            interval //= 2

    # Selection sort
    # each round, put the max of the remaining elements at the back
    # θ(n^2), regardless how sorted the data originally were
    # If we use a priority queue (Heap sort) or a balanced tree structure
    # (Tree sort), then we can have θ(nlogn):
    # For heap: the initial heapify operation can be O(n), but the n removals each need O(logn)
    # For tree: initial n insertions each need O(logn), and a traversal O(n), overall O(nlogn)
    @staticmethod
    def selection_sort(arr, comparator):
        """In-place selection sort (not stable)."""
        for i in reversed(range(1, len(arr))):
            max_idx = 0
            # scan arr[0..i] inclusive for the largest element
            for j in range(1, i + 1):
                if comparator(arr[j], arr[max_idx]) > 0:
                    max_idx = j
            # move the round's max to its final position at index i
            arr[i], arr[max_idx] = arr[max_idx], arr[i]

    ## From this point onwards I will ignore the comparator syntax and focus on the sorting itself

    # Merge sort
    # Implemented most easily with recursion
    # θ(nlogn) time, O(n) space in those array copies
    @staticmethod
    def merge_sort(arr):
        """Stable in-place (interface-wise) merge sort."""
        if len(arr) < 2:
            return
        Sorting.__split(arr, 0, len(arr) - 1)

    @staticmethod
    def __split(arr, start: int, end: int):
        """Recursively sort arr[start..end] (inclusive bounds)."""
        if end > start:
            mid = start + (end - start) // 2
            Sorting.__split(arr, start, mid)
            # right half starts at mid + 1, otherwise the recursion
            # never shrinks for two-element ranges
            Sorting.__split(arr, mid + 1, end)
            Sorting.__merge(arr, start, mid, end)

    @staticmethod
    def __merge(arr, start: int, mid: int, end: int):
        """Merge the sorted runs arr[start..mid] and arr[mid+1..end]."""
        # copy both runs; bounds are inclusive, hence the +1s
        arr1, arr2 = arr[start:mid + 1], arr[mid + 1:end + 1]
        p1, p2, pm = 0, 0, start
        while p1 < len(arr1) and p2 < len(arr2):
            # <= keeps equal elements in their original order (stable)
            if arr1[p1] <= arr2[p2]:
                arr[pm] = arr1[p1]
                p1 += 1
            else:
                arr[pm] = arr2[p2]
                p2 += 1
            pm += 1
        # copy whichever run has leftovers (at most one of these fires)
        if p1 < len(arr1):
            arr[pm:pm + len(arr1) - p1] = arr1[p1:]
        elif p2 < len(arr2):
            arr[pm:pm + len(arr2) - p2] = arr2[p2:]

    # Quick sort
    # best θ(nlogn), worst O(n^2)
    # space O(logn) due to recursive calls (O(n) worst case)
    # not stable
    @staticmethod
    def quick_sort(arr):
        """In-place quicksort (Lomuto partition, last-element pivot)."""
        Sorting.__quick(arr, 0, len(arr) - 1)

    @staticmethod
    def __quick(arr, start: int, end: int):
        """Recursively sort arr[start..end] (inclusive bounds)."""
        if start < end:
            pivot = Sorting.__partition(arr, start, end)
            # the pivot is already in its final spot; exclude it
            Sorting.__quick(arr, start, pivot - 1)
            Sorting.__quick(arr, pivot + 1, end)

    # I think this (Lomuto) partition strategy is better than Hoare's partition
    # It is much easier to understand
    @staticmethod
    def __partition(arr, start: int, end: int) -> int:
        """Partition arr[start..end] around a pivot; return its final index."""
        # get pivot first and park it at the end
        pivot_index = Sorting.__pivot(arr, start, end)
        arr[end], arr[pivot_index] = arr[pivot_index], arr[end]
        pivot, divide = arr[end], start - 1
        # whenever you get a smaller-or-equal element put it on the left
        for i in range(start, end):
            if arr[i] <= pivot:
                divide += 1
                arr[divide], arr[i] = arr[i], arr[divide]
        # place the pivot just after the last small element
        arr[divide + 1], arr[end] = arr[end], arr[divide + 1]
        return divide + 1

    # put whatever strategy of choosing pivot here
    # this is a naive way to do it by always choosing the last element
    @staticmethod
    def __pivot(arr, start: int, end: int) -> int:
        """Return the index of the chosen pivot within [start, end]."""
        return end

    ### Distribution Sorting ###

    # Counting sort (for int keys)
    # θ(n + m), n is number of elements, m is max key value
    # we are just traversing the frequency list and the original array
    # good for external sorting
    @staticmethod
    def counting_sort(arr, selector=lambda x: x):
        """Stable in-place counting sort.

        ``selector`` maps each element to a non-negative integer key.
        """
        if not arr:
            return  # nothing to do, and max() would raise on empty input
        # make a cumulative-sum frequency list sized max key + 1
        freq = [0] * (max(selector(x) for x in arr) + 1)
        for x in arr:
            freq[selector(x)] += 1  # freq list
        for i in range(1, len(freq)):
            freq[i] += freq[i - 1]  # cumulative freq list
        # fill in the sorted array and copy over
        out = [None] * len(arr)
        # key insight: after decrementing, freq[key] is the element's
        # sorted index; iterating in reverse keeps the sort stable
        for x in reversed(arr):
            freq[selector(x)] -= 1
            out[freq[selector(x)]] = x
        arr[:] = out

    # Radix sort (for string list with a finite alphabet because
    # we need counting_sort as a subroutine)
    # working from right to left (LSD radix)
    # θ(p(m + n)), since we are just running Counting sort p times.
    # p is the max number of characters among strings
    # m is the size of alphabet (for numbers it will be 0 - 9, so m = 10)
    # n is the size of arr
    # p can also be written as logk, where k is the max value of the string/integers
    # or θ(B), B is the total size of data (number of characters)
    # good for external sorting
    @staticmethod
    def lsd_radix_sort(arr):
        """Sort a list of strings lexicographically via LSD radix sort."""
        if not arr:
            return
        n_passes = max(len(s) for s in arr)
        # LSD: process character positions from rightmost to leftmost
        for i in reversed(range(n_passes)):
            # keys are shifted by +1 so a missing character (short string)
            # gets key 0, sorting before every real character
            Sorting.counting_sort(
                arr,
                lambda x: ord(x[i]) + 1 if i < len(x) else 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment