Skip to content

Instantly share code, notes, and snippets.

@ramanshah
Created August 12, 2024 17:02
Show Gist options
  • Save ramanshah/f89ef8c3e5e5e75d3b5c0be0f27b625d to your computer and use it in GitHub Desktop.
Save ramanshah/f89ef8c3e5e5e75d3b5c0be0f27b625d to your computer and use it in GitHub Desktop.
Benchmark manual sorting algorithms for my "Sorting Stuff Efficiently" ELGL article
#! /usr/bin/env python3
# To benchmark sorting 150 things, averaging over 100 trials:
# ./sort_benchmark.py 150 100
import copy
import random
from sys import argv
TASK_SIZE = int(argv[1])
NUM_TRIALS = int(argv[2])
def insertion_sort(x):
comp_counter = 0
x_work = copy.deepcopy(x)
for i in range(1, len(x)):
j = i
while j > 0:
comp_counter += 1
comp = (x_work[j - 1] > x_work[j])
if comp:
temp = x_work[j]
x_work[j] = x_work[j - 1]
x_work[j - 1] = temp
j = j - 1
else:
break
return x_work, comp_counter
def merge_sort(x, y):
comp_counter = 0
i = 0
j = 0
result = []
while i < len(x) and j < len(y):
comp_counter += 1
comp = (x[i] > y[j])
if comp:
result.append(y[j])
j += 1
else:
result.append(x[i])
i += 1
if i == len(x):
# Remainder in y
result.extend(y[j:])
else:
# Remainder in x
result.extend(x[i:])
return result, comp_counter
insertion_work_log = []
for trial in range(NUM_TRIALS):
random.seed(trial)
data = [random.random() for _ in range(TASK_SIZE)]
result, comp_counter = insertion_sort(data)
assert result == sorted(data)
insertion_work_log.append(comp_counter)
print(f'Average comparisons over {NUM_TRIALS} trials to sort {TASK_SIZE} objects with insertion sort:')
print(sum(insertion_work_log) / NUM_TRIALS)
merge_work_log = []
for trial in range(NUM_TRIALS):
random.seed(trial)
data = [random.random() for _ in range(TASK_SIZE)]
check = sorted(data)
comp_counter = 0
sublists = []
remainder = copy.deepcopy(data)
while len(remainder) >= 4:
# Insertion sort a chunk of 4
chunk, c = insertion_sort(remainder[0:4])
remainder = remainder[4:]
sublists.append(chunk)
comp_counter += c
# Merge down as needed
while len(sublists) > 1 and (len(sublists[-2]) == len(sublists[-1])):
new_sublist, c = merge_sort(sublists[-2], sublists[-1])
comp_counter += c
if len(sublists) == 2:
sublists = [new_sublist]
else:
sublists = sublists[:-2] + [new_sublist]
# Deal with remainder
if len(remainder) > 0:
final_sublist, c = insertion_sort(remainder)
sublists.append(final_sublist)
comp_counter += c
while len(sublists) > 1:
new_sublist, c = merge_sort(sublists[-2], sublists[-1])
comp_counter += c
sublists = sublists[:-2] + [new_sublist]
assert sublists[0] == sorted(data)
merge_work_log.append(comp_counter)
print(f'Average comparisons over {NUM_TRIALS} trials to sort {TASK_SIZE} objects with the merge sort in the ELGL article:')
print(sum(merge_work_log) / NUM_TRIALS)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment