Created
August 12, 2024 17:02
-
-
Save ramanshah/f89ef8c3e5e5e75d3b5c0be0f27b625d to your computer and use it in GitHub Desktop.
Benchmark manual sorting algorithms for my "Sorting Stuff Efficiently" ELGL article
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# To benchmark sorting 150 things, averaging over 100 trials: | |
# ./sort_benchmark.py 150 100 | |
import copy | |
import random | |
from sys import argv | |
TASK_SIZE = int(argv[1]) | |
NUM_TRIALS = int(argv[2]) | |
def insertion_sort(x): | |
comp_counter = 0 | |
x_work = copy.deepcopy(x) | |
for i in range(1, len(x)): | |
j = i | |
while j > 0: | |
comp_counter += 1 | |
comp = (x_work[j - 1] > x_work[j]) | |
if comp: | |
temp = x_work[j] | |
x_work[j] = x_work[j - 1] | |
x_work[j - 1] = temp | |
j = j - 1 | |
else: | |
break | |
return x_work, comp_counter | |
def merge_sort(x, y): | |
comp_counter = 0 | |
i = 0 | |
j = 0 | |
result = [] | |
while i < len(x) and j < len(y): | |
comp_counter += 1 | |
comp = (x[i] > y[j]) | |
if comp: | |
result.append(y[j]) | |
j += 1 | |
else: | |
result.append(x[i]) | |
i += 1 | |
if i == len(x): | |
# Remainder in y | |
result.extend(y[j:]) | |
else: | |
# Remainder in x | |
result.extend(x[i:]) | |
return result, comp_counter | |
insertion_work_log = [] | |
for trial in range(NUM_TRIALS): | |
random.seed(trial) | |
data = [random.random() for _ in range(TASK_SIZE)] | |
result, comp_counter = insertion_sort(data) | |
assert result == sorted(data) | |
insertion_work_log.append(comp_counter) | |
print(f'Average comparisons over {NUM_TRIALS} trials to sort {TASK_SIZE} objects with insertion sort:') | |
print(sum(insertion_work_log) / NUM_TRIALS) | |
merge_work_log = [] | |
for trial in range(NUM_TRIALS): | |
random.seed(trial) | |
data = [random.random() for _ in range(TASK_SIZE)] | |
check = sorted(data) | |
comp_counter = 0 | |
sublists = [] | |
remainder = copy.deepcopy(data) | |
while len(remainder) >= 4: | |
# Insertion sort a chunk of 4 | |
chunk, c = insertion_sort(remainder[0:4]) | |
remainder = remainder[4:] | |
sublists.append(chunk) | |
comp_counter += c | |
# Merge down as needed | |
while len(sublists) > 1 and (len(sublists[-2]) == len(sublists[-1])): | |
new_sublist, c = merge_sort(sublists[-2], sublists[-1]) | |
comp_counter += c | |
if len(sublists) == 2: | |
sublists = [new_sublist] | |
else: | |
sublists = sublists[:-2] + [new_sublist] | |
# Deal with remainder | |
if len(remainder) > 0: | |
final_sublist, c = insertion_sort(remainder) | |
sublists.append(final_sublist) | |
comp_counter += c | |
while len(sublists) > 1: | |
new_sublist, c = merge_sort(sublists[-2], sublists[-1]) | |
comp_counter += c | |
sublists = sublists[:-2] + [new_sublist] | |
assert sublists[0] == sorted(data) | |
merge_work_log.append(comp_counter) | |
print(f'Average comparisons over {NUM_TRIALS} trials to sort {TASK_SIZE} objects with the merge sort in the ELGL article:') | |
print(sum(merge_work_log) / NUM_TRIALS) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment