Skip to content

Instantly share code, notes, and snippets.

@lopuhin
Created January 12, 2014 18:03
Show Gist options
  • Save lopuhin/8388183 to your computer and use it in GitHub Desktop.
Save lopuhin/8388183 to your computer and use it in GitHub Desktop.
Benchmarking different sort methods against built-in sort on 1M random 32-bit integers (radix sort wins over built-in, and naive intra-sort is less than 2x slower)
# encoding: utf-8
import time
import math
import random
__help__ = '''
Benchmarking different sort methods against built-in sort
on 1M random 32-bit integers
$ pypy sort.py
sorting 1000000 32-bit integers
radix_sort: 0.03230 ± 0.00179
std_sort: 0.18172 ± 0.00513
intrasort2: 0.32484 ± 0.01911
intrasort: 0.33739 ± 0.01363
qsort_inplace: 0.52141 ± 0.01229
qsort_copy: 1.30378 ± 0.07212
'''
# Built-in sort
def std_sort(lst):
lst.sort()
# Quick sort: making copies, a-la haskell two-liner
def qsort_copy(lst):
lst[:] = _qsort(lst)
def _qsort(lst):
if len(lst) < 2:
return lst
pivot = lst[len(lst)/2]
return _qsort([x for x in lst if x < pivot]) + \
[x for x in lst if x == pivot] + \
_qsort([x for x in lst if x > pivot])
# Quick sort: classic in-place variant
def qsort_inplace(lst):
_qsort_inplace(lst, 0, len(lst) - 1)
def _qsort_inplace(lst, a, b):
if a < b:
pidx = (a + b) / 2
new_pidx = partition(lst, a, b, pidx)
_qsort_inplace(lst, a, new_pidx - 1)
_qsort_inplace(lst, new_pidx + 1, b)
def partition(lst, a, b, pidx):
p = lst[pidx]
swap(lst, pidx, b)
new_pidx = a
for i in xrange(a, b):
if lst[i] <= p:
swap(lst, i, new_pidx)
new_pidx += 1
swap(lst, new_pidx, b)
return new_pidx
# Intrasort: quick sort with insertion sort at the bottom
def intrasort(lst):
_intrasort(lst, 0, len(lst) - 1)
def _intrasort(lst, a, b):
if b - a > 50:
pidx = (a + b) / 2
new_pidx = partition(lst, a, b, pidx)
_intrasort(lst, a, new_pidx - 1)
_intrasort(lst, new_pidx + 1, b)
else:
insertion(lst, a, b)
# Intrasort: quick sort with insertion sort at the top
def intrasort2(lst):
_intrasort2(lst, 0, len(lst) - 1)
insertion(lst, 0, len(lst) - 1)
def _intrasort2(lst, a, b):
if b - a > 50:
pidx = (a + b) / 2
new_pidx = partition(lst, a, b, pidx)
_intrasort2(lst, a, new_pidx - 1)
_intrasort2(lst, new_pidx + 1, b)
# Radix sort - translation of
# http://www.quora.com/Sorting-Algorithms/What-is-the-most-efficient-way-to-sort-a-million-32-bit-integers)
def radix_sort(lst):
tmp = list(lst)
for shift in xrange(0, 32, 8):
counts = [0] * 0x100
for x in lst:
counts[(x >> shift) & 0xFF] += 1
buckets = []
cnt = 0
for c in counts:
buckets.append(cnt)
cnt += c
for x in lst:
b = (x >> shift) & 0xFF
tmp[buckets[b]] = x
buckets[b] +=1
lst, tmp = tmp, lst
# Sort utils
def insertion(lst, a, b):
for i in xrange(a, b + 1):
j = i
while j > 0 and lst[j-1] > lst[j]:
swap(lst, j-1, j)
j -= 1
def swap(lst, i1, i2):
lst[i1], lst[i2] = lst[i2], lst[i1]
# Benchmarking utils
def randlist(n):
return [random.randint(0, 2**32) for _ in xrange(n)]
def test(sort_fn):
for _ in xrange(10):
lst = randlist(100)
sort_fn(lst)
assert lst == list(sorted(lst)), lst
def stddev(times):
average = lambda lst: sum(lst) * 1.0 / len(lst)
avg = average(times)
return math.sqrt(average([(x - avg)**2 for x in times]))
def bench(sort_fn, n, warmup=5):
test(sort_fn)
times = []
for i in xrange(warmup + 1):
lst = randlist(n)
t = time.time()
sort_fn(lst)
if i:
times.append(time.time() - t)
print '%s:\t%.5f ± %.5f' % (sort_fn.__name__, min(times), stddev(times))
def main(n):
print 'sorting %d 32-bit integers' % n
bench(radix_sort, n)
bench(std_sort, n)
bench(intrasort2, n)
bench(intrasort, n)
bench(qsort_inplace, n)
bench(qsort_copy, n)
if __name__ == '__main__':
main(1 * 1000 * 1000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment