Created
January 12, 2014 18:03
-
-
Save lopuhin/8388183 to your computer and use it in GitHub Desktop.
Benchmarking different sort methods against built-in sort on 1M random 32-bit integers (radix sort wins over built-in, and naive intra-sort is less than 2x slower)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
import time | |
import math | |
import random | |
__help__ = ''' | |
Benchmarking different sort methods against built-in sort | |
on 1M random 32-bit integers | |
$ pypy sort.py | |
sorting 1000000 32-bit integers | |
radix_sort: 0.03230 ± 0.00179 | |
std_sort: 0.18172 ± 0.00513 | |
intrasort2: 0.32484 ± 0.01911 | |
intrasort: 0.33739 ± 0.01363 | |
qsort_inplace: 0.52141 ± 0.01229 | |
qsort_copy: 1.30378 ± 0.07212 | |
''' | |
# Built-in sort | |
def std_sort(lst): | |
lst.sort() | |
# Quick sort: making copies, a-la haskell two-liner | |
def qsort_copy(lst): | |
lst[:] = _qsort(lst) | |
def _qsort(lst): | |
if len(lst) < 2: | |
return lst | |
pivot = lst[len(lst)/2] | |
return _qsort([x for x in lst if x < pivot]) + \ | |
[x for x in lst if x == pivot] + \ | |
_qsort([x for x in lst if x > pivot]) | |
# Quick sort: classic in-place variant | |
def qsort_inplace(lst): | |
_qsort_inplace(lst, 0, len(lst) - 1) | |
def _qsort_inplace(lst, a, b): | |
if a < b: | |
pidx = (a + b) / 2 | |
new_pidx = partition(lst, a, b, pidx) | |
_qsort_inplace(lst, a, new_pidx - 1) | |
_qsort_inplace(lst, new_pidx + 1, b) | |
def partition(lst, a, b, pidx): | |
p = lst[pidx] | |
swap(lst, pidx, b) | |
new_pidx = a | |
for i in xrange(a, b): | |
if lst[i] <= p: | |
swap(lst, i, new_pidx) | |
new_pidx += 1 | |
swap(lst, new_pidx, b) | |
return new_pidx | |
# Intrasort: quick sort with insertion sort at the bottom | |
def intrasort(lst): | |
_intrasort(lst, 0, len(lst) - 1) | |
def _intrasort(lst, a, b): | |
if b - a > 50: | |
pidx = (a + b) / 2 | |
new_pidx = partition(lst, a, b, pidx) | |
_intrasort(lst, a, new_pidx - 1) | |
_intrasort(lst, new_pidx + 1, b) | |
else: | |
insertion(lst, a, b) | |
# Intrasort: quick sort with insertion sort at the top | |
def intrasort2(lst): | |
_intrasort2(lst, 0, len(lst) - 1) | |
insertion(lst, 0, len(lst) - 1) | |
def _intrasort2(lst, a, b): | |
if b - a > 50: | |
pidx = (a + b) / 2 | |
new_pidx = partition(lst, a, b, pidx) | |
_intrasort2(lst, a, new_pidx - 1) | |
_intrasort2(lst, new_pidx + 1, b) | |
# Radix sort - translation of | |
# http://www.quora.com/Sorting-Algorithms/What-is-the-most-efficient-way-to-sort-a-million-32-bit-integers) | |
def radix_sort(lst): | |
tmp = list(lst) | |
for shift in xrange(0, 32, 8): | |
counts = [0] * 0x100 | |
for x in lst: | |
counts[(x >> shift) & 0xFF] += 1 | |
buckets = [] | |
cnt = 0 | |
for c in counts: | |
buckets.append(cnt) | |
cnt += c | |
for x in lst: | |
b = (x >> shift) & 0xFF | |
tmp[buckets[b]] = x | |
buckets[b] +=1 | |
lst, tmp = tmp, lst | |
# Sort utils | |
def insertion(lst, a, b): | |
for i in xrange(a, b + 1): | |
j = i | |
while j > 0 and lst[j-1] > lst[j]: | |
swap(lst, j-1, j) | |
j -= 1 | |
def swap(lst, i1, i2): | |
lst[i1], lst[i2] = lst[i2], lst[i1] | |
# Benchmarking utils | |
def randlist(n): | |
return [random.randint(0, 2**32) for _ in xrange(n)] | |
def test(sort_fn): | |
for _ in xrange(10): | |
lst = randlist(100) | |
sort_fn(lst) | |
assert lst == list(sorted(lst)), lst | |
def stddev(times): | |
average = lambda lst: sum(lst) * 1.0 / len(lst) | |
avg = average(times) | |
return math.sqrt(average([(x - avg)**2 for x in times])) | |
def bench(sort_fn, n, warmup=5): | |
test(sort_fn) | |
times = [] | |
for i in xrange(warmup + 1): | |
lst = randlist(n) | |
t = time.time() | |
sort_fn(lst) | |
if i: | |
times.append(time.time() - t) | |
print '%s:\t%.5f ± %.5f' % (sort_fn.__name__, min(times), stddev(times)) | |
def main(n): | |
print 'sorting %d 32-bit integers' % n | |
bench(radix_sort, n) | |
bench(std_sort, n) | |
bench(intrasort2, n) | |
bench(intrasort, n) | |
bench(qsort_inplace, n) | |
bench(qsort_copy, n) | |
if __name__ == '__main__': | |
main(1 * 1000 * 1000) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment