Instantly share code, notes, and snippets.

# goerz/quicksort.py

Created November 3, 2013 03:42
A reference implementation of the Quicksort algorithm. Should be easy to translate to other languages.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 #!/usr/bin/env python """ Reference implementation of Quicksort on an integer array """ __author__ = 'Michael Goerz' import numpy as np import numpy.random as nprnd import random import math random.seed() # initialize the random number generator def partition(a, left, right, debug=False): """ Partition the section of the array a between indices `left` and `right`. A random pivot is chosen and the array is re-ordered such that all entries left of the pivot are smaller or equal to the pivot, and all element right of it a larger or equal. Returns the index of the pivot element in the re-ordered array. Elements before `left` or after `right` remain unchanged. Algorithm follows C. A. R. Hoare. Quicksort. Computer Journal 5, 10 (1962) See also Cormen et al. Introduction to Algorithms. 3rd Edition -- Problem 7.1 """ # The implementation follows the pseudocode found in Cormen. Confusingly, # the pseudocode did not seem to work out of the box, so it is modified # here: the pivot element is kept untouched on the left, and is only moved # into the partitioned list at the very end. # # One could start with the a[left] as the pivot element, but if the array # is already sorted, this leads to worst-case runtime. An alternative is to # choose a random pivot element, simply move it to the left, and then # proceed in the normal way. # # The implementation does not try to be pythonic in any way, but instead to # translate easily into other languages. # p = random.randint(left, right) ap = a[p] # pivot value # move pivot element to the left a[p] = a[left] a[left] = ap if debug: print " chosen %d at position %d as pivot element" % (ap, p) l = left # Cormen pseudo-code would seem to say 'l = left - 1' r = right + 1 while (True): while (True): # move in from the right until an element that is smaller than the # pivot element is found r = r - 1 if (a[r] <= ap or r <= l): break while (True): # move in from the left until an element that is larger than the # pivot element is found l = l + 1 if (a[l] >= ap or l >= r): break if (l < r): # switch the two elements temp = a[r] a[r] = a[l] a[l] = temp else: # move the pivot into place (Cormen doesn't have this) p = r a[left] = a[p] a[p] = ap if debug: print " done partitioning: a = %s" % str(a) print " p = %d" % p break # return p (except we also want to execute the debug block) if (debug): # check that the pivoting worked assert a[p] == ap, "Pivot value is not in the correct place" for l in range(left, p): assert a[l] <= ap, "Values left of pivot larger than pivot" for r in range(p+1, right+1): assert a[r] >= ap, "Values right of pivot smaller than pivot" return p # index of the pivot element def qsort(a, debug=False): """ Perform a Quicksort on a given integer array """ # For small lists, insertion-sort is more efficient. As soon as we # encounter arrays with less or equal to INSERT_LIMIT element, we sort the # remaining list with insertion-sort. INSERT_LIMIT = 10 # according to Wikipedia, "between 8 and 20" # Variables used (assuming the partitioning is in-lined): # # integer n : length of array `a` # integer level : Quicksort recursion level # integer left : left boundary for section in current level # integer right : right boundary for section in current level # integer l : loop-index from left in partitioning/insertion-sort # integer r : loop-index from right in partitioning/insertion-sort # integer p : pivot-index # (integer) ap : pivot value # (integer) temp : temporary variable for swapping # # The last two variables are of the same type as the elements of the array # `a`, i.e. integers in this example, but whatever if you're sorting # non-integer arrays. # # Lastly, there are two integer stack arrays which need to be allocated to # lg(n). They keep track of the section of the array that is processed at # each recursion level of Quicksort. # # integer left_stack[] : stack for left boundary of section per level # integer right_stack[]: stack for right boundary of section per level n = len(a) left_stack = np.empty(math.log(n, 2), dtype=int) # allocate right_stack = np.empty(math.log(n, 2), dtype=int) # allocate level = 0 left_stack[level] = 0 right_stack[level] = n - 1 while (level >= 0): left = left_stack[level] right = right_stack[level] # if left < right: # for INSERT_LIMIT == 0 if (right - left) > INSERT_LIMIT: # do a normal Quicksort iteration if debug: print "Level %d: calling partition on %s between %d and %d" \ % (level, str(a), left, right) p = partition(a, left, right, debug) # pivot element ends up at p # Recursively sort the lists left and right of the pivot element # The shorter of the two sublists should be pushed to the next # stack level. The other one is done via tail recursion. # This guarantees that the stack goes at most to depth # log(n, 2) # Note that in an actual implementation the partitioning should be # in-lined. if (p-left < right-p): # left partition to next level left_stack[level+1] = left right_stack[level+1] = p - 1 # afterwards, right partition (replacing the current level) left_stack[level] = p + 1 # right[level] unchanged else: # right partition to the next level left_stack[level+1] = p + 1 right_stack[level+1] = right # afterwards, left partition (replacing the current level) right_stack[level] = p - 1 # left[level] unchanged level = level + 1 else: # we're below the INSERTION_LIMIT # do an insertion sort for elements between `left` and `right` if debug: print "Level %d: performing insertion-sort" % level print " between %d and %d" % (left, right) for l in range(left+1, right+1): # starting at the 2nd element temp = a[l] # insert a(l) into the previous sorted elements r = l - 1 while (r >= left and a[r] > temp): a[r + 1] = a[r] r = r - 1 a[r+1] = temp # done with insertion sort -- the list is now fully sorted. # we're done at this level (also when doing Quicksort all the way) level = level - 1 if debug: # Test that the array is actually sorted for i in range(1, n): assert a[i] >= a[i-1], "Output array is not fully sorted" def test_partition(n, max_val, debug=True): """ Call the partition function on a random array of length `n` with values between 0 and `max_val` """ a = nprnd.randint(0, max_val, n) print "Input: %s" % str(a) partition(a, 0, n-1, debug) print "Output: %s" % str(a) def test_qsort(n, max_val, debug=True): """ Generate a random array of integers of length n with values between 0 and max_val and sort it """ a = nprnd.randint(0, max_val, n) print "Input: %s" % str(a) qsort(a, debug) print "Output: %s" % str(a) if __name__ == "__main__": while(True): test_qsort(100000, 100000, debug=False)

### goerz commented Nov 13, 2013

It might be better to do the insertion sort at the very end, over the entire array