Instantly share code, notes, and snippets.

# goerz/quicksort.py Created Nov 3, 2013

A reference implementation of the Quicksort algorithm. Should be easy to translate to other languages.
 #!/usr/bin/env python """ Reference implementation of Quicksort on an integer array """ __author__ = 'Michael Goerz' import numpy as np import numpy.random as nprnd import random import math random.seed() # initialize the random number generator def partition(a, left, right, debug=False): """ Partition the section of the array a between indices `left` and `right`. A random pivot is chosen and the array is re-ordered such that all entries left of the pivot are smaller or equal to the pivot, and all element right of it a larger or equal. Returns the index of the pivot element in the re-ordered array. Elements before `left` or after `right` remain unchanged. Algorithm follows C. A. R. Hoare. Quicksort. Computer Journal 5, 10 (1962) See also Cormen et al. Introduction to Algorithms. 3rd Edition -- Problem 7.1 """ # The implementation follows the pseudocode found in Cormen. Confusingly, # the pseudocode did not seem to work out of the box, so it is modified # here: the pivot element is kept untouched on the left, and is only moved # into the partitioned list at the very end. # # One could start with the a[left] as the pivot element, but if the array # is already sorted, this leads to worst-case runtime. An alternative is to # choose a random pivot element, simply move it to the left, and then # proceed in the normal way. # # The implementation does not try to be pythonic in any way, but instead to # translate easily into other languages. # p = random.randint(left, right) ap = a[p] # pivot value # move pivot element to the left a[p] = a[left] a[left] = ap if debug: print " chosen %d at position %d as pivot element" % (ap, p) l = left # Cormen pseudo-code would seem to say 'l = left - 1' r = right + 1 while (True): while (True): # move in from the right until an element that is smaller than the # pivot element is found r = r - 1 if (a[r] <= ap or r <= l): break while (True): # move in from the left until an element that is larger than the # pivot element is found l = l + 1 if (a[l] >= ap or l >= r): break if (l < r): # switch the two elements temp = a[r] a[r] = a[l] a[l] = temp else: # move the pivot into place (Cormen doesn't have this) p = r a[left] = a[p] a[p] = ap if debug: print " done partitioning: a = %s" % str(a) print " p = %d" % p break # return p (except we also want to execute the debug block) if (debug): # check that the pivoting worked assert a[p] == ap, "Pivot value is not in the correct place" for l in range(left, p): assert a[l] <= ap, "Values left of pivot larger than pivot" for r in range(p+1, right+1): assert a[r] >= ap, "Values right of pivot smaller than pivot" return p # index of the pivot element def qsort(a, debug=False): """ Perform a Quicksort on a given integer array """ # For small lists, insertion-sort is more efficient. As soon as we # encounter arrays with less or equal to INSERT_LIMIT element, we sort the # remaining list with insertion-sort. INSERT_LIMIT = 10 # according to Wikipedia, "between 8 and 20" # Variables used (assuming the partitioning is in-lined): # # integer n : length of array `a` # integer level : Quicksort recursion level # integer left : left boundary for section in current level # integer right : right boundary for section in current level # integer l : loop-index from left in partitioning/insertion-sort # integer r : loop-index from right in partitioning/insertion-sort # integer p : pivot-index # (integer) ap : pivot value # (integer) temp : temporary variable for swapping # # The last two variables are of the same type as the elements of the array # `a`, i.e. integers in this example, but whatever if you're sorting # non-integer arrays. # # Lastly, there are two integer stack arrays which need to be allocated to # lg(n). They keep track of the section of the array that is processed at # each recursion level of Quicksort. # # integer left_stack[] : stack for left boundary of section per level # integer right_stack[]: stack for right boundary of section per level n = len(a) left_stack = np.empty(math.log(n, 2), dtype=int) # allocate right_stack = np.empty(math.log(n, 2), dtype=int) # allocate level = 0 left_stack[level] = 0 right_stack[level] = n - 1 while (level >= 0): left = left_stack[level] right = right_stack[level] # if left < right: # for INSERT_LIMIT == 0 if (right - left) > INSERT_LIMIT: # do a normal Quicksort iteration if debug: print "Level %d: calling partition on %s between %d and %d" \ % (level, str(a), left, right) p = partition(a, left, right, debug) # pivot element ends up at p # Recursively sort the lists left and right of the pivot element # The shorter of the two sublists should be pushed to the next # stack level. The other one is done via tail recursion. # This guarantees that the stack goes at most to depth # log(n, 2) # Note that in an actual implementation the partitioning should be # in-lined. if (p-left < right-p): # left partition to next level left_stack[level+1] = left right_stack[level+1] = p - 1 # afterwards, right partition (replacing the current level) left_stack[level] = p + 1 # right[level] unchanged else: # right partition to the next level left_stack[level+1] = p + 1 right_stack[level+1] = right # afterwards, left partition (replacing the current level) right_stack[level] = p - 1 # left[level] unchanged level = level + 1 else: # we're below the INSERTION_LIMIT # do an insertion sort for elements between `left` and `right` if debug: print "Level %d: performing insertion-sort" % level print " between %d and %d" % (left, right) for l in range(left+1, right+1): # starting at the 2nd element temp = a[l] # insert a(l) into the previous sorted elements r = l - 1 while (r >= left and a[r] > temp): a[r + 1] = a[r] r = r - 1 a[r+1] = temp # done with insertion sort -- the list is now fully sorted. # we're done at this level (also when doing Quicksort all the way) level = level - 1 if debug: # Test that the array is actually sorted for i in range(1, n): assert a[i] >= a[i-1], "Output array is not fully sorted" def test_partition(n, max_val, debug=True): """ Call the partition function on a random array of length `n` with values between 0 and `max_val` """ a = nprnd.randint(0, max_val, n) print "Input: %s" % str(a) partition(a, 0, n-1, debug) print "Output: %s" % str(a) def test_qsort(n, max_val, debug=True): """ Generate a random array of integers of length n with values between 0 and max_val and sort it """ a = nprnd.randint(0, max_val, n) print "Input: %s" % str(a) qsort(a, debug) print "Output: %s" % str(a) if __name__ == "__main__": while(True): test_qsort(100000, 100000, debug=False)
Owner Author

### goerz commented Nov 13, 2013

 It might be better to do the insertion sort at the very end, over the entire array