etrepum/QSort1.hs

## QSort1.hs
module QSort1 ( qsort, quicksort, main, prop_qsort ) where

-- Ported from glibc's qsort.c
-- Copyright (C) 1991-2014 Free Software Foundation, Inc.
-- This file is part of the GNU C Library.
-- Written by Douglas C. Schmidt (schmidt@ics.uci.edu).

-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.

-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-- Lesser General Public License for more details.

-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- <http://www.gnu.org/licenses/>.

-- If you consider tuning this algorithm, you should consult first:
-- Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
-- Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.

import qualified Data.Vector as V
import qualified Data.Vector.Mutable as MV
import Data.Vector.Mutable ( IOVector )
import Control.Monad ( when )
import qualified Data.IORef as I
import Control.Applicative ( (<$>), (<*>) )

import Data.List ( sort )
import Test.QuickCheck ( Property, quickCheck )
import Test.QuickCheck.Monadic ( monadicIO, run, assert )

main :: IO ()
main = quickCheck prop_qsort

prop_qsort :: [Int] -> Property
prop_qsort xs = monadicIO $ do
  xs' <- run (qsort xs)
  assert (xs' == sort xs)

-- | Discontinue quicksort algorithm when partition gets below this size.
--   This particular magic number was chosen to work best on a Sun 4/260.
maxThresh :: Int
maxThresh = 4

qsort :: (Ord a) => [a] -> IO [a]
qsort = go . V.fromList
  where
    go v = do
      mv <- V.unsafeThaw v
      quicksort mv compare
      V.toList <$> V.unsafeFreeze mv

-- Order size using quicksort.  This implementation incorporates
-- four optimizations discussed in Sedgewick:

-- 1. Non-recursive, using an explicit stack of pointer that store the
--    next array partition to sort.  To save time, this maximum amount
--    of space required to store an array of SIZE_MAX is allocated on the
--    stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
--    only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
--    Pretty cheap, actually.

-- 2. Chose the pivot element using a median-of-three decision tree.
--    This reduces the probability of selecting a bad pivot value and
--    eliminates certain extraneous comparisons.

-- 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
--    insertion sort to order the MAX_THRESH items within each partition.
--    This is a big win, since insertion sort is faster for small, mostly
--    sorted array segments.

-- 4. The larger of the two sub-partitions is always pushed onto the
--    stack first, with the algorithm then concentrating on the
--    smaller partition.  This *guarantees* no more than log (total_elems)
--    stack size is needed (actually O(1) in this case)!
quicksort :: IOVector a           -- ^ vector to sort
          -> (a -> a -> Ordering) -- ^ comparator
          -> IO ()
quicksort v cmp
  | totalElems == 0 =
    -- Avoid lossage with unsigned arithmetic below.
    return ()
  | otherwise = do
    when (totalElems > maxThresh) $ do
      lo <- alloc 0
      hi <- alloc endVal
      top <- alloc [(0, 0)]
      while (stackNotEmpty top) $ do
        -- Select median value from among LO, MID, and HI. Rearrange
        -- LO and HI so the three values are sorted. This lowers the
        -- probability of picking a pathological pivot value and
        -- skips a comparison for both the LEFT_PTR and RIGHT_PTR in
        -- the while loops.
        mid <- getPair lo hi >>= \(l, h) -> alloc (l + (h - l) `div` 2)
        ifM (lt mid lo) (swap mid lo)
        ifM (lt hi mid) $ do
          swap mid hi
          ifM (lt mid lo) (swap mid lo)
        leftPtr <- allocM (succ <$> get lo)
        rightPtr <- allocM (pred <$> get hi)
        -- Here's the famous ``collapse the walls'' section of quicksort.
        -- Gotta like those tight inner loops!  They are the main reason
        -- that this algorithm runs much faster than others.
        doWhile (getOp (<=) leftPtr rightPtr) $ do
          while (lt leftPtr mid) $
            inc leftPtr
          while (lt mid rightPtr) $
            dec rightPtr
          ifElseM (getOp (<) leftPtr rightPtr)
            (do
                swap leftPtr rightPtr
                ifElseM (getOp (==) mid leftPtr)
                  (putM mid (get leftPtr))
                  (ifM (getOp (==) mid rightPtr) $
                   putM mid (get rightPtr))
                inc leftPtr
                dec rightPtr)
            (ifM (getOp (==) leftPtr rightPtr) $ do
                inc leftPtr
                dec rightPtr)
        -- Set up pointers for next iteration.  First determine whether
        -- left and right partitions are below the threshold size.  If so,
        -- ignore one or both.  Otherwise, push the larger partition's
        -- bounds on the stack and continue sorting the smaller one.
        loVal <- get lo
        hiVal <- get hi
        leftVal <- get leftPtr
        rightVal <- get rightPtr
        case (rightVal - loVal, hiVal - leftVal) of
          (loSize, hiSize)
            | loSize <= maxThresh ->
              if hiSize <= maxThresh
                -- Ignore both small partitions.
                then pop top lo hi
                -- Ignore small left partition.
                else put lo leftVal
            | hiSize <= maxThresh ->
              -- Ignore small right partition.
              put hi rightVal
            | loSize > hiSize -> do
              -- Push larger left partition indices.
              push top (loVal, rightVal)
              put lo leftVal
            | otherwise -> do
              -- Push larger right partition indices.
              push top (leftVal, hiVal)
              put hi rightVal
    -- Once the BASE_PTR array is partially sorted by quicksort the rest
    -- is completely sorted using insertion sort, since this is efficient
    -- for partitions below MAX_THRESH size. BASE_PTR points to the beginning
    -- of the array to sort, and END_PTR points at the very last element in
    -- the array (*not* one beyond it!).
    tmpPtr <- alloc 0
    basePtr <- alloc 0
    -- Find smallest element in first threshold and place it at the
    -- array's beginning.  This is the smallest array element,
    -- and the operation speeds up insertion sort's inner loop.
    runPtr <- alloc 1
    while ((<= min endVal maxThresh) <$> get runPtr) $ do
      ifM (lt runPtr tmpPtr) $
        putM tmpPtr (get runPtr)
      inc runPtr
    ifM (getOp (/=) tmpPtr basePtr) $
      swap tmpPtr basePtr
    -- Insertion sort, running from left-hand-side up to right-hand-side.
    put runPtr 1
    while (inc runPtr >> (<= endVal) <$> get runPtr) $ do
      putM tmpPtr (pred <$> get runPtr)
      while (lt runPtr tmpPtr) $
        dec tmpPtr
      inc tmpPtr
      ifM (getOp (/=) tmpPtr runPtr) $ do
        lo <- allocM (get runPtr)
        hi <- alloc 0
        while (putM hi (get lo) >> dec lo >> getOp (>=) lo tmpPtr) $
          swap lo hi
  where
    totalElems = MV.length v
    endVal = totalElems - 1
    -- define a DSL for C-like programming in Haskell
    swapPure = MV.swap v
    swap aVar bVar = do
      a <- get aVar
      b <- get bVar
      swapPure a b
    alloc = I.newIORef
    get = I.readIORef
    allocM = (alloc =<<)
    getPair aVar bVar = (,) <$> get aVar <*> get bVar
    getAt var = get var >>= MV.read v
    getOp op aVar bVar = do
      a <- get aVar
      b <- get bVar
      return (op a b)
    put varName x = I.writeIORef varName $! x
    putM varName a = a >>= put varName
    modify varName = I.modifyIORef' varName
    inc varName = modify varName succ
    dec varName = modify varName pred
    lt aVar bVar = do
      res <- cmp <$> getAt aVar <*> getAt bVar
      return (LT == res)
    ifM c a = c >>= \x -> when x a
    ifElseM c aThen aElse = c >>= \x -> when x aThen >> when (not x) aElse
    while c a = ifM c (a >> while c a)
    doWhile c a = a >> while c a
    -- Define a stack abstraction, similar to the macros in C but without
    -- contiguous storage for simplicity
    push stack = modify stack . (:)
    pop stack low high = do
      (a, b):rest <- get stack
      put low a
      put high b
      put stack rest
    stackNotEmpty stack =
      not . null <$> get stack
	module QSort1 ( qsort, quicksort, main, prop_qsort ) where

	-- Ported from glibc's qsort.c
	-- Copyright (C) 1991-2014 Free Software Foundation, Inc.
	-- This file is part of the GNU C Library.
	-- Written by Douglas C. Schmidt (schmidt@ics.uci.edu).

	-- The GNU C Library is free software; you can redistribute it and/or
	-- modify it under the terms of the GNU Lesser General Public
	-- License as published by the Free Software Foundation; either
	-- version 2.1 of the License, or (at your option) any later version.

	-- The GNU C Library is distributed in the hope that it will be useful,
	-- but WITHOUT ANY WARRANTY; without even the implied warranty of
	-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	-- Lesser General Public License for more details.

	-- You should have received a copy of the GNU Lesser General Public
	-- License along with the GNU C Library; if not, see
	-- <http://www.gnu.org/licenses/>.

	-- If you consider tuning this algorithm, you should consult first:
	-- Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
	-- Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.

	import qualified Data.Vector as V
	import qualified Data.Vector.Mutable as MV
	import Data.Vector.Mutable ( IOVector )
	import Control.Monad ( when )
	import qualified Data.IORef as I
	import Control.Applicative ( (<$>), (<*>) )

	import Data.List ( sort )
	import Test.QuickCheck ( Property, quickCheck )
	import Test.QuickCheck.Monadic ( monadicIO, run, assert )

	main :: IO ()
	main = quickCheck prop_qsort

	prop_qsort :: [Int] -> Property
	prop_qsort xs = monadicIO $ do
	xs' <- run (qsort xs)
	assert (xs' == sort xs)

	-- \| Discontinue quicksort algorithm when partition gets below this size.
	-- This particular magic number was chosen to work best on a Sun 4/260.
	maxThresh :: Int
	maxThresh = 4

	qsort :: (Ord a) => [a] -> IO [a]
	qsort = go . V.fromList
	where
	go v = do
	mv <- V.unsafeThaw v
	quicksort mv compare
	V.toList <$> V.unsafeFreeze mv

	-- Order size using quicksort. This implementation incorporates
	-- four optimizations discussed in Sedgewick:

	-- 1. Non-recursive, using an explicit stack of pointer that store the
	-- next array partition to sort. To save time, this maximum amount
	-- of space required to store an array of SIZE_MAX is allocated on the
	-- stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
	-- only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
	-- Pretty cheap, actually.

	-- 2. Chose the pivot element using a median-of-three decision tree.
	-- This reduces the probability of selecting a bad pivot value and
	-- eliminates certain extraneous comparisons.

	-- 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
	-- insertion sort to order the MAX_THRESH items within each partition.
	-- This is a big win, since insertion sort is faster for small, mostly
	-- sorted array segments.

	-- 4. The larger of the two sub-partitions is always pushed onto the
	-- stack first, with the algorithm then concentrating on the
	-- smaller partition. This guarantees no more than log (total_elems)
	-- stack size is needed (actually O(1) in this case)!
	quicksort :: IOVector a -- ^ vector to sort
	-> (a -> a -> Ordering) -- ^ comparator
	-> IO ()
	quicksort v cmp
	\| totalElems == 0 =
	-- Avoid lossage with unsigned arithmetic below.
	return ()
	\| otherwise = do
	when (totalElems > maxThresh) $ do
	lo <- alloc 0
	hi <- alloc endVal
	top <- alloc [(0, 0)]
	while (stackNotEmpty top) $ do
	-- Select median value from among LO, MID, and HI. Rearrange
	-- LO and HI so the three values are sorted. This lowers the
	-- probability of picking a pathological pivot value and
	-- skips a comparison for both the LEFT_PTR and RIGHT_PTR in
	-- the while loops.
	mid <- getPair lo hi >>= \(l, h) -> alloc (l + (h - l) `div` 2)
	ifM (lt mid lo) (swap mid lo)
	ifM (lt hi mid) $ do
	swap mid hi
	ifM (lt mid lo) (swap mid lo)
	leftPtr <- allocM (succ <$> get lo)
	rightPtr <- allocM (pred <$> get hi)
	-- Here's the famous ``collapse the walls'' section of quicksort.
	-- Gotta like those tight inner loops! They are the main reason
	-- that this algorithm runs much faster than others.
	doWhile (getOp (<=) leftPtr rightPtr) $ do
	while (lt leftPtr mid) $
	inc leftPtr
	while (lt mid rightPtr) $
	dec rightPtr
	ifElseM (getOp (<) leftPtr rightPtr)
	(do
	swap leftPtr rightPtr
	ifElseM (getOp (==) mid leftPtr)
	(putM mid (get leftPtr))
	(ifM (getOp (==) mid rightPtr) $
	putM mid (get rightPtr))
	inc leftPtr
	dec rightPtr)
	(ifM (getOp (==) leftPtr rightPtr) $ do
	inc leftPtr
	dec rightPtr)
	-- Set up pointers for next iteration. First determine whether
	-- left and right partitions are below the threshold size. If so,
	-- ignore one or both. Otherwise, push the larger partition's
	-- bounds on the stack and continue sorting the smaller one.
	loVal <- get lo
	hiVal <- get hi
	leftVal <- get leftPtr
	rightVal <- get rightPtr
	case (rightVal - loVal, hiVal - leftVal) of
	(loSize, hiSize)
	\| loSize <= maxThresh ->
	if hiSize <= maxThresh
	-- Ignore both small partitions.
	then pop top lo hi
	-- Ignore small left partition.
	else put lo leftVal
	\| hiSize <= maxThresh ->
	-- Ignore small right partition.
	put hi rightVal
	\| loSize > hiSize -> do
	-- Push larger left partition indices.
	push top (loVal, rightVal)
	put lo leftVal
	\| otherwise -> do
	-- Push larger right partition indices.
	push top (leftVal, hiVal)
	put hi rightVal
	-- Once the BASE_PTR array is partially sorted by quicksort the rest
	-- is completely sorted using insertion sort, since this is efficient
	-- for partitions below MAX_THRESH size. BASE_PTR points to the beginning
	-- of the array to sort, and END_PTR points at the very last element in
	-- the array (not one beyond it!).
	tmpPtr <- alloc 0
	basePtr <- alloc 0
	-- Find smallest element in first threshold and place it at the
	-- array's beginning. This is the smallest array element,
	-- and the operation speeds up insertion sort's inner loop.
	runPtr <- alloc 1
	while ((<= min endVal maxThresh) <$> get runPtr) $ do
	ifM (lt runPtr tmpPtr) $
	putM tmpPtr (get runPtr)
	inc runPtr
	ifM (getOp (/=) tmpPtr basePtr) $
	swap tmpPtr basePtr
	-- Insertion sort, running from left-hand-side up to right-hand-side.
	put runPtr 1
	while (inc runPtr >> (<= endVal) <$> get runPtr) $ do
	putM tmpPtr (pred <$> get runPtr)
	while (lt runPtr tmpPtr) $
	dec tmpPtr
	inc tmpPtr
	ifM (getOp (/=) tmpPtr runPtr) $ do
	lo <- allocM (get runPtr)
	hi <- alloc 0
	while (putM hi (get lo) >> dec lo >> getOp (>=) lo tmpPtr) $
	swap lo hi
	where
	totalElems = MV.length v
	endVal = totalElems - 1
	-- define a DSL for C-like programming in Haskell
	swapPure = MV.swap v
	swap aVar bVar = do
	a <- get aVar
	b <- get bVar
	swapPure a b
	alloc = I.newIORef
	get = I.readIORef
	allocM = (alloc =<<)
	getPair aVar bVar = (,) <$> get aVar <*> get bVar
	getAt var = get var >>= MV.read v
	getOp op aVar bVar = do
	a <- get aVar
	b <- get bVar
	return (op a b)
	put varName x = I.writeIORef varName $! x
	putM varName a = a >>= put varName
	modify varName = I.modifyIORef' varName
	inc varName = modify varName succ
	dec varName = modify varName pred
	lt aVar bVar = do
	res <- cmp <$> getAt aVar <*> getAt bVar
	return (LT == res)
	ifM c a = c >>= \x -> when x a
	ifElseM c aThen aElse = c >>= \x -> when x aThen >> when (not x) aElse
	while c a = ifM c (a >> while c a)
	doWhile c a = a >> while c a
	-- Define a stack abstraction, similar to the macros in C but without
	-- contiguous storage for simplicity
	push stack = modify stack . (:)
	pop stack low high = do
	(a, b):rest <- get stack
	put low a
	put high b
	put stack rest
	stackNotEmpty stack =
	not . null <$> get stack