public
Last active

First pass at Haskell port of glibc qsort.c

  • Download Gist
QSort1.hs
Haskell
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
module QSort1 ( qsort, quicksort, main, prop_qsort ) where
 
-- Ported from glibc's qsort.c
-- Copyright (C) 1991-2014 Free Software Foundation, Inc.
-- This file is part of the GNU C Library.
-- Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
 
-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
 
-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
 
-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- <http://www.gnu.org/licenses/>.
 
-- If you consider tuning this algorithm, you should consult first:
-- Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
-- Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.
 
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as MV
import Data.Vector.Mutable ( IOVector )
import Control.Monad ( when )
import qualified Data.IORef as I
import Control.Applicative ( (<$>), (<*>) )
 
import Data.List ( sort )
import Test.QuickCheck ( Property, quickCheck )
import Test.QuickCheck.Monadic ( monadicIO, run, assert )
 
main :: IO ()
main = quickCheck prop_qsort
 
prop_qsort :: [Int] -> Property
prop_qsort xs = monadicIO $ do
xs' <- run (qsort xs)
assert (xs' == sort xs)
 
-- | Discontinue quicksort algorithm when partition gets below this size.
-- This particular magic number was chosen to work best on a Sun 4/260.
maxThresh :: Int
maxThresh = 4
 
qsort :: (Ord a) => [a] -> IO [a]
qsort = go . V.fromList
where
go v = do
mv <- V.unsafeThaw v
quicksort mv compare
V.toList <$> V.unsafeFreeze mv
 
-- Order size using quicksort. This implementation incorporates
-- four optimizations discussed in Sedgewick:
 
-- 1. Non-recursive, using an explicit stack of pointer that store the
-- next array partition to sort. To save time, this maximum amount
-- of space required to store an array of SIZE_MAX is allocated on the
-- stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
-- only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
-- Pretty cheap, actually.
 
-- 2. Chose the pivot element using a median-of-three decision tree.
-- This reduces the probability of selecting a bad pivot value and
-- eliminates certain extraneous comparisons.
 
-- 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
-- insertion sort to order the MAX_THRESH items within each partition.
-- This is a big win, since insertion sort is faster for small, mostly
-- sorted array segments.
 
-- 4. The larger of the two sub-partitions is always pushed onto the
-- stack first, with the algorithm then concentrating on the
-- smaller partition. This *guarantees* no more than log (total_elems)
-- stack size is needed (actually O(1) in this case)!
quicksort :: IOVector a -- ^ vector to sort
-> (a -> a -> Ordering) -- ^ comparator
-> IO ()
quicksort v cmp
| totalElems == 0 =
-- Avoid lossage with unsigned arithmetic below.
return ()
| otherwise = do
when (totalElems > maxThresh) $ do
lo <- alloc 0
hi <- alloc endVal
top <- alloc [(0, 0)]
while (stackNotEmpty top) $ do
-- Select median value from among LO, MID, and HI. Rearrange
-- LO and HI so the three values are sorted. This lowers the
-- probability of picking a pathological pivot value and
-- skips a comparison for both the LEFT_PTR and RIGHT_PTR in
-- the while loops.
mid <- getPair lo hi >>= \(l, h) -> alloc (l + (h - l) `div` 2)
ifM (lt mid lo) (swap mid lo)
ifM (lt hi mid) $ do
swap mid hi
ifM (lt mid lo) (swap mid lo)
leftPtr <- allocM (succ <$> get lo)
rightPtr <- allocM (pred <$> get hi)
-- Here's the famous ``collapse the walls'' section of quicksort.
-- Gotta like those tight inner loops! They are the main reason
-- that this algorithm runs much faster than others.
doWhile (getOp (<=) leftPtr rightPtr) $ do
while (lt leftPtr mid) $
inc leftPtr
while (lt mid rightPtr) $
dec rightPtr
ifElseM (getOp (<) leftPtr rightPtr)
(do
swap leftPtr rightPtr
ifElseM (getOp (==) mid leftPtr)
(putM mid (get leftPtr))
(ifM (getOp (==) mid rightPtr) $
putM mid (get rightPtr))
inc leftPtr
dec rightPtr)
(ifM (getOp (==) leftPtr rightPtr) $ do
inc leftPtr
dec rightPtr)
-- Set up pointers for next iteration. First determine whether
-- left and right partitions are below the threshold size. If so,
-- ignore one or both. Otherwise, push the larger partition's
-- bounds on the stack and continue sorting the smaller one.
loVal <- get lo
hiVal <- get hi
leftVal <- get leftPtr
rightVal <- get rightPtr
case (rightVal - loVal, hiVal - leftVal) of
(loSize, hiSize)
| loSize <= maxThresh ->
if hiSize <= maxThresh
-- Ignore both small partitions.
then pop top lo hi
-- Ignore small left partition.
else put lo leftVal
| hiSize <= maxThresh ->
-- Ignore small right partition.
put hi rightVal
| loSize > hiSize -> do
-- Push larger left partition indices.
push top (loVal, rightVal)
put lo leftVal
| otherwise -> do
-- Push larger right partition indices.
push top (leftVal, hiVal)
put hi rightVal
-- Once the BASE_PTR array is partially sorted by quicksort the rest
-- is completely sorted using insertion sort, since this is efficient
-- for partitions below MAX_THRESH size. BASE_PTR points to the beginning
-- of the array to sort, and END_PTR points at the very last element in
-- the array (*not* one beyond it!).
tmpPtr <- alloc 0
basePtr <- alloc 0
-- Find smallest element in first threshold and place it at the
-- array's beginning. This is the smallest array element,
-- and the operation speeds up insertion sort's inner loop.
runPtr <- alloc 1
while ((<= min endVal maxThresh) <$> get runPtr) $ do
ifM (lt runPtr tmpPtr) $
putM tmpPtr (get runPtr)
inc runPtr
ifM (getOp (/=) tmpPtr basePtr) $
swap tmpPtr basePtr
-- Insertion sort, running from left-hand-side up to right-hand-side.
put runPtr 1
while (inc runPtr >> (<= endVal) <$> get runPtr) $ do
putM tmpPtr (pred <$> get runPtr)
while (lt runPtr tmpPtr) $
dec tmpPtr
inc tmpPtr
ifM (getOp (/=) tmpPtr runPtr) $ do
lo <- allocM (get runPtr)
hi <- alloc 0
while (putM hi (get lo) >> dec lo >> getOp (>=) lo tmpPtr) $
swap lo hi
where
totalElems = MV.length v
endVal = totalElems - 1
-- define a DSL for C-like programming in Haskell
swapPure = MV.swap v
swap aVar bVar = do
a <- get aVar
b <- get bVar
swapPure a b
alloc = I.newIORef
get = I.readIORef
allocM = (alloc =<<)
getPair aVar bVar = (,) <$> get aVar <*> get bVar
getAt var = get var >>= MV.read v
getOp op aVar bVar = do
a <- get aVar
b <- get bVar
return (op a b)
put varName x = I.writeIORef varName $! x
putM varName a = a >>= put varName
modify varName = I.modifyIORef' varName
inc varName = modify varName succ
dec varName = modify varName pred
lt aVar bVar = do
res <- cmp <$> getAt aVar <*> getAt bVar
return (LT == res)
ifM c a = c >>= \x -> when x a
ifElseM c aThen aElse = c >>= \x -> when x aThen >> when (not x) aElse
while c a = ifM c (a >> while c a)
doWhile c a = a >> while c a
-- Define a stack abstraction, similar to the macros in C but without
-- contiguous storage for simplicity
push stack = modify stack . (:)
pop stack low high = do
(a, b):rest <- get stack
put low a
put high b
put stack rest
stackNotEmpty stack =
not . null <$> get stack

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.