kputnam/Evaluation.hs

## Evaluation.hs
import Data.Ord
import Data.List
import Test.QuickCheck

-- | Quantify how "unsorted" a given list of elements is, by computing
--   at each position in the list: how many later elements are smaller?
--
--     e.g. cost "abc"   == 0
--          cost "acb"   == 1
--          cost "cba"   == 2
--          cost "baa"   == 2
--          cost "baaa"  == 3
--          cost "baaaa" == 4
--
--   The best case is a sorted list, which has cost == 0. The worst
--   case is a list sorted in reverse, which has a cost of n*(n-1)/2,
--   where n = length xs
--
cost :: Ord a => [a] -> Int
cost = sum . map (max 0 . uncurry (-)) . zip [0..] . rank
  where
    -- This doesn't rank tied elements equally (by design)
    --   e.g. rank "abcde" == [0,1,2,3,4]
    --        rank "edcba" == [4,3,2,1,0]
    --        rank "aaacb" == [0,1,2,4,3]
    rank :: Ord a => [a] -> [Int]
    rank = map fst . sortBy (comparing snd) . zip [0..]


-- | Count number of pairs (p,n) where n >= p, where p is drawn from
--   the first list and n is drawn from the second, given we know that
--   all elements in the first list (positives) *should* have higher
--   predicted values than elements in the second list (negatives). We
--   don't make any comparisons between elements within the same list.
--
--   The theoretical best performing algorithm would have zero inversions
--   while the worse would have |P|x|N| -- the entire Cartesian product.
--
inversions :: Ord a => [a] -> [a] -> Int
inversions ps ns
  = aux (sort ps) (sort ns, length ns)
  where
    aux [] _      = 0
    aux _ ([], _) = 0

    -- | Drop elements in ns until we find the first error/inversion
    aux (p:ps) (n:ns, nN)
      = case compare p n of
          -- Not inverted, drop this n
          GT -> aux (p:ps) (ns, nN-1)

          -- This n >= p, and all of remaining ns too
          _  -> nN + aux ps (n:ns, nN)

-- Tests
-------------------------------------------------------------------------------

-- | Naive implementation
inversions' ps ns = length [ (p,n) | p <- ps, n <- ns, n >= p ]

-- | Test that the naive inversions' and optimized inversions agree
pCompareNaive :: Ord a => [a] -> [a] -> Bool
pCompareNaive ps ns = inversions' ps ns == inversions ps ns

-- | These examples are P(x) values when y is known to be 1
positives :: [Double]
positives = [0.033, 0.034, 0.410, 0.698, 0.712, 0.928]

-- | These examples are P(x) values when y is known to be 0
negatives :: [Double]
negatives = [0.021, 0.039, 0.041, 0.041, 0.042, 0.187, 0.813]

-- Example:
--   inversion positives negatives == 15

main :: IO ()
main = do
  quickCheck (pCompareNaive :: [Int] -> [Int] -> Bool)
  quickCheck (pCompareNaive :: [Char] -> [Char] -> Bool)
  quickCheck (pCompareNaive :: [Float] -> [Float] -> Bool)
  quickCheck (pCompareNaive :: [Double] -> [Double] -> Bool)
	import Data.Ord
	import Data.List
	import Test.QuickCheck

	-- \| Quantify how "unsorted" a given list of elements is, by computing
	-- at each position in the list: how many later elements are smaller?
	--
	-- e.g. cost "abc" == 0
	-- cost "acb" == 1
	-- cost "cba" == 2
	-- cost "baa" == 2
	-- cost "baaa" == 3
	-- cost "baaaa" == 4
	--
	-- The best case is a sorted list, which has cost == 0. The worst
	-- case is a list sorted in reverse, which has a cost of n*(n-1)/2,
	-- where n = length xs
	--
	cost :: Ord a => [a] -> Int
	cost = sum . map (max 0 . uncurry (-)) . zip [0..] . rank
	where
	-- This doesn't rank tied elements equally (by design)
	-- e.g. rank "abcde" == [0,1,2,3,4]
	-- rank "edcba" == [4,3,2,1,0]
	-- rank "aaacb" == [0,1,2,4,3]
	rank :: Ord a => [a] -> [Int]
	rank = map fst . sortBy (comparing snd) . zip [0..]


	-- \| Count number of pairs (p,n) where n >= p, where p is drawn from
	-- the first list and n is drawn from the second, given we know that
	-- all elements in the first list (positives) should have higher
	-- predicted values than elements in the second list (negatives). We
	-- don't make any comparisons between elements within the same list.
	--
	-- The theoretical best performing algorithm would have zero inversions
	-- while the worse would have \|P\|x\|N\| -- the entire Cartesian product.
	--
	inversions :: Ord a => [a] -> [a] -> Int
	inversions ps ns
	= aux (sort ps) (sort ns, length ns)
	where
	aux [] _ = 0
	aux _ ([], _) = 0

	-- \| Drop elements in ns until we find the first error/inversion
	aux (p:ps) (n:ns, nN)
	= case compare p n of
	-- Not inverted, drop this n
	GT -> aux (p:ps) (ns, nN-1)

	-- This n >= p, and all of remaining ns too
	_ -> nN + aux ps (n:ns, nN)

	-- Tests
	-------------------------------------------------------------------------------

	-- \| Naive implementation
	inversions' ps ns = length [ (p,n) \| p <- ps, n <- ns, n >= p ]

	-- \| Test that the naive inversions' and optimized inversions agree
	pCompareNaive :: Ord a => [a] -> [a] -> Bool
	pCompareNaive ps ns = inversions' ps ns == inversions ps ns

	-- \| These examples are P(x) values when y is known to be 1
	positives :: [Double]
	positives = [0.033, 0.034, 0.410, 0.698, 0.712, 0.928]

	-- \| These examples are P(x) values when y is known to be 0
	negatives :: [Double]
	negatives = [0.021, 0.039, 0.041, 0.041, 0.042, 0.187, 0.813]

	-- Example:
	-- inversion positives negatives == 15

	main :: IO ()
	main = do
	quickCheck (pCompareNaive :: [Int] -> [Int] -> Bool)
	quickCheck (pCompareNaive :: [Char] -> [Char] -> Bool)
	quickCheck (pCompareNaive :: [Float] -> [Float] -> Bool)
	quickCheck (pCompareNaive :: [Double] -> [Double] -> Bool)