aavogt/HistogramProb.hs

## HistogramProb.hs
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module HistogramProb where

import qualified Data.Map as M
import Control.Lens
import Data.List.Split

-- this might be better with a better kernel function
-- than the constant one that leads to a histogram
declareClassy [d|
  data HP a = HP { total :: Integer,
                   freqs :: (M.Map a Integer) }
                   deriving Show
  |]

mkHP :: Ord a => [(a, Integer)] -> HP a
mkHP xs = HP (sum (map snd xs)) (M.fromListWith (+) xs)

mergeHP :: Ord c => (a -> b -> c) -> HP a -> HP b -> HP c
mergeHP f as bs = mkHP $ do
    let toPairs xs = M.toList (xs ^. freqs)
    (a,na) <- toPairs as
    (b,nb) <- toPairs bs
    return $ (f a b, na*nb)

-- | `mergeAdjBuckets n hp` reduces the number of buckets
-- in the histogram hp by n. An alternative method might
-- use a mean to figure out the larger bucket label.
mergeAdjBuckets :: forall a. Ord a => Int -> HP a -> HP a
mergeAdjBuckets n hp = mkHP
  $ map mergeChunks
  $ chunksOf n (M.toList (hp^.freqs))
  where
    mergeChunks :: [(a,Integer)] -> (a,Integer)
    mergeChunks cs
        | Just newLabel <- median (mkHP cs)
              = (newLabel, sum (map snd cs))
        | otherwise = error "Data.List.Split.chunksOf produced an [[]]"

median :: HP a -> Maybe a
median hp = go ((hp^. total) `div` 2) (M.toList (hp ^. freqs))
  where
    go n ((e,m):xs @ (_:_)) | m > n = Just e
      | otherwise = go (n-m) xs
    go _ [(x, _)] = Just x
    go _ _ = Nothing

die :: HP Int
die = mkHP [ (i,1) | i <- [1 .. 6] ]


sumDiePow :: Int -> HP Int
sumDiePow 1 = die
sumDiePow n = case divMod n 2 of
  (m, r) -> let sm = sumDiePow m
                sm2 = mergeHP (+) sm sm
            in if r == 1 then mergeHP (+) die sm2
                         else sm2
{- ^ calculate the histogram for the sum of 10 dice:

*HistogramProb> sumDiePow 10
HP 60466176 (fromList
[(10,1),(11,10),(12,55),(13,220),(14,715),(15,2002),(16,4995),(17,11340),(18,23760),(19,46420),(20,85228),(21,147940),(22,243925),(23,383470),(24,576565),(25,831204),(26,1151370),(27,1535040),(28,1972630),(29,2446300),(30,2930455),(31,3393610),(32,3801535),(33,4121260),(34,4325310),(35,4395456),(36,4325310),(37,4121260),(38,3801535),(39,3393610),(40,2930455),(41,2446300),(42,1972630),(43,1535040),(44,1151370),(45,831204),(46,576565),(47,383470),(48,243925),(49,147940),(50,85228),(51,46420),(52,23760),(53,11340),(54,4995),(55,2002),(56,715),(57,220),(58,55),(59,10),(60,1)])

This says that for example we have a 55/60466176 chance of
getting a sum of 12.

This calculation scales better with the number of dice than the
monte-carlo method of generating 10 dice rolls a very large
number of times. But with monte-carlo sampling (or a taylor
model) we can write

> sortOfJoin :: Ord a => HP (HP a) -> HP a

while mergeHP is liftA2 with an Ord constraint.

-}
	{-# LANGUAGE ScopedTypeVariables #-}
	{-# LANGUAGE PartialTypeSignatures #-}
	{-# LANGUAGE FlexibleInstances #-}
	{-# LANGUAGE FlexibleContexts #-}
	{-# LANGUAGE FunctionalDependencies #-}
	{-# LANGUAGE TemplateHaskell #-}
	module HistogramProb where

	import qualified Data.Map as M
	import Control.Lens
	import Data.List.Split

	-- this might be better with a better kernel function
	-- than the constant one that leads to a histogram
	declareClassy [d\|
	data HP a = HP { total :: Integer,
	freqs :: (M.Map a Integer) }
	deriving Show
	\|]

	mkHP :: Ord a => [(a, Integer)] -> HP a
	mkHP xs = HP (sum (map snd xs)) (M.fromListWith (+) xs)

	mergeHP :: Ord c => (a -> b -> c) -> HP a -> HP b -> HP c
	mergeHP f as bs = mkHP $ do
	let toPairs xs = M.toList (xs ^. freqs)
	(a,na) <- toPairs as
	(b,nb) <- toPairs bs
	return $ (f a b, na*nb)

	-- \| `mergeAdjBuckets n hp` reduces the number of buckets
	-- in the histogram hp by n. An alternative method might
	-- use a mean to figure out the larger bucket label.
	mergeAdjBuckets :: forall a. Ord a => Int -> HP a -> HP a
	mergeAdjBuckets n hp = mkHP
	$ map mergeChunks
	$ chunksOf n (M.toList (hp^.freqs))
	where
	mergeChunks :: [(a,Integer)] -> (a,Integer)
	mergeChunks cs
	\| Just newLabel <- median (mkHP cs)
	= (newLabel, sum (map snd cs))
	\| otherwise = error "Data.List.Split.chunksOf produced an [[]]"

	median :: HP a -> Maybe a
	median hp = go ((hp^. total) `div` 2) (M.toList (hp ^. freqs))
	where
	go n ((e,m):xs @ (_:_)) \| m > n = Just e
	\| otherwise = go (n-m) xs
	go _ [(x, _)] = Just x
	go _ _ = Nothing

	die :: HP Int
	die = mkHP [ (i,1) \| i <- [1 .. 6] ]


	sumDiePow :: Int -> HP Int
	sumDiePow 1 = die
	sumDiePow n = case divMod n 2 of
	(m, r) -> let sm = sumDiePow m
	sm2 = mergeHP (+) sm sm
	in if r == 1 then mergeHP (+) die sm2
	else sm2
	{- ^ calculate the histogram for the sum of 10 dice:

	*HistogramProb> sumDiePow 10
	HP 60466176 (fromList
	[(10,1),(11,10),(12,55),(13,220),(14,715),(15,2002),(16,4995),(17,11340),(18,23760),(19,46420),(20,85228),(21,147940),(22,243925),(23,383470),(24,576565),(25,831204),(26,1151370),(27,1535040),(28,1972630),(29,2446300),(30,2930455),(31,3393610),(32,3801535),(33,4121260),(34,4325310),(35,4395456),(36,4325310),(37,4121260),(38,3801535),(39,3393610),(40,2930455),(41,2446300),(42,1972630),(43,1535040),(44,1151370),(45,831204),(46,576565),(47,383470),(48,243925),(49,147940),(50,85228),(51,46420),(52,23760),(53,11340),(54,4995),(55,2002),(56,715),(57,220),(58,55),(59,10),(60,1)])

	This says that for example we have a 55/60466176 chance of
	getting a sum of 12.

	This calculation scales better with the number of dice than the
	monte-carlo method of generating 10 dice rolls a very large
	number of times. But with monte-carlo sampling (or a taylor
	model) we can write

	> sortOfJoin :: Ord a => HP (HP a) -> HP a

	while mergeHP is liftA2 with an Ord constraint.

	-}