Last active
May 11, 2016 19:31
-
-
Save aavogt/ffa9c3fcf3b338c49c284aadf7eefceb to your computer and use it in GitHub Desktop.
histograms are almost applicative
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE PartialTypeSignatures #-} | |
{-# LANGUAGE FlexibleInstances #-} | |
{-# LANGUAGE FlexibleContexts #-} | |
{-# LANGUAGE FunctionalDependencies #-} | |
{-# LANGUAGE TemplateHaskell #-} | |
module HistogramProb where | |
import qualified Data.Map as M | |
import Control.Lens | |
import Data.List.Split | |
-- this might be better with a better kernel function | |
-- than the constant one that leads to a histogram | |
declareClassy [d| | |
data HP a = HP { total :: Integer, | |
freqs :: (M.Map a Integer) } | |
deriving Show | |
|] | |
mkHP :: Ord a => [(a, Integer)] -> HP a | |
mkHP xs = HP (sum (map snd xs)) (M.fromListWith (+) xs) | |
mergeHP :: Ord c => (a -> b -> c) -> HP a -> HP b -> HP c | |
mergeHP f as bs = mkHP $ do | |
let toPairs xs = M.toList (xs ^. freqs) | |
(a,na) <- toPairs as | |
(b,nb) <- toPairs bs | |
return $ (f a b, na*nb) | |
-- | `mergeAdjBuckets n hp` reduces the number of buckets | |
-- in the histogram hp by n. An alternative method might | |
-- use a mean to figure out the larger bucket label. | |
mergeAdjBuckets :: forall a. Ord a => Int -> HP a -> HP a | |
mergeAdjBuckets n hp = mkHP | |
$ map mergeChunks | |
$ chunksOf n (M.toList (hp^.freqs)) | |
where | |
mergeChunks :: [(a,Integer)] -> (a,Integer) | |
mergeChunks cs | |
| Just newLabel <- median (mkHP cs) | |
= (newLabel, sum (map snd cs)) | |
| otherwise = error "Data.List.Split.chunksOf produced an [[]]" | |
median :: HP a -> Maybe a | |
median hp = go ((hp^. total) `div` 2) (M.toList (hp ^. freqs)) | |
where | |
go n ((e,m):xs @ (_:_)) | m > n = Just e | |
| otherwise = go (n-m) xs | |
go _ [(x, _)] = Just x | |
go _ _ = Nothing | |
die :: HP Int | |
die = mkHP [ (i,1) | i <- [1 .. 6] ] | |
sumDiePow :: Int -> HP Int | |
sumDiePow 1 = die | |
sumDiePow n = case divMod n 2 of | |
(m, r) -> let sm = sumDiePow m | |
sm2 = mergeHP (+) sm sm | |
in if r == 1 then mergeHP (+) die sm2 | |
else sm2 | |
{- ^ calculate the histogram for the sum of 10 dice: | |
*HistogramProb> sumDiePow 10 | |
HP 60466176 (fromList | |
[(10,1),(11,10),(12,55),(13,220),(14,715),(15,2002),(16,4995),(17,11340),(18,23760),(19,46420),(20,85228),(21,147940),(22,243925),(23,383470),(24,576565),(25,831204),(26,1151370),(27,1535040),(28,1972630),(29,2446300),(30,2930455),(31,3393610),(32,3801535),(33,4121260),(34,4325310),(35,4395456),(36,4325310),(37,4121260),(38,3801535),(39,3393610),(40,2930455),(41,2446300),(42,1972630),(43,1535040),(44,1151370),(45,831204),(46,576565),(47,383470),(48,243925),(49,147940),(50,85228),(51,46420),(52,23760),(53,11340),(54,4995),(55,2002),(56,715),(57,220),(58,55),(59,10),(60,1)]) | |
This says that for example we have a 55/60466176 chance of | |
getting a sum of 12. | |
This calculation scales better with the number of dice than the | |
monte-carlo method of generating 10 dice rolls a very large | |
number of times. But with monte-carlo sampling (or a taylor | |
model) we can write | |
> sortOfJoin :: Ord a => HP (HP a) -> HP a | |
while mergeHP is liftA2 with an Ord constraint. | |
-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment