Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
import Data.List
import Data.Ord (comparing)
import Control.Applicative ((<|>))
import Criterion.Main
import GHC.Generics (Generic)
import Control.DeepSeq
import Data.Maybe (fromJust)
import Data.Word (Word8)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BS (c2w, w2c)
data HTree a = Node !Int !(HTree a) !(HTree a) | Leaf !a !Int deriving (Eq, Show, Generic)
instance NFData a => NFData (HTree a)
--nubBS :: BS.ByteString -> BS.ByteString
--nubBS = BS.pack . nub . BS.unpack
frequencies :: BS.ByteString -> [(Int, Word8)]
-- frequencies !xs = sortBy (comparing snd) . map (\xs -> (BS.length xs, BS.head xs)) . BS.group . BS.sort $ xs
frequencies xs = map (\c -> (BS.count c xs, c)) $ nub $ BS.unpack xs
probability :: HTree a -> Int
probability !(Node p _ _) = p
probability !(Leaf _ p) = p
initialTree :: [(Int, a)] -> HTree a
initialTree !ps = f (tail ps) ((\(p, a) -> Leaf a p) $ head ps)
where
f [] node = node
f [(p, a)] node = Node (probability node + p) (Leaf a p) node
f ((p_a, c_a) : (p_b, c_b) : ps) node =
f ps $ Node (p_a + p_b) (Node (probability node + p_a) node (Leaf c_a p_a)) (Leaf c_b p_b)
encode :: Eq a => HTree a -> a -> Maybe [Bool]
encode !tree !x =
case tree of
Node _ l r -> fmap (False :) (encode l x) <|> fmap (True :) (encode r x)
Leaf c _ | c == x -> Just []
Leaf _ _ -> Nothing
encodes :: HTree Word8 -> BS.ByteString -> Maybe [Bool]
encodes !tree = mconcat . map (encode tree) . BS.unpack
encodes' :: HTree Word8 -> BS.ByteString -> [Bool]
encodes' !tree !xs = foldr (\x ys -> fromJust (encode tree x) ++ ys) [] $ BS.unpack xs
encoding :: HTree Word8 -> [[Bool]]
encoding !tree = map (fromJust . encode tree) [0..255]
encodes'' :: [[Bool]] -> BS.ByteString -> [Bool]
encodes'' !enc = concatMap ((enc !!) . fromIntegral) . BS.unpack
main = do
!xs <- BS.readFile "pg2600.txt"
let !tree_xs = xs `deepseq` initialTree (frequencies xs)
--let e = encodes tree_xs xs
--let !e' = e `deepseq` e
--print e'
--print $ length $ encodes' tree_xs xs
let !enc = encoding tree_xs
defaultMain [ bgroup "pg2600" [ bench "1" $ nf (encodes' tree_xs) xs
, bench "2" $ nf (encodes'' enc) xs
] ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.