Last active
March 18, 2016 09:59
-
-
Save mdibaiee/c1dbf6eca4d16ca5de24 to your computer and use it in GitHub Desktop.
Huffman
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Huffman ( tree, charSequence, decode ) | |
where | |
import Data.List | |
import Data.Function (on) | |
import Node | |
import qualified Data.Map as Map | |
import Data.Maybe | |
import Debug.Trace | |
import Data.Char (chr) | |
pEOF = chr 999 | |
-- Create initial nodes (forest of trees) for each symbol | |
createNodes :: String -> [Node] | |
createNodes s = map (\ch -> createLeaf ch $ charWeight s ch) $ nub s | |
-- Coding table generation step (recursive) | |
step :: [Node] -> Node | |
step nodes = let cut = (tail . tail) sorted | |
in | |
if length nodes > 1 then | |
step (merge:cut) | |
else | |
head nodes | |
where | |
sorted = sortBy (compare `on` weight) nodes | |
merge = Node { symbol = Nothing, weight = sumWeights, left = Just first, right = Just second } | |
first = head sorted | |
second = (head . tail) sorted | |
sumWeights = weight first + weight second | |
tree :: String -> Node | |
tree = step . createNodes | |
-- Root-to-leaf search, find a character's sequence string | |
charSequence :: Node -> Char -> String | |
charSequence node ch = | |
fromJust $ helper node ch "" | |
where | |
helper (Node { symbol = s, left = l, right = r }) ch sequ | |
| (isJust s) && (fromJust s == ch) = Just sequ | |
| isNothing l && isNothing r = Nothing | |
| otherwise = let leftPath = helper (fromJust l) ch (sequ ++ "0") | |
rightPath = helper (fromJust r) ch (sequ ++ "1") | |
in if isJust leftPath then leftPath else rightPath | |
-- Root-to-leaf search, find a character based on sequence string | |
findChar :: Node -> String -> Maybe Char | |
findChar n@(Node { symbol = s, left = l, right = r}) sequ | |
| length sequ > 0 = let path = if head sequ == '0' then l else r | |
in if isJust path then | |
findChar (fromJust path) (tail sequ) | |
else | |
Nothing | |
| otherwise = if isJust s then s else Nothing | |
-- Encode an string into huffman coding | |
encode :: String -> String | |
encode input = let t = tree input | |
table = charTable t input | |
in concat $ map (\a -> fromJust $ Map.lookup a table) input | |
-- Character table, a Map representing each character's bit sequence | |
charTable :: Node -> String -> Map.Map Char String | |
charTable t input = Map.fromList $ map (\a -> (a, charSequence t a)) (nub input) | |
-- Each character's weight in string | |
charWeight :: String -> Char -> Float | |
charWeight s x = genericLength $ filter (==x) s | |
-- Decode a string, given the tree representing it | |
decode :: Node -> String -> String | |
decode t input = let (valid, next) = span (isNothing . findChar t) $ inits input | |
sequ = (head next) | |
ninput = (tails input) !! (length valid) | |
ch = fromJust $ findChar t sequ | |
in if length ninput > 0 then | |
ch:(decode t ninput) | |
else | |
[ch] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment