Created
October 14, 2012 19:02
-
-
Save ssboisen/3889505 to your computer and use it in GitHub Desktop.
Huffman coding in F#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let ct = createCodeTree "abcdefghijklmnopqrstuvxyz " | |
let encoded = fastEncode ct ("simon says fsharp rocks" |> List.ofSeq) | |
let decoded = decode ct encoded | |
printfn "%A" encoded | |
printfn "%A" decoded |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type CodeTree = | |
| Branch of CodeTree * CodeTree * list<char> * int | |
| Leaf of char * int | |
let chars tree = | |
match tree with | |
| Branch(_,_,chars,_) -> chars | |
| Leaf(char,_) -> [ char ] | |
let weight tree = | |
match tree with | |
| Branch(_,_,_, weight) | |
| Leaf(_, weight) -> weight | |
let createCodeTree text = | |
let rec combineTrees trees = | |
let makeCodeTree l r = | |
Branch(l,r, chars l @ chars r, weight l + weight r) | |
match trees with | |
| fst :: snd :: rest -> combineTrees (makeCodeTree fst snd :: rest |> List.sortBy weight) | |
| _ -> trees | |
let orderedLeafList = | |
text | |
|> Seq.countBy id | |
|> Seq.sortBy snd | |
|> Seq.map (fun f -> Leaf(fst f, snd f)) | |
|> List.ofSeq | |
orderedLeafList | |
|> combineTrees | |
|> List.head | |
let decode tree bits = | |
let rec doDecode _tree bits chars = | |
match _tree with | |
| Branch(l, r, _, _) -> | |
match bits |> List.head with | |
| 0 -> doDecode l bits.Tail chars | |
| 1 -> doDecode r bits.Tail chars | |
| _ -> raise (ArgumentOutOfRangeException "Invalid bit in bits") | |
| Leaf(c, _) when bits.IsEmpty -> c :: chars | |
| Leaf(c, _) -> doDecode tree bits (c :: chars) | |
doDecode tree bits [] |> List.rev | |
let encode tree text = | |
let hasCharInBranch tree c = | |
match tree with | |
| Branch(_, _, cs, _) -> cs |> List.exists (fun _c -> _c = c) | |
| Leaf(char, _) -> char = c | |
let rec doEncode _tree (chars : list<char>) bits = | |
if chars.IsEmpty then bits | |
else | |
match _tree with | |
| Branch(left, right, _, _) -> | |
if hasCharInBranch left chars.Head then | |
doEncode left chars (0 :: bits) | |
else | |
doEncode right chars (1 :: bits) | |
| Leaf(c, _) -> doEncode tree chars.Tail bits | |
doEncode tree text [] |> List.rev | |
let fastEncode tree = | |
let codeMap = chars tree | |
|> Seq.map (fun c -> (c, encode tree [c])) | |
|> Map.ofSeq | |
fun text -> List.foldBack (fun c s -> Map.find c codeMap @ s) (text |> List.ofSeq) [] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment