Skip to content

Instantly share code, notes, and snippets.

@pabloferz
Created December 4, 2019 20:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pabloferz/676ce7a8de79edd736e9d1e9c0535b28 to your computer and use it in GitHub Desktop.
Save pabloferz/676ce7a8de79edd736e9d1e9c0535b28 to your computer and use it in GitHub Desktop.
HuffmanCodes
# This example work for strings but can be easily adapted to other streams of data
function compress(str::String, hdict)
nbits = sum(c -> hdict[c][2], str)
bv = falses(nbits)
n = 0
for c in str
code, i = hdict[c] # code and number of bits
d = div(n, 64) + 1 # current storage chunk
r = 64 * d - n # number of bits available in the current chunk
u = (i < r) ? (code << (r - i)) : (code >> (i - r))
bv.chunks[d] |= u
# If there are not enough bits left, use the next chunk
if i > r
u = code << (64 - i + r)
bv.chunks[d + 1] |= u
end
n += i
end
return bv.chunks
end
function save(name, data)
file = open("name" * ".bin", "w")
for entry in data
write(file, entry)
end
close(file)
end
########################################################
## Example string and corresponding Huffman Coding. ##
########################################################
str = "this is an example for huffman encoding"
hdict = Dict(' ' => (UInt(0), 3, "000" ),
'a' => (UInt(8), 4, "1000" ),
'c' => (UInt(13), 5, "01101" ),
'd' => (UInt(12), 5, "01100" ),
'e' => (UInt(5), 4, "0101" ),
'f' => (UInt(2), 4, "0010" ),
'g' => (UInt(16), 6, "010000"),
'h' => (UInt(13), 4, "1101" ),
'i' => (UInt(3), 4, "0011" ),
'l' => (UInt(17), 6, "010001"),
'm' => (UInt(15), 4, "1111" ),
'n' => (UInt(5), 3, "101" ),
'o' => (UInt(14), 4, "1110" ),
'p' => (UInt(19), 5, "10011" ),
'r' => (UInt(18), 5, "10010" ),
's' => (UInt(12), 4, "1100" ),
't' => (UInt(15), 5, "01111" ),
'u' => (UInt(14), 5, "01110" ),
'x' => (UInt(9), 5, "01001" ),
)
nbits = sum(c -> hdict[c][2], str)
data = compress(str, hdict)
encoding = join(Iterators.take(join(bitstring(entry) for entry in data), nbits))
reference = join(hdict[c][3] for c in str)
encoding == reference # true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment