Skip to content

Instantly share code, notes, and snippets.

@lazyatom
Last active October 3, 2016 12:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lazyatom/b5a830f2c445c87f06994c1cd8d61a26 to your computer and use it in GitHub Desktop.
Save lazyatom/b5a830f2c445c87f06994c1cd8d61a26 to your computer and use it in GitHub Desktop.
Huffman encoding and decoding
#!/usr/bin/env ruby
# Demonstrating Huffman encoding of strings to compress them
# This script accepts a string as an argument, and builds a
# Huffman tree to help compress them, then decodes the message using
# the same tree to verify that it worked OK.
# probabilities_from_book = {
# a: 0.25,
# b: 0.21,
# c: 0.18,
# d: 0.14,
# e: 0.09,
# f: 0.07,
# g: 0.06
# }
class Node < Struct.new(:symbol, :probability, :left, :right, :label)
def to_s
[symbol, probability, label].inspect
end
end
def huffman_tree_for_probabilities(probabilities)
subtrees = probabilities.map { |s, prob|
Node.new(s, prob)
}
until subtrees.length == 1
x, y = subtrees.pop(2)
y.label = '0'
x.label = '1'
new_subtree = Node.new(nil, x.probability + y.probability, x, y)
subtrees.push(new_subtree)
subtrees.sort_by! { |s| -s.probability }
end
subtrees.first
end
def table_for(tree, label='')
compound_label = "#{label}#{tree.label}"
if tree.symbol
{tree.symbol => compound_label}
else
table_for(tree.left, compound_label).
merge(table_for(tree.right, compound_label))
end
end
def probabilities_from_string(string)
characters = string.split('')
uniq_characters = characters.uniq
character_counts = uniq_characters.inject({}) { |a,c| a.merge(c => characters.select { |x| x == c }.count) }
probabilities = character_counts.inject({}) { |a, (char,count)| a.merge(char => count.to_f / string.length) }
end
def huffman_tree_for_text(string)
huffman_tree_for_probabilities(probabilities_from_string(string))
end
def encoded_as_bits(string)
table = table_for(huffman_tree_for_text(string))
encoded_string = string.split('').map { |c| table[c] }.join
end
def decode(binary_string, huffman_tree)
table = table_for(huffman_tree).invert
bits = binary_string.split('')
current_word = []
symbols = []
while bits.any?
current_word << bits.shift
if symbol = table[current_word.join('')]
symbols << symbol
current_word.clear
end
end
symbols.join
end
if ARGV.empty?
puts "Usage: ruby huffman.rb <string to compress>"
exit
end
input_string = ARGV.join(' ')
input_string_length_in_bits = input_string.length * 8
puts "input length in bits #{input_string_length_in_bits} (#{input_string.length} * 8)"
huffman_tree = huffman_tree_for_text(input_string)
encoded_string = encoded_as_bits(input_string)
puts
puts "Binary #{encoded_string}"
puts "Encoded string length in bits: #{encoded_string.length}"
puts "Compression: #{(encoded_string.length.to_f / (input_string_length_in_bits)) * 100}%"
puts
puts "Decoded: #{decode(encoded_string, huffman_tree)}"
$ ruby huffman.rb this is a test of compression
input length in bits 232 (29 * 8)
Binary 1111100000100011001000110100101101111101000111111001111100110111010111001110001101111010000001001110110
Encoded string length in bits: 103
Compression: 44.396551724137936%
Decoded: this is a test of compression
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment