boykoc/huffman-coding-compression.rb

## huffman-coding-compression.rb
#
# This is a basic implementation of compression based on Huffman coding.
# My main source of reference to try and implement this was the Wikipedia page [1].
# Specifically the image there that explains the basic steps [2].
#
# I was initially tasked to implmenet this and was unable to at the time.
# About a week later, after doing other practice coding problems and reading some more
# docs I decided to try it again.
#
# This took a number of hours broken over 2 days. I tried to limit google searches to only
# Ruby docs and the wikipedia page to try and re-create the first time I was asked to do this.
# I did at one point come across an implementation on a github repo that used a Node class
# to create the nodes and traverse them. However for now I tried to continue with my script-like
# implementation. Possibly I can refactor and implement this as classes and in a better/easier way.
#
# Note:
# I did this in repl.it and then put it into a gist. I wish I had started a gist initially or a repo.
# The times will be off as I am attempting to show the basic steps I took when first creating and not
# the many changes I did.
#
# Links:
# 1. https://en.wikipedia.org/wiki/Huffman_coding#Compression
# 2. https://upload.wikimedia.org/wikipedia/commons/a/a0/Huffman_coding_visualisation.svg
#

string = "A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"

x = string.split("").map { |chr| { chr => string.split("").count(chr) } }.inject(:merge)

# Sort and convert to 2D array.
x = x.sort_by { |key, value| value }

# Collect the symbols to use as the dictionary.
@dictionary = x.map { |arr| { arr[0] => "" } }.inject(:merge)

# Combine least 2 frequent letters, build tree inserting
# left/right leaf indications (bits), and re-sort until there's
# only 1 parent node.
while x.length > 1
  new_val = [
              (x[0][0] + x[1][0]),
              (x[0][1] + x[1][1]),
              [
                x[0].insert(2,"0"),
                x[1].insert(2,"1")
              ]
            ]
  x.push(new_val)
  x.delete_at(1)
  x.delete_at(0)
  x = x.sort_by { |key, value| value }
end

# Traverse the tree to build the dictionary to compress the
# text with.
#
# The left/right bits are always at index 2 and are always a string.
# Knowing this I can loop over the tree looking any array,
# checking if it's symbol includes the symbol being searched for,
# and if true add the bit to the end of the dictionary for that symbol.

# Note: This was the hardest part to figure out.
def loop(symbol, ar)
  ar.each do |value|
    if value.is_a?(Array) && value[0].include?(symbol) && value[2].is_a?(String)
      # Get the bits.
      @dictionary[symbol] += value[2]
    end
    # Recusivily dig deeper into the tree if I can.
    if value.is_a?(Array)
      loop(symbol, value)
    end
  end
end

# Start the search of the tree. Loop over each symbol
# in the dictionary until complete.
@dictionary.each do |key, value|
  loop(key, x)
end

# Print out the final dictionary with bits populated.
puts @dictionary

# Encode the string.
encoded_string = []
string.chars.each do |chr|
  encoded_string << @dictionary[chr]
end
encoded_string = encoded_string.join("")

# expected output from wikipedia page used to base entire compression from.
expected_output = "1000011101001000110010011101100111001001000111110010011111011111100010001111110100111001001011111011101000111111001"

puts "----------"
puts encoded_string
# This is not an ideal test, I know, but repl.it wouldn't let me add a real one.
if expected_output == encoded_string
  puts "Test Passed!"
else
  puts "Test Failed!"
end
	#
	# This is a basic implementation of compression based on Huffman coding.
	# My main source of reference to try and implement this was the Wikipedia page [1].
	# Specifically the image there that explains the basic steps [2].
	#
	# I was initially tasked to implmenet this and was unable to at the time.
	# About a week later, after doing other practice coding problems and reading some more
	# docs I decided to try it again.
	#
	# This took a number of hours broken over 2 days. I tried to limit google searches to only
	# Ruby docs and the wikipedia page to try and re-create the first time I was asked to do this.
	# I did at one point come across an implementation on a github repo that used a Node class
	# to create the nodes and traverse them. However for now I tried to continue with my script-like
	# implementation. Possibly I can refactor and implement this as classes and in a better/easier way.
	#
	# Note:
	# I did this in repl.it and then put it into a gist. I wish I had started a gist initially or a repo.
	# The times will be off as I am attempting to show the basic steps I took when first creating and not
	# the many changes I did.
	#
	# Links:
	# 1. https://en.wikipedia.org/wiki/Huffman_coding#Compression
	# 2. https://upload.wikimedia.org/wikipedia/commons/a/a0/Huffman_coding_visualisation.svg
	#

	string = "A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"

	x = string.split("").map { \|chr\| { chr => string.split("").count(chr) } }.inject(:merge)

	# Sort and convert to 2D array.
	x = x.sort_by { \|key, value\| value }

	# Collect the symbols to use as the dictionary.
	@dictionary = x.map { \|arr\| { arr[0] => "" } }.inject(:merge)

	# Combine least 2 frequent letters, build tree inserting
	# left/right leaf indications (bits), and re-sort until there's
	# only 1 parent node.
	while x.length > 1
	new_val = [
	(x[0][0] + x[1][0]),
	(x[0][1] + x[1][1]),
	[
	x[0].insert(2,"0"),
	x[1].insert(2,"1")
	]
	]
	x.push(new_val)
	x.delete_at(1)
	x.delete_at(0)
	x = x.sort_by { \|key, value\| value }
	end

	# Traverse the tree to build the dictionary to compress the
	# text with.
	#
	# The left/right bits are always at index 2 and are always a string.
	# Knowing this I can loop over the tree looking any array,
	# checking if it's symbol includes the symbol being searched for,
	# and if true add the bit to the end of the dictionary for that symbol.

	# Note: This was the hardest part to figure out.
	def loop(symbol, ar)
	ar.each do \|value\|
	if value.is_a?(Array) && value[0].include?(symbol) && value[2].is_a?(String)
	# Get the bits.
	@dictionary[symbol] += value[2]
	end
	# Recusivily dig deeper into the tree if I can.
	if value.is_a?(Array)
	loop(symbol, value)
	end
	end
	end

	# Start the search of the tree. Loop over each symbol
	# in the dictionary until complete.
	@dictionary.each do \|key, value\|
	loop(key, x)
	end

	# Print out the final dictionary with bits populated.
	puts @dictionary

	# Encode the string.
	encoded_string = []
	string.chars.each do \|chr\|
	encoded_string << @dictionary[chr]
	end
	encoded_string = encoded_string.join("")

	# expected output from wikipedia page used to base entire compression from.
	expected_output = "1000011101001000110010011101100111001001000111110010011111011111100010001111110100111001001011111011101000111111001"

	puts "----------"
	puts encoded_string
	# This is not an ideal test, I know, but repl.it wouldn't let me add a real one.
	if expected_output == encoded_string
	puts "Test Passed!"
	else
	puts "Test Failed!"
	end