Skip to content

Instantly share code, notes, and snippets.

@kirlf
Forked from jasonrdsouza/huffman.py
Last active April 24, 2024 14:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kirlf/2eb242f225f9bfed4ecbfc8e1e2f5f71 to your computer and use it in GitHub Desktop.
Save kirlf/2eb242f225f9bfed4ecbfc8e1e2f5f71 to your computer and use it in GitHub Desktop.
Simple Huffman coding implementation
# Python 3.8
import math
from queue import Queue
from collections import Counter
class HuffmanCodes:
def __init__(self):
pass
def __huffman_tree_to_table(self, root, prefix, lookup_table):
"""Converts the Huffman tree rooted at "root" to a lookup table"""
if type(root[1]) != tuple:
# leaf node
lookup_table[root[1]] = prefix
else:
self.__huffman_tree_to_table(root[1][0], prefix + "0", lookup_table)
self.__huffman_tree_to_table(root[1][1], prefix + "1", lookup_table)
return lookup_table
def huffman(self, input_text):
"""
This function generates the huffman tree for the given input.
The input is a list of "symbols".
"""
symbol_list = list(input_text)
# figure out the frequency of each symbol
counts = Counter(symbol_list).most_common()[::-1]
total = len(symbol_list)
if len(counts) < 2:
# 0 or 1 unique symbols, so no sense in performing huffman coding
return
""" There are some implementation difficulties to apply PriorityQueue class:
this try to compare all of the elements, and hint from https://docs.python.org/3/library/queue.html
that suggests to implement PrioritizedItem class makes presented below implementation more sophisticated.
So, let's think about sorted in ascending order queue as the priority queue.
I guess it is not so big assumption because this queue is used once. """
queue_v = Queue()
for (val, count) in counts:
queue_v.put((count, val))
# Create the huffman tree
largest_node_count = 0
while total != largest_node_count:
node1 = queue_v.get()
node2 = queue_v.get()
new_count = node1[0] + node2[0]
if new_count > largest_node_count:
largest_node_count = new_count
queue_v.put((new_count, (node1, node2)))
# generate the symbol to huffman code mapping
huffman_tree_root = queue_v.get()
prefix = ""
lookup_table = {}
lookup_table = self.__huffman_tree_to_table(huffman_tree_root, prefix, lookup_table)
return lookup_table
def encode(self, input_text):
""" Encodes string by Huffman """
lookup_table = self.huffman(input_text)
symbol_list = list(input_text)
encoded = "".join([lookup_table[symb] for symb in symbol_list])
return encoded, lookup_table
def decode(self, input_binary, lookup_table):
""" Decodes sequence by Huffman """
lookup_table = {v: k for k, v in lookup_table.items()}
out_str = ""
out_symb = ""
for bin_symb in input_binary:
out_symb = out_symb + bin_symb
if out_symb in lookup_table:
out_str = out_str + lookup_table[out_symb]
out_symb = ""
return out_str
# Message:
input_str = 'Hello, world!'
print("Input string: %s\n" % input_str)
# How much bits is required to encode this string uniformly
symbol_list = tuple(input_str)
counts = Counter(symbol_list).most_common()
bits_per_symb = int(math.ceil(math.log2(len(counts))))
print('%d bits are required for the encoding of each symbol.' % bits_per_symb)
print('Total number of the bits that required for the encoding of the message is %d.\n' % (bits_per_symb*len(input_str)))
# Apply Huffman coding
huff_table = HuffmanCodes().huffman(input_str)
print("Huffman table:\n%s\n" % str(huff_table))
encoded_str, lookup_table = HuffmanCodes().encode(input_str)
print("Encoded string: %s\n" % encoded_str)
print('Total number of the bits that required for the encoding of the message is %d.' % len(encoded_str))
decoded_str = HuffmanCodes().decode(encoded_str, lookup_table)
print("Decoded string: %s" % decoded_str)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment