-
-
Save kirlf/2eb242f225f9bfed4ecbfc8e1e2f5f71 to your computer and use it in GitHub Desktop.
Simple Huffman coding implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 3.8 | |
import math | |
from queue import Queue | |
from collections import Counter | |
class HuffmanCodes: | |
def __init__(self): | |
pass | |
def __huffman_tree_to_table(self, root, prefix, lookup_table): | |
"""Converts the Huffman tree rooted at "root" to a lookup table""" | |
if type(root[1]) != tuple: | |
# leaf node | |
lookup_table[root[1]] = prefix | |
else: | |
self.__huffman_tree_to_table(root[1][0], prefix + "0", lookup_table) | |
self.__huffman_tree_to_table(root[1][1], prefix + "1", lookup_table) | |
return lookup_table | |
def huffman(self, input_text): | |
""" | |
This function generates the huffman tree for the given input. | |
The input is a list of "symbols". | |
""" | |
symbol_list = list(input_text) | |
# figure out the frequency of each symbol | |
counts = Counter(symbol_list).most_common()[::-1] | |
total = len(symbol_list) | |
if len(counts) < 2: | |
# 0 or 1 unique symbols, so no sense in performing huffman coding | |
return | |
""" There are some implementation difficulties to apply PriorityQueue class: | |
this try to compare all of the elements, and hint from https://docs.python.org/3/library/queue.html | |
that suggests to implement PrioritizedItem class makes presented below implementation more sophisticated. | |
So, let's think about sorted in ascending order queue as the priority queue. | |
I guess it is not so big assumption because this queue is used once. """ | |
queue_v = Queue() | |
for (val, count) in counts: | |
queue_v.put((count, val)) | |
# Create the huffman tree | |
largest_node_count = 0 | |
while total != largest_node_count: | |
node1 = queue_v.get() | |
node2 = queue_v.get() | |
new_count = node1[0] + node2[0] | |
if new_count > largest_node_count: | |
largest_node_count = new_count | |
queue_v.put((new_count, (node1, node2))) | |
# generate the symbol to huffman code mapping | |
huffman_tree_root = queue_v.get() | |
prefix = "" | |
lookup_table = {} | |
lookup_table = self.__huffman_tree_to_table(huffman_tree_root, prefix, lookup_table) | |
return lookup_table | |
def encode(self, input_text): | |
""" Encodes string by Huffman """ | |
lookup_table = self.huffman(input_text) | |
symbol_list = list(input_text) | |
encoded = "".join([lookup_table[symb] for symb in symbol_list]) | |
return encoded, lookup_table | |
def decode(self, input_binary, lookup_table): | |
""" Decodes sequence by Huffman """ | |
lookup_table = {v: k for k, v in lookup_table.items()} | |
out_str = "" | |
out_symb = "" | |
for bin_symb in input_binary: | |
out_symb = out_symb + bin_symb | |
if out_symb in lookup_table: | |
out_str = out_str + lookup_table[out_symb] | |
out_symb = "" | |
return out_str | |
# Message: | |
input_str = 'Hello, world!' | |
print("Input string: %s\n" % input_str) | |
# How much bits is required to encode this string uniformly | |
symbol_list = tuple(input_str) | |
counts = Counter(symbol_list).most_common() | |
bits_per_symb = int(math.ceil(math.log2(len(counts)))) | |
print('%d bits are required for the encoding of each symbol.' % bits_per_symb) | |
print('Total number of the bits that required for the encoding of the message is %d.\n' % (bits_per_symb*len(input_str))) | |
# Apply Huffman coding | |
huff_table = HuffmanCodes().huffman(input_str) | |
print("Huffman table:\n%s\n" % str(huff_table)) | |
encoded_str, lookup_table = HuffmanCodes().encode(input_str) | |
print("Encoded string: %s\n" % encoded_str) | |
print('Total number of the bits that required for the encoding of the message is %d.' % len(encoded_str)) | |
decoded_str = HuffmanCodes().decode(encoded_str, lookup_table) | |
print("Decoded string: %s" % decoded_str) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment