Skip to content

Instantly share code, notes, and snippets.

@akamhy
Created January 20, 2023 09:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akamhy/4ba8fe28e7cfae9c21c6031642705ab9 to your computer and use it in GitHub Desktop.
Save akamhy/4ba8fe28e7cfae9c21c6031642705ab9 to your computer and use it in GitHub Desktop.
Compress The Data - Huffman Coding
import heapq
from collections import defaultdict
def huffman_encoding(data_list):
"""
Huffman encoding algorithm.
"""
freq_table = defaultdict(int)
for data in data_list:
for char in data:
freq_table[char] += 1
priority_queue = [[weight, [char, ""]] for char, weight in freq_table.items()]
heapq.heapify(priority_queue)
while len(priority_queue) > 1:
lo = heapq.heappop(priority_queue)
hi = heapq.heappop(priority_queue)
for pair in lo[1:]:
pair[1] = "0" + pair[1]
for pair in hi[1:]:
pair[1] = "1" + pair[1]
heapq.heappush(priority_queue, [lo[0] + hi[0]] + lo[1:] + hi[1:])
encoding_table = dict(priority_queue[0][1:])
encoded_data_list = []
for data in data_list:
encoded_data = ""
for char in data:
encoded_data += encoding_table[char]
encoded_data_list.append(encoded_data)
return encoded_data_list, encoding_table
def huffman_decoding(data_list, encoding_table):
"""
Huffman decoding algorithm.
"""
decoding_table = {
code: char for char, code in encoding_table.items()
} # Reverse the encoding table
decoded_data_list = []
for data in data_list:
decoded_data = ""
current_code = ""
for bit in data:
current_code += bit
if current_code in decoding_table:
decoded_data += decoding_table[current_code]
current_code = ""
decoded_data_list.append(decoded_data)
return decoded_data_list, decoding_table
if __name__ == "__main__":
data_list = ["abc", "aa", "b", "aaa", "baz", "zzx"]
encoded_data_list, encoding_table = huffman_encoding(data_list)
decoded_data_list, decoding_table = huffman_decoding(
encoded_data_list, encoding_table
)
original_size = sum(
[len(data) * 7 for data in data_list]
) # 7 bits per character (ASCII)
compressed_size = sum([len(encoded_data) for encoded_data in encoded_data_list])
compression_percentage = (1 - (compressed_size / original_size)) * 100
print("Original size:", original_size)
print("Compressed size:", compressed_size)
print("Compression percentage: {:.2f}%".format(compression_percentage))
print("Original data:", data_list)
print("Encoded data:", encoded_data_list)
print("Encoding table:", encoding_table)
print("Decoded data:", decoded_data_list)
print("Decoding table:", decoding_table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment