Skip to content

Instantly share code, notes, and snippets.

@asdfMaciej
Last active May 8, 2020 12:31
Show Gist options
  • Save asdfMaciej/8252c048c018ba18d70e3d5473406bac to your computer and use it in GitHub Desktop.
Save asdfMaciej/8252c048c018ba18d70e3d5473406bac to your computer and use it in GitHub Desktop.
Basic implementation of the Huffman algorithm in Python.
"""
Basic implementation of the Huffman algorithm for school.
Created according to https://riptutorial.com/algorithm/example/23995/huffman-coding
Author: Maciej Kaszkowiak (@asdfMaciej), 2020-05-08
Licensed under GNU GPL
"""
from heapq import heappop, heappush
from collections import Counter
class Node:
def __init__(self, frequency=None, letter=None, left=None, right=None):
self.left = left
self.right = right
self.frequency = frequency
self.letter = letter
def __lt__(self, other):
"""Comparison operator override, required by heapq"""
return self.frequency < other.frequency
def traverse(node, combination=''):
"""Traverses over the nodes, adding 0 to the left side and 1 to the right side"""
if (node.letter):
print(f"{node.letter}: {combination} ({node.frequency})")
else:
traverse(node.left, combination+'0')
traverse(node.right, combination+'1')
# Get the letter frequency in a (letter, count) form
unencoded = "maciejkaszkowiakklasatrzecia"
letter_frequency = Counter(unencoded).most_common()
# ALGORITHM: Create a leaf node for each symbol and add it to the priority queue.
forest = []
for letter, frequency in letter_frequency:
heappush(forest, Node(frequency, letter))
# ALGORITHM: While there is more than one node in the queue:
while len(forest) > 1:
# ALGORITHM: Remove the two nodes of highest priority from the queue.
left, right = heappop(forest), heappop(forest)
# ALGORITHM: Create a new internal node with these two nodes as children
# ALGORITHM: and with frequency equal to the sum of the two nodes' frequency.
frequency = left.frequency + right.frequency
node = Node(frequency, None, left, right)
# ALGORITHM: Add the new node to the queue.
heappush(forest, node)
# Ensure that every letter was counted
assert(len(unencoded) == node.frequency)
# ALGORITHM: The remaining node is the root node and the Huffman tree is complete.
traverse(forest[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment