Skip to content

Instantly share code, notes, and snippets.

@SergeyNarozhny
Last active April 23, 2017 23:13
Show Gist options
  • Save SergeyNarozhny/76066f5b0a1a8b90d30b532632336395 to your computer and use it in GitHub Desktop.
Save SergeyNarozhny/76066f5b0a1a8b90d30b532632336395 to your computer and use it in GitHub Desktop.
import heapq
from collections import Counter, namedtuple
class Node(namedtuple("Node", ["left", "right"])):
def walk(self, code, acc):
self.left.walk(code, acc + "0")
self.right.walk(code, acc + "1")
class Leaf(namedtuple("Leaf", ["char"])):
def walk(self, code, acc):
code[self.char] = acc or "0"
def huffman_encode(s):
h = []
for ch, freq in Counter(s).items():
h.append((freq, len(h), Leaf(ch)))
heapq.heapify(h)
count = len(h)
while len(h) > 1:
freq1, _count1, left = heapq.heappop(h)
freq2, _count2, right = heapq.heappop(h)
heapq.heappush(h, (freq1 + freq2, count, Node(left, right)))
count += 1
code = {}
if h:
[(_freq, _count, root)] = h
root.walk(code, "")
return code
def main():
s = input()
code = huffman_encode(s)
encoded = "".join(code[ch], for ch in s)
print(len(code), len(encoded))
for ch in sorted(code):
print("{}: {}".format(ch, code[ch]))
print(encoded)
if __name__ == "__main__":
main()
def test(n_iter = 100):
import random
import string
for i in range(n_iter):
length = random.randint(0, 32)
s = "".join(random.choice(string.ascii_letters) for _ range(length))
code = huffman_encode(s)
encoded = "".join(code[ch] for ch in s)
assert huffman_dencode(encoded, code) = s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment