Skip to content

Instantly share code, notes, and snippets.

@ArmedGuy
Created January 11, 2015 20:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ArmedGuy/867137f4d9c6f21ca5d5 to your computer and use it in GitHub Desktop.
Save ArmedGuy/867137f4d9c6f21ca5d5 to your computer and use it in GitHub Desktop.
Implementation of huffman encoding with no regards to performance. Only idiots would use this
class BinaryTree:
def __init__(self, rootNode):
self.root = rootNode
def __getNodes(self):
return [self.root] + self.root.nodes
nodes = property(__getNodes)
class BinaryNode:
def __init__(self, name, value):
self.n1 = None
self.n2 = None
self.parent = None
self.name = name
self.value = value
def __str__(self):
return str(self.value)
def __getNodes(self):
l = []
if self.n1 != None:
l.append(self.n1)
l = l + self.n1.nodes
if self.n2 != None:
l.append(self.n2)
l = l + self.n2.nodes
return l
nodes = property(__getNodes)
def smallestNodes(nodes):
n1 = -1
n2 = -1
for i in xrange(len(nodes)):
if n1 == -1 or nodes[i].value <= nodes[n1].value:
n2 = n1
n1 = i
elif n2 == -1 or nodes[i].value < nodes[n2].value:
n2 = i
return (n1, n2)
def huffmanWeights(inp):
l = list(inp)
return [(k, l.count(k)) for k in set(l)]
def huffmanTree(values):
nodes = []
for v in values:
nodes.append(BinaryNode(v[0], v[1]))
while True:
sm = smallestNodes(nodes)
if sm[0] == -1 or sm[1] == -1:
return BinaryTree(nodes[0])
newNode = BinaryNode(nodes[sm[0]].name + nodes[sm[1]].name, nodes[sm[0]].value + nodes[sm[1]].value)
newNode.n1 = nodes[sm[0]]
newNode.n2 = nodes[sm[1]]
nodes[sm[0]].parent = newNode
nodes[sm[1]].parent = newNode
if sm[0] > sm[1]:
del nodes[sm[0]]
del nodes[sm[1]]
else:
del nodes[sm[1]]
del nodes[sm[0]]
nodes.append(newNode)
def huffmanPath(n):
path = []
c = n
while c.parent != None:
path.append(1 if c.parent.n1 == c else 0)
c = c.parent
return path[::-1]
def huffmanEncode(tree, message):
enc = []
for i in xrange(len(message)):
n = [n for n in tree.nodes if n.name == message[i]][0]
enc = enc + huffmanPath(n)
return enc
def huffmanDecode(tree, encoded):
message = []
c = tree.root
for e in encoded:
c = c.n1 if e == 1 else c.n2
if c.n1 == None or c.n2 == None:
message.append(c.name)
c = tree.root
return ''.join(message)
if __name__ == '__main__':
import math
message = "Bacon ipsum dolor amet pork kevin short ribs sirloin, consequat incididunt nisi strip steak spare ribs. Fugiat beef ribs capicola meatball consectetur t-bone shankle et, alcatra commodo venison bresaola ribeye. Elit in spare ribs ea aliqua chicken sed ham quis velit ad cillum. Eiusmod ut t-bone meatloaf. Tail consectetur voluptate pariatur shankle dolore eu non pork in pork loin tri-tip venison drumstick. T-bone non short loin, incididunt pig shank meatloaf ut turducken velit."
vals = huffmanWeights(message) # calculate weights
tree = huffmanTree(vals) # build tree
enc = huffmanEncode(tree, message) # encode
print "----------------------------------"
print "Encoded: "
encStr = []
enc = enc + [0] * (8 - len(enc) % 8)
for b in range(len(enc) / 8):
byte = enc[b*8:(b+1)*8]
encStr.append(chr(int(''.join([str(bit) for bit in byte]), 2)))
print ''.join(encStr)
print "----------------------------------"
print math.ceil(len(enc) / 8.0), "bytes vs", len(message), "bytes"
print "ratio", ((len(message) * 8.0) / len(enc))
print "----------------------------------"
print "Decoded: "
print huffmanDecode(tree, enc)
print "----------------------------------"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment