Skip to content

Instantly share code, notes, and snippets.

@GoWind
Created March 6, 2014 07:33
Show Gist options
  • Save GoWind/9384259 to your computer and use it in GitHub Desktop.
Save GoWind/9384259 to your computer and use it in GitHub Desktop.
Huffman encoding in python
# Huffman Encoding is an encoding scheme to compress the size of data .
# http://www.cs.cf.ac.uk/Dave/Multimedia/node210.html
#
# I designed a simple Huffman Prefix tree maker and a Huffman encoder to encode text strings
import sys
from collections import defaultdict
class HufNode:
def __init__(self,char,value):
self.char = char
self.value = value
self.left = None
self.right = None
def addleft(self,left):
self.left = left
def addright(self,right):
self.right = right
def stingy(self):
return "%s : %d"%(self.char,self.value)
def __getitem__(self,key):
if key == 1:
return self.value
elif key == 0:
return self.char
raise KeyError
def getValue(self):
return self.value
def texToDic(text):
d = defaultdict(int)
for x in text:
d[x] += 1
return d
def toTuple(dic):
''' returns a list of HufNodes from a given character count dict
'''
return [ HufNode(x[0],x[1]) for x in dic.iteritems() ]
def huffmanTree(HufNodes):
''' creates a HuffmanTree from a list of HufNodes
'''
HufNodes = sorted(HufNodes,key = lambda x : x[1])
while len(HufNodes) > 1:
x = HufNodes.pop(0)
y = HufNodes.pop(0)
#print "Merging %s :%d , %s : %d"%(x[0],x[1],y[0],y[1])
totalcount = x[1]+y[1]
char = 'blank'
RHuf = HufNode(char,totalcount)
RHuf.addleft(x)
RHuf.addright(y)
HufNodes.append(RHuf)
HufNodes = sorted(HufNodes,key= lambda x : x[1] )
return HufNodes[0]
def printHuffman(HNode):
'''
pre order traversal
'''
print "%s : %d"%(HNode.char,HNode.value)
if HNode.left is not None:
print "going left from %s : %d "%(HNode.char,HNode.value)
printHuffman(HNode.left)
if HNode.right is not None:
print "going right from %s : %d "%(HNode.char,HNode.value)
printHuffman(HNode.right)
def getPrefix(Root,queue,string):
'''
retrieves the prefixes from the HuffmanTree and saves them to a list(called queue but not one)
input Root node of HuffmanTree
an empty python list
a '' string if the call is the first call to getPrefix
'''
if Root.left is None and Root.right is None:
queue.append((Root.char,string))
else:
if Root.left:
s = string[:]
getPrefix(Root.left,queue,s+'0')
if Root.right:
s = string[:]
getPrefix(Root.right,queue,s+'1')
def encode(string):
newstring = ''
HuffmanTree = huffmanTree(toTuple(texToDic(string)))
prefixes = []
getPrefix(HuffmanTree,prefixes,'')
#print prefixes
prefixdict = { x[0] : x[1] for x in prefixes }
for x in string:
newstring += prefixdict[x]
return newstring
if __name__ == '__main__':
encoded = encode(sys.argv[1])
print encoded
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment