Skip to content

Instantly share code, notes, and snippets.

@GasparCorrea
Created October 31, 2017 17:56
Show Gist options
  • Save GasparCorrea/9a0704e6b7f1df78942c5b4813720fb5 to your computer and use it in GitHub Desktop.
Save GasparCorrea/9a0704e6b7f1df78942c5b4813720fb5 to your computer and use it in GitHub Desktop.
Implementación del código Huffman en Python
#!/usr/bin/env python3
from collections import Counter
import json
import pickle
import sys
import argparse
class Tree(object):
def __init__(self,left,right,data):
self.left = left
self.right = right
self.data = data
def get_freq(lines):
freq = Counter()
total = len(lines)+1
for char in lines:
freq[char]+=1
freq["end"] = 1.0
return [(k, v/total) for k, v in freq.items()]
def append_sorted(lst,element):
for i in range(len(lst)):
if lst[i][1] >= element[1]:
lst.insert(i,element)
return lst
lst.append(element)
return lst
def create_parent(freq):
left = Tree(None,None,freq[0][0]) if isinstance(freq[0][0],str) else freq[0][0]
right = Tree(None,None,freq[1][0]) if isinstance(freq[1][0],str) else freq[1][0]
parent = Tree(left,right,None)
return (parent,freq[0][1]+freq[1][1])
def tree_to_dict(tree,acc,dic):
if tree.left.data != None: dic[tree.left.data] = acc+"0"
elif tree.left != None: dic = tree_to_dict(tree.left,acc+"0",dic)
if tree.right.data != None: dic[tree.right.data] = acc+"1"
elif tree.right != None: dic = tree_to_dict(tree.right,acc+"1",dic)
return dic
def compress(dic,content):
res = ""
for ch in content:
code = dic[ch]
res = res + code
res = '1' + res + dic['end']
res = res + (len(res) % 8 * "0")
return int(res,2)
def save(data,dic,name):
with open(name,'wb') as out: pickle.dump(data,out)
with open(name+".json",'w') as out: json.dump(dic,out)
def create_tree(freq):
freq.sort(key=lambda tup: tup[1])
while len(freq)!=1:
new = create_parent(freq)
freq.pop(0)
freq.pop(0)
append_sorted(freq,new)
return freq[0][0]
def encode(input,output):
with open(input,"r") as f:
content = " ".join(str(x) for x in f.readlines())
freq = get_freq(content)
tree = create_tree(freq)
dic = tree_to_dict(tree,"",dict())
save(compress(dic,content),dic,output)
def decode(input,output,dic):
with open(dic) as d: dic = json.load(d)
with open(input,'rb') as f: bits = str(bin(pickle.load(f)))[3:]
dic = {v:k for k,v in dic.items()}
acc = ""
content = ""
for i in bits:
acc+=i
if acc in dic:
content = content+"" if dic[acc] == "end" else content+dic[acc]
acc = ""
with open(output,"w") as f: f.write(content)
def get_args(args):
description = "huffman - Compress/Uncompress files"
arg = argparse.ArgumentParser(description=description)
arg.add_argument("-c", action="store",
help="Compress the given file.")
arg.add_argument("-d", action="store",
help="Uncompress the given file.")
arg.add_argument("-j", action="store",
help="Use the given json.")
return arg.parse_args(args)
def process_args(args):
if not len(sys.argv) > 1:
print("error: huffman needs to be given arguments to run.\n")
print(" Refer to \"huffman -h\" for more info.")
sys.exit(1)
if args.c and args.d:
print("error: Conflicting arguments -c and -d.\n")
print(" Refer to \"huffman -h\" for more info")
sys.exit(1)
if args.c:
encode(args.c,args.c)
if args.d and args.j:
decode(args.d,args.d,args.j)
args = get_args(sys.argv[1:])
process_args(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment