Created
October 31, 2017 17:56
-
-
Save GasparCorrea/9a0704e6b7f1df78942c5b4813720fb5 to your computer and use it in GitHub Desktop.
Implementación del código Huffman en Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from collections import Counter | |
import json | |
import pickle | |
import sys | |
import argparse | |
class Tree(object): | |
def __init__(self,left,right,data): | |
self.left = left | |
self.right = right | |
self.data = data | |
def get_freq(lines): | |
freq = Counter() | |
total = len(lines)+1 | |
for char in lines: | |
freq[char]+=1 | |
freq["end"] = 1.0 | |
return [(k, v/total) for k, v in freq.items()] | |
def append_sorted(lst,element): | |
for i in range(len(lst)): | |
if lst[i][1] >= element[1]: | |
lst.insert(i,element) | |
return lst | |
lst.append(element) | |
return lst | |
def create_parent(freq): | |
left = Tree(None,None,freq[0][0]) if isinstance(freq[0][0],str) else freq[0][0] | |
right = Tree(None,None,freq[1][0]) if isinstance(freq[1][0],str) else freq[1][0] | |
parent = Tree(left,right,None) | |
return (parent,freq[0][1]+freq[1][1]) | |
def tree_to_dict(tree,acc,dic): | |
if tree.left.data != None: dic[tree.left.data] = acc+"0" | |
elif tree.left != None: dic = tree_to_dict(tree.left,acc+"0",dic) | |
if tree.right.data != None: dic[tree.right.data] = acc+"1" | |
elif tree.right != None: dic = tree_to_dict(tree.right,acc+"1",dic) | |
return dic | |
def compress(dic,content): | |
res = "" | |
for ch in content: | |
code = dic[ch] | |
res = res + code | |
res = '1' + res + dic['end'] | |
res = res + (len(res) % 8 * "0") | |
return int(res,2) | |
def save(data,dic,name): | |
with open(name,'wb') as out: pickle.dump(data,out) | |
with open(name+".json",'w') as out: json.dump(dic,out) | |
def create_tree(freq): | |
freq.sort(key=lambda tup: tup[1]) | |
while len(freq)!=1: | |
new = create_parent(freq) | |
freq.pop(0) | |
freq.pop(0) | |
append_sorted(freq,new) | |
return freq[0][0] | |
def encode(input,output): | |
with open(input,"r") as f: | |
content = " ".join(str(x) for x in f.readlines()) | |
freq = get_freq(content) | |
tree = create_tree(freq) | |
dic = tree_to_dict(tree,"",dict()) | |
save(compress(dic,content),dic,output) | |
def decode(input,output,dic): | |
with open(dic) as d: dic = json.load(d) | |
with open(input,'rb') as f: bits = str(bin(pickle.load(f)))[3:] | |
dic = {v:k for k,v in dic.items()} | |
acc = "" | |
content = "" | |
for i in bits: | |
acc+=i | |
if acc in dic: | |
content = content+"" if dic[acc] == "end" else content+dic[acc] | |
acc = "" | |
with open(output,"w") as f: f.write(content) | |
def get_args(args): | |
description = "huffman - Compress/Uncompress files" | |
arg = argparse.ArgumentParser(description=description) | |
arg.add_argument("-c", action="store", | |
help="Compress the given file.") | |
arg.add_argument("-d", action="store", | |
help="Uncompress the given file.") | |
arg.add_argument("-j", action="store", | |
help="Use the given json.") | |
return arg.parse_args(args) | |
def process_args(args): | |
if not len(sys.argv) > 1: | |
print("error: huffman needs to be given arguments to run.\n") | |
print(" Refer to \"huffman -h\" for more info.") | |
sys.exit(1) | |
if args.c and args.d: | |
print("error: Conflicting arguments -c and -d.\n") | |
print(" Refer to \"huffman -h\" for more info") | |
sys.exit(1) | |
if args.c: | |
encode(args.c,args.c) | |
if args.d and args.j: | |
decode(args.d,args.d,args.j) | |
args = get_args(sys.argv[1:]) | |
process_args(args) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment