Skip to content

Instantly share code, notes, and snippets.

@PeterZhizhin
Created March 22, 2016 22:04
Show Gist options
  • Save PeterZhizhin/b55011a99097c400b5eb to your computer and use it in GitHub Desktop.
Save PeterZhizhin/b55011a99097c400b5eb to your computer and use it in GitHub Desktop.
from queue import PriorityQueue
import re
class HuffmanTree:
class HuffmanNode:
def __init__(self, node0=None, node1=None, symbol=None):
self.node0 = node0
self.node1 = node1
self.symbol = symbol
def __repr__(self):
if self.symbol is not None:
return str(self.symbol)
return "({left}, {right})".format(left=self.node0.__repr__(),
right=self.node1.__repr__())
def __str__(self):
if self.symbol is not None:
return str(self.symbol)
return "({left}, {right})".format(left=self.node0.__str__(),
right=self.node1.__str__())
# Regexp to build Huffman tree from code
tree_regexp = re.compile("\s*\((.*)\)\s*")
@staticmethod
def gen_node(node_str):
match = HuffmanTree.tree_regexp.match(node_str)
if match is None:
node_str = node_str.strip()
if node_str == '':
node_str = ' '
assert len(node_str) == 1
return HuffmanTree.HuffmanNode(symbol=node_str), set(node_str)
# Find zero-balance point in the center
node_str = match.groups()[0]
balance = 0
left = ""
right = ""
for i in range(len(node_str)):
if node_str[i] == '(':
balance += 1
elif node_str[i] == ')':
balance -= 1
elif node_str[i] == ',' and balance == 0:
left = node_str[:i]
right = node_str[i + 1:]
break
node0, node0_symb = HuffmanTree.gen_node(left)
node1, node1_symb = HuffmanTree.gen_node(right)
node0_symb.update(node1_symb)
return HuffmanTree.HuffmanNode(node0=node0, node1=node1), \
node0_symb
def get_table(self, str, node):
if node.symbol is not None:
self.table[node.symbol] = str
return
self.get_table(str + "0", node.node0)
self.get_table(str + "1", node.node1)
def __init__(self, tree):
assert isinstance(tree, str)
self.tree, self.symbols = HuffmanTree.gen_node(tree)
self.table = {}
if self.tree.node0 is None and self.tree.node1 is None:
self.table[self.tree.symbol] = '0'
else:
self.get_table("", self.tree)
def decompress(self, str):
if len(str) == 0:
return ''
assert all(c == '0' or c == '1' for c in str)
i = 0
result = ""
current_node = self.tree
if current_node.node0 is None and current_node.node1 is None:
return current_node.symbol
while i < len(str):
if current_node.symbol is not None:
result += current_node.symbol
current_node = self.tree
else:
c = str[i]
i += 1
if c == '1':
current_node = current_node.node1
else:
current_node = current_node.node0
assert current_node.symbol is not None
return result + current_node.symbol
if __name__ == "__main__":
tree = input()
data = input()
tree_class = HuffmanTree(tree)
print(tree_class.decompress(data))
import re
from queue import PriorityQueue
from sys import stdin
class HuffmanTree:
class HuffmanNode:
def __init__(self, node0=None, node1=None, symbol=None):
self.node0 = node0
self.node1 = node1
self.symbol = symbol
def __repr__(self):
if self.symbol is not None:
return str(self.symbol)
return "({left},{right})".format(left=self.node0.__repr__(),
right=self.node1.__repr__())
def __str__(self):
if self.symbol is not None:
return str(self.symbol)
return "({left},{right})".format(left=self.node0.__str__(),
right=self.node1.__str__())
def __lt__(self, other):
return False
# Regexp to build Huffman tree from code
tree_regexp = re.compile("\((.*)\)")
@staticmethod
def gen_node(node_str):
match = HuffmanTree.tree_regexp.match(node_str)
if match is None:
assert len(node_str) == 1
if node_str == 'з':
node_str = ','
elif node_str == 'й':
node_str = ')'
elif node_str == 'ё':
node_str = '('
elif node_str == 'ъ':
node_str = '\n'
return HuffmanTree.HuffmanNode(symbol=node_str), set(node_str)
# Find zero-balance point in the center
node_str = match.groups()[0]
balance = 0
left = ""
right = ""
for i in range(len(node_str)):
if node_str[i] == '(':
balance += 1
elif node_str[i] == ')':
balance -= 1
elif node_str[i] == ',' and balance == 0:
left = node_str[:i]
right = node_str[i + 1:]
break
node0, node0_symb = HuffmanTree.gen_node(left)
node1, node1_symb = HuffmanTree.gen_node(right)
node0_symb.update(node1_symb)
return HuffmanTree.HuffmanNode(node0=node0, node1=node1), \
node0_symb
@staticmethod
def create_tree(frequencies):
if len(frequencies) == 0:
return HuffmanTree.HuffmanNode(symbol='')
frequencies = [(f, a) for a, f in frequencies.items()]
p = PriorityQueue()
for val in frequencies:
p.put((val[0], HuffmanTree.HuffmanNode(symbol=val[1])))
while p.qsize() > 1:
l, r = p.get(), p.get()
l_c = l[1]
r_c = r[1]
node = HuffmanTree.HuffmanNode(node0=l_c, node1=r_c)
p.put((l[0] + r[0], node))
return p.get()[1]
def get_table(self, str, node):
if node.symbol is not None:
self.table[node.symbol] = str
return
self.get_table(str + "0", node.node0)
self.get_table(str + "1", node.node1)
@staticmethod
def build_prepared_tree(tree_str):
tree_str = tree_str.replace("\n", "ъ")
if ',,' in tree_str:
# Try to build tree changing the first comma
try:
return HuffmanTree.build_prepared_tree(tree_str.replace(",,", "з,"))
except AssertionError:
return HuffmanTree.build_prepared_tree(tree_str.replace(",,", ",з"))
if ",()," in tree_str:
if 'й' in tree_str:
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",ё),", 1))
elif 'ё' in tree_str:
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",(й,", 1))
else:
try:
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",(й,", 1))
except AssertionError:
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",ё),", 1))
else:
if 'й' not in tree_str:
tree_str = tree_str.replace("(),", "(й,")
if 'ё' not in tree_str:
tree_str = tree_str.replace(",()", ",ё)")
if 'й' not in tree_str:
tree_str = tree_str.replace(",)", ",й")
if 'ё' not in tree_str:
tree_str = tree_str.replace("(,", "ё,")
return HuffmanTree.gen_node(tree_str)
def __init__(self, tree):
assert isinstance(tree, (str, dict))
if isinstance(tree, str):
if len(tree) == 0:
self.tree = HuffmanTree.HuffmanNode(symbol='')
self.symbols = set()
else:
self.tree, self.symbols = HuffmanTree.build_prepared_tree(tree)
else:
self.symbols = set(tree.keys())
assert all(len(s) == 1 for s in self.symbols)
self.tree = HuffmanTree.create_tree(tree)
self.table = {}
if self.tree.node0 is None and self.tree.node1 is None:
self.table[self.tree.symbol] = '0'
else:
self.get_table("", self.tree)
def __str__(self):
s = ""
s += str(self.tree)
s += "\n"
s += str(self.symbols)
s += "\n"
s += str(self.table)
s += "\n"
return s
def decompress(self, str):
if len(str) == 0:
return ''
assert all(c == '0' or c == '1' for c in str)
i = 0
result = ""
current_node = self.tree
if current_node.node0 is None and current_node.node1 is None:
return current_node.symbol
while i < len(str):
if current_node.symbol is not None:
result += current_node.symbol
current_node = self.tree
else:
c = str[i]
i += 1
if c == '1':
current_node = current_node.node1
else:
current_node = current_node.node0
assert current_node.symbol is not None
return result + current_node.symbol
def compress(self, str):
assert all(c in self.table for c in str)
return "".join(self.table[c] for c in str)
if __name__ == "__main__":
for line in stdin:
tree_str = line.strip()
code = stdin.readline().strip()
tree = HuffmanTree(tree_str)
print(tree.decompress(code))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment