Created
January 5, 2016 13:33
-
-
Save fahaddaniyal/0dc86c80f266fd9f8cdb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Node(): | |
def __init__(self, name, id='0', level=None, parent = None, val= None, visited=False): | |
self.name = name | |
self.children = [] | |
self.val = val | |
self.parent= parent | |
self.level = level | |
self.id = id | |
self.dodelete = False | |
self.visited = visited | |
def add_child(self, node): | |
node.parent = self | |
node.level = self.level + 1 | |
self.children.append(node) | |
return node | |
def has_children(self): | |
return len([c for c in self.children if c.dodelete== False])> 0 | |
def sum_all_child(self): | |
if self.has_children(): | |
return sum([child.val for child in self.children]) | |
def get_by_name(self, name, level=None): | |
if self.name == name: | |
if level is not None and level== self.level: | |
return self | |
for child in self.children: | |
n = child.get_by_name(name, level) | |
if n: | |
return n | |
return None | |
def get_by_path(self, path): | |
if self.id == path: | |
return self | |
for child in self.children: | |
n = child.get_by_path(path) | |
if n: | |
return n | |
return None | |
def __iter__(self): | |
yield self | |
for child in self.children: | |
for node in child: | |
yield node | |
def convert_to_json(self, file_path=None): | |
x = str(self).replace("'", "").replace("\\", "") | |
if file_path: | |
with open(file_path, 'wb') as fid: | |
fid.write(x) | |
return x | |
def __str__(self): | |
sint = ignore_exception(TypeError, DefaultVal=0)(int) | |
if self.has_children(): | |
return '{{ "name" : "{0}", "_id": "{1}", "count": {2} , "children": {3} }}'\ | |
.format(self.name, self.id , sint(self.val), [str(child) for child in self.children if child.dodelete == False]) | |
return '{{ "name" : "{0}" , "_id" : "{1}", "count" : {2}, "children": []}}'.format(self.name, self.id, sint(self.val)) | |
def flatten_tree(tree): | |
keywords = [] | |
for n in tree: | |
if n.dodelete: | |
continue | |
if not n.has_children(): | |
row = [] | |
row += [n.name, n.val] | |
x = n | |
while (True): | |
y = x.parent | |
if y.level == 0: | |
break | |
row.insert(0,y.name) | |
x = y | |
keywords.append(row) | |
return keywords | |
def collapse_nodes(tree, thresh = 100): | |
# To get nodes at a level | |
for n in tree: | |
if n.dodelete: | |
continue | |
sub_tree = tree.get_by_path (n.id) | |
sub_tree_stack = [] | |
for child in sub_tree.children: | |
if child.val is not None and thresh > child.val: | |
sub_tree_stack.append(child) | |
tree.get_by_path(child.id).dodelete = True | |
if sub_tree_stack: | |
sub_tree.add_child(Node(",".join([subnode.name for subnode in sub_tree_stack]), | |
val = sum([subnode.val for subnode in sub_tree_stack]), | |
id= sorted([subnode.id for subnode in sub_tree_stack])[0])) | |
return tree | |
def roll_up(tree, thresh = 20, level=5): | |
for n in tree: | |
if n.dodelete or n.visited: | |
continue | |
if n.level != level: | |
continue | |
sub_tree = tree.get_by_path(n.id) | |
# sub_tree = tree.get_by_name(n.name, n.level) | |
if sub_tree is None: | |
continue | |
sub_tree_stack = [] | |
for child in tree.get_by_path(n.id).children: | |
if child.val is not None and child.val <= thresh: | |
sub_tree_stack.append((child.name, child.id, child.val)) | |
# Also mark this for deletion | |
tree.get_by_path(child.id).dodelete = True | |
if sub_tree_stack: | |
node_name = n.name + ": [" + ",".join([subnode[0] for subnode in sub_tree_stack]) + "]" | |
node_val = sum([subnode[2]for subnode in sub_tree_stack]) | |
node_id = sorted([subnode[1] for subnode in sub_tree_stack])[0] | |
parent_name = n.parent.name | |
parent_level = n.parent.level | |
parent_id= n.parent.id | |
tree.get_by_path(n.id).dodelete = True | |
tree.get_by_path(parent_id).add_child(Node(node_name, val=node_val, id = n.id, visited=True) ) | |
return tree | |
def add_node(n, string, val=None, level=None, parent_id = '0'): | |
# my_node = n.get_by_name(string, level) | |
parent_node = n.get_by_path(parent_id) | |
for child in parent_node.children: | |
if child.name == string: | |
return child | |
return n.add_child(Node(string, val = val, id= '{0}:{1}'.format(parent_id, len(parent_node.children)) )) | |
def create_tree(flat_hierarchy): | |
''' | |
flat_hierarchy is a list of the form [l0, l1 , l2, l3, v1] | |
''' | |
tree = Node('root', level=0, id = '0') | |
for idx, entry in enumerate(flat_hierarchy): | |
n = tree | |
val_level = 0 | |
for level, i in enumerate(entry[:-2]): | |
n = add_node(n, i, level =level+1, parent_id=n.id) | |
val_level = level | |
add_node(n, entry[-2], val = entry[-1], level=val_level+2, parent_id=n.id) | |
return tree |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment