Skip to content

Instantly share code, notes, and snippets.

@m-mizutani
Created November 14, 2015 08:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save m-mizutani/19ac6f08a3f9a7684ef1 to your computer and use it in GitHub Desktop.
Save m-mizutani/19ac6f08a3f9a7684ef1 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import msgpack
# Module-level dict; nothing else in the visible source reads or writes it.
domain_map = {}
class DomainMap:
    """One node in a tree of domain-name labels with two size counters.

    Each node stores its own ``l_size`` / ``r_size`` counters (presumably
    local/remote byte counts -- not confirmed by this file), a dict of
    child nodes keyed by label, and ``_c_size``, a cached ``(l, r)`` pair
    that is only refreshed by :meth:`freeze`.
    """

    def __init__(self, parent, name, fqdn):
        """Create an empty node.

        parent -- the enclosing DomainMap, or None for the root.
        name   -- this node's label (the key under which the parent holds it).
        fqdn   -- the full name passed to ``add`` when the node was created.
        """
        self._parent = parent
        self._name = name
        self._map = {}
        self._l_size = 0
        self._r_size = 0
        self._c_size = (0, 0)
        self._fqdn = fqdn
        self._pruned = False

    def add(self, dn, fqdn, l_size, r_size):
        """Add the sizes to the node reached by following labels ``dn``.

        Missing nodes along the path are created.  Returns ``self`` when
        ``dn`` is empty, otherwise the *immediate* child for ``dn[0]``
        (not the final node of the path).
        """
        if not dn:
            self._l_size += l_size
            self._r_size += r_size
            return self

        label = dn[0]
        # Only build a node when the label is really new; setdefault() with
        # a constructor call would allocate a throwaway node on every add.
        if label not in self._map:
            self._map[label] = DomainMap(self, label, fqdn)
        child = self._map[label]
        child.add(dn[1:], fqdn, l_size, r_size)
        return child

    def to_s(self, depth=0):
        """Print this node and its descendants down to depth 3.

        Nodes shallower than depth 3 show their own counters; a node at
        depth 3 shows the totals of its whole subtree and is not expanded.
        """
        dname = self._name if self._map else self._fqdn
        if depth < 3:
            l_size, r_size = self._l_size, self._r_size
        else:
            l_size, r_size = self.l_sum(), self.r_sum()

        # Space-joined fields, written so the line parses on Python 2 and 3.
        fields = (' ' * depth, '({0})'.format(depth), dname,
                  l_size, r_size, self._c_size)
        print(' '.join(str(field) for field in fields))

        if depth < 3:
            for child in self._map.values():
                child.to_s(depth + 1)

    def to_dict(self, arr=None, dn=None):
        """Append a JSON-ready dict for this subtree to ``arr``; return ``arr``.

        arr -- list to append to; a fresh list is created when omitted.
        dn  -- labels of the ancestors, nearest first; treated as empty
               when omitted.

        Nodes with children get a ``'children'`` list; leaves get the size
        fields taken from ``_c_size``, so call ``freeze`` first.
        """
        # A fresh list per call: a shared ``[]`` default would keep the
        # results of earlier top-level calls.
        if arr is None:
            arr = []
        cdn = list(dn) if dn is not None else []
        if len(self._name) > 0:
            cdn.insert(0, self._name)

        obj = {'name': '.'.join(cdn)}
        if self.has_children():
            children = obj['children'] = []
            for child in self._map.values():
                child.to_dict(children, cdn)
        else:
            l_total, r_total = self._c_size[0], self._c_size[1]
            obj['size'] = l_total
            obj['l_size'] = l_total
            obj['r_size'] = r_total
            obj['l_byte'] = to_byte(l_total)
            obj['r_byte'] = to_byte(r_total)

        arr.append(obj)
        return arr

    def l_sum(self):
        """Return this node's ``l_size`` plus that of every descendant."""
        return self._l_size + sum(c.l_sum() for c in self._map.values())

    def r_sum(self):
        """Return this node's ``r_size`` plus that of every descendant."""
        return self._r_size + sum(c.r_sum() for c in self._map.values())

    def count(self):
        """Return the number of nodes in this subtree, including this one."""
        return 1 + sum(c.count() for c in self._map.values())

    def combine(self):
        """Drop all children and return the subtree totals from before.

        Every descendant is marked pruned.  The returned ``(l_sum, r_sum)``
        pair is NOT added to this node's own counters; a caller that wants
        to keep the totals has to store them itself.
        """
        totals = (self.l_sum(), self.r_sum())
        for child in self._map.values():
            child.prune()
        self._map.clear()
        return totals

    def prune(self):
        """Mark this node and all of its descendants as pruned."""
        self._pruned = True
        for child in self._map.values():
            child.prune()

    def freeze(self):
        """Recompute ``_c_size`` for the whole subtree and return the totals.

        A leaf caches and returns its own ``(l_size, r_size)``.  A node
        with children caches the sum of its children's totals (its own
        counters excluded) and returns that sum plus its own counters.
        """
        if not self._map:
            self._c_size = (self._l_size, self._r_size)
            return self._c_size

        child_totals = [child.freeze() for child in self._map.values()]
        s = (sum(t[0] for t in child_totals),
             sum(t[1] for t in child_totals))
        self._c_size = s
        return (s[0] + self._l_size, s[1] + self._r_size)

    def values(self):
        """Return a list of this node followed by all of its descendants."""
        nodes = [self]
        for child in self._map.values():
            nodes.extend(child.values())
        return nodes

    def c_size(self):
        """Return the ``(l, r)`` pair cached by the last ``freeze``."""
        return self._c_size

    def pruned(self):
        """Return True once ``prune`` has marked this node."""
        return self._pruned

    def has_children(self):
        """Return True when this node has at least one child."""
        return len(self._map) > 0

    def merge(self):
        """Fold this node into its parent's ``'*'`` child.

        The node (and its subtree) is marked pruned and removed from the
        parent, and its cached ``_c_size`` is added to the parent's ``'*'``
        node, which is created on demand and returned.  Must not be called
        on the root, which has no parent.
        """
        self.prune()
        del self._parent._map[self._name]
        return self._parent.add(['*'], '*', self._c_size[0], self._c_size[1])
import json
import pprint
# Shared pretty-printer for debugging; no live code in the visible source uses it.
pp = pprint.PrettyPrinter()
def to_byte(n):
    """Format a number as text with a Byte/KB/MB/GB unit.

    The value is divided by 1000 and moved to the next unit while it is
    greater than 2000, stopping at GB.  The result has two decimals,
    e.g. ``'2.00 KB'``.
    """
    amount = float(n)
    unit = 'Byte'
    for next_unit in ('KB', 'MB', 'GB'):
        # Stop at the first unit where the value is no longer above 2000.
        if not amount > 2000:
            break
        amount = amount / 1000
        unit = next_unit
    return '{0:.2f} {1}'.format(amount, unit)
def main(fname):
    """Read a msgpack flow log and write a reduced domain tree as JSON.

    fname -- path of a file holding a stream of msgpack messages.  Only
             messages whose first element is ``'flow.log'`` are used; the
             third element is read as a dict with ``r_name``, ``l_size``
             and ``r_size`` keys (looks like a tag/time/record layout --
             TODO confirm against the producer).

    The tree is shrunk until at most 200 of the initially listed nodes
    remain, and the root's dict form is then written to stdout.
    """
    dmap = DomainMap(None, '', '')

    # msgpack is a binary format: open in binary mode, and use ``with`` so
    # the file is closed once the stream has been consumed.
    with open(fname, 'rb') as stream:
        for msg in msgpack.Unpacker(stream):
            if msg[0] != 'flow.log':
                continue
            obj = msg[2]
            dname = obj.get('r_name')
            if dname is None:
                continue
            # Drop the last dot-separated piece and store labels from the
            # right-most one down (presumably names end with a trailing
            # dot, making the last piece empty -- TODO confirm).
            dn_list = dname.split('.')[:-1]
            dn_list.reverse()
            dmap.add(dn_list, dname,
                     obj.get('l_size', 0), obj.get('r_size', 0))

    dmap.freeze()

    # Smallest cached size first.  Sorting on the size alone avoids
    # comparing DomainMap objects with each other when sizes tie.
    darr = [(sum(node.c_size()), node) for node in dmap.values()]
    darr.sort(key=lambda entry: entry[0])

    # NOTE(review): the original indentation was lost; this assumes the
    # second freeze() runs once after the loop, not on every iteration.
    # NOTE(review): combine()'s returned totals are discarded here, and the
    # '*' node created by merge() is never added to darr -- confirm intent.
    while len(darr) > 200:
        tgt = darr[0][1]
        if tgt.has_children():
            tgt.combine()
        else:
            tgt.merge()
        darr = [entry for entry in darr if not entry[1].pruned()]

    dmap.freeze()
    # Pass a fresh list explicitly so the result never depends on state
    # kept in to_dict's default argument from an earlier file.
    json.dump(dmap.to_dict([])[0], sys.stdout)
if __name__ == '__main__':
    # Treat every command-line argument as a flow-log file to process.
    for path in sys.argv[1:]:
        main(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment