Skip to content

Instantly share code, notes, and snippets.

@t3rmin4t0r
Last active April 22, 2022 02:28
Show Gist options
  • Save t3rmin4t0r/6991ce21b41b2558c5362455c249204b to your computer and use it in GitHub Desktop.
Save t3rmin4t0r/6991ce21b41b2558c5362455c249204b to your computer and use it in GitHub Desktop.
Convert the vertex dependency section of a Hive-on-Tez EXPLAIN plan into a Graphviz digraph (renderable as an image) for debugging
import re, sys
# networkx is an optional dependency: it is only needed for cycle detection.
# NX records whether it could be imported so the rest of the script can
# degrade gracefully instead of failing at start-up.
NX = True
try:
    import networkx as nx
except ImportError:
    # Catch only a failed import; a bare ``except`` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated errors.
    NX = False
    sys.stderr.write("Could not import nx\npip install networkx, please\n")
# Sample vertex-dependency listings in the format this script parses.
# Each line reads "<vertex> <- <source> (<EDGE_TYPE>), <source> (<EDGE_TYPE>), ...":
# the vertex on the left is the edge target and every entry on the right is
# one of its sources.
# NOTE(review): the numeric suffixes (39, 58) presumably identify the queries
# the plans were taken from -- confirm with the author.
plan39 = """
Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
Map 10 <- Map 12 (BROADCAST_EDGE), Map 13 (BROADCAST_EDGE), Map 15 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
Reducer 11 <- Map 10 (SIMPLE_EDGE)
Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE)
Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE)
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
"""
# Second, larger sample listing in the same format.
plan58 = """
Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
Map 13 <- Map 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE)
Map 14 <- Map 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE)
Map 18 <- Map 13 (BROADCAST_EDGE), Map 20 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE)
Map 2 <- Map 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE)
Map 20 <- Reducer 8 (BROADCAST_EDGE)
Map 22 <- Map 23 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE)
Map 23 <- Map 25 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE)
Map 27 <- Map 22 (BROADCAST_EDGE), Map 29 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE)
Map 29 <- Reducer 9 (BROADCAST_EDGE)
Map 6 <- Map 1 (BROADCAST_EDGE), Map 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE)
Reducer 10 <- Reducer 7 (SIMPLE_EDGE)
Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE)
Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE)
Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE)
Reducer 19 <- Map 18 (SIMPLE_EDGE)
Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE)
Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE)
Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE)
Reducer 28 <- Map 27 (SIMPLE_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE)
Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
Reducer 9 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
"""
# The listing that is actually parsed and rendered below; plan39 is unused
# unless this assignment is changed.
plan = plan58
# Matches one dependency line: group 1 is the target vertex name, group 2 is
# the raw comma-separated list of "<source> (<EDGE_TYPE>)" entries.
VALID = re.compile(r"([A-Za-z0-9 ]*) <-(.*)")

# Matches a single "<source> (<EDGE_TYPE>)" entry: group 1 is the source
# vertex name, group 2 the edge type. Raw strings keep ``\(`` a regex escape
# rather than an (invalid) string escape sequence.
EDGE = re.compile(r"([A-Za-z0-9 ]*) \(([A-Z_]*)\)")

# Graphviz colours indexed by an edge's cycle number. Index 0 (empty string)
# is used for edges that are not on any detected cycle.
COLOURS = [
    '',
    'red', 'green', 'blue', 'cyan',
    'purple', 'magenta', 'pink',
    'forestgreen', 'teal',
]
def mark_cycles(edges):
    """Tag every edge that lies on a cycle, when networkx is available.

    Args:
        edges: dict mapping a target vertex name to the list of TezEdge
            objects feeding that vertex.

    Returns:
        The number of cycles found, or 0 when networkx could not be imported
        (in which case no edge is tagged).
    """
    if not NX:
        # Return an explicit count instead of falling through to None so the
        # return type is always an int.
        return 0
    return _mark_cycles(edges)
def _mark_cycles(edges):
    """Number the cycles of the plan graph and stamp them onto the edges.

    Builds a directed graph with one ``src -> target`` arc per edge, lists its
    simple cycles ordered by length, and sets ``cycle`` on every edge that is
    part of one. Cycles are numbered from 1; an edge shared by several cycles
    keeps the number of the last cycle processed.

    Args:
        edges: dict mapping a target vertex name to the list of edges that
            feed it.

    Returns:
        The number of cycles found.
    """
    import networkx as nx

    graph = nx.DiGraph()
    for incoming in edges.values():
        for edge in incoming:
            graph.add_edge(edge.src, edge.target)

    cycles = sorted(nx.simple_cycles(graph), key=len)
    for number, cycle in enumerate(cycles, 1):
        # Pair each vertex with its successor, wrapping around to the start.
        successors = cycle[1:] + [cycle[0]]
        for src, target in zip(cycle, successors):
            # `edges` is keyed by target, so look the arc up from its far end.
            for edge in edges[target]:
                if edge.src == src:
                    edge.cycle = number
    return len(cycles)
class TezEdge(object):
    """One directed edge of the plan graph: ``src`` feeds ``target``.

    Attributes are plain and mutable:
      target -- name of the consuming vertex
      src    -- name of the producing vertex
      kind   -- edge type string as written in the plan (e.g. "SIMPLE_EDGE")
      cycle  -- 0 when the edge is on no detected cycle, otherwise the
                1-based number of a cycle it belongs to
    """

    def __init__(self, target, src_kind):
        """Create an edge into ``target``.

        Args:
            target: name of the consuming vertex.
            src_kind: a ``(src, kind)`` pair, e.g. the groups of an EDGE
                regex match.
        """
        self.target = target
        # Unpack in the body: tuple parameters in a signature are Python 2
        # only (removed by PEP 3113). Positional callers are unaffected.
        self.src, self.kind = src_kind
        self.cycle = 0

    def __repr__(self):
        return "%s -> %s (cycle=%s)" % (self.src, self.target, self.cycle)
def parse(l):
    """Parse one dependency line of the plan.

    Args:
        l: a single, already stripped line such as
            ``"Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)"``.

    Returns:
        A ``(target, sources)`` tuple, where ``sources`` is a list of TezEdge
        objects (one per comma-separated entry), or None when the line does
        not match the VALID pattern.
    """
    matched = VALID.match(l)
    if not matched:
        return None

    target = matched.group(1)
    sources = []
    for entry in matched.group(2).split(","):
        src_and_kind = EDGE.match(entry.strip()).groups()
        sources.append(TezEdge(target, src_and_kind))
    return (target, sources)
# Parse every line of the selected plan; lines that are not dependency lines
# (blank lines, for instance) make parse() return None and are dropped.
# The result maps each target vertex name to the list of edges feeding it.
edges = dict(
    entry
    for entry in (parse(line.strip()) for line in plan.split("\n"))
    if entry
)

# Human-readable edge labels for the edge types this script knows about.
label = {"BROADCAST_EDGE" : "broadcast", "CUSTOM_SIMPLE_EDGE" : "unsorted", "SIMPLE_EDGE" : "sorted", "CUSTOM_EDGE" : "bucketed"}

# Number of cycles found (0 or None when cycle detection did not run).
n = mark_cycles(edges)

# Emit the graph in Graphviz DOT syntax on stdout. Each print() call takes a
# single pre-formatted string, so the output is identical under the Python 2
# print statement and the Python 3 print function.
print("digraph {")
if n:
    print('label = "%s";labelloc="t";' % ("%d Cycles" % n))
for k in edges:
    for e in edges[k]:
        if e.cycle:
            # Cycle numbers start at 1; wrap around the non-empty palette
            # entries so that more cycles than colours cannot raise IndexError.
            colour = COLOURS[(e.cycle - 1) % (len(COLOURS) - 1) + 1]
        else:
            colour = COLOURS[0]
        # Fall back to the raw edge type rather than printing "None" for a
        # type that has no friendly label.
        print('"%s" -> "%s" [label="%s", color="%s"];' % (e.src, e.target, label.get(e.kind, e.kind), colour))
print("}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment