Dot is a graph description language in extensions .dot
or .gv
, and can be read by programs such as Graphviz (try it online). However, it's syntax is too complicated for the goal of generating a simple control-flow graph, and is not very well human readable. Therefore, I defined a new language, prettydot, for this purpose.
// Prettydot graph format, for better human readability.
// This can be highlighted via Graphviz(dot) syntax highlighting.
// the 'label' keyword must be on the beginning of its line
label = "CFG for 'main()'"
// each node starts with '<name>:'
// the indentation on the next line will be used to detect further blocks
// create double links using {T<>|F<>} format
Node0:
%3 = alloca i32, align 4
%4 = alloca i32, align 4
%5 = alloca i8**, align 8
%6 = alloca i32, align 4
%7 = alloca i32, align 4
store i32 0, i32* %3, align 4
store i32 %0, i32* %4, align 4
store i8** %1, i8*** %5, align 8
call void @A()
store i32 -1, i32* %6, align 4
%8 = load i32, i32* %4, align 4
%9 = icmp sle i32 %8, 1
br i1 %9, label Node1, label Node2
{T<Node1>|F<Node2>}
// create single links via 'label <Name>' format
// alternatively, place '{<name>}' at the end to prevent <name> from showing
Node1:
%11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16\l... x i8]* @.str, i32 0, i32 0))
store i32 -1, i32* %3, align 4
br label Node3
Node2:
call void @B()
%14 = load i32, i32* %7, align 4
br i1 %14, label Node1, label Node3
{T<Node1>|F<Node3>}
Node3:
%15 = load i32, i32* %3, align 4
ret i32 %15
As you can see it's more elegant and consice. The number of characters also reduced 45% in contrast to .dot
format.
In order to compile our language to .dot
format, we use a python script:
#! /usr/bin/python3
"""
Translate a 'prettydot' graph to a dot graph. Written by davidhcefx, 2020.7.8.
A prettydot graph is a more human-readable format for control-flow graphs.
See example.prettydot for its basic syntax.
"""
from sys import argv
from typing import Optional, List, Tuple, Dict
indent = ''
class Node:
def __init__(self, code: List[str], T_link: Optional[str], F_link: Optional[str]):
self.code = code
self.T_link = T_link # single links will store here
self.F_link = F_link
self.links_count = 2 - [T_link, F_link].count(None)
# check if line is in format: '^[^[:space:]]+:$'
def is_block_header(line: str) -> bool:
return line.endswith(':') and not line[0].isspace()
# find begin and end line numbers of the next node block. Return -1 if not found.
def find_next_node(lines: List[str], start: int) -> Tuple[int, int]:
begin = -1
end = len(lines) - 1
for i in range(start, len(lines)):
if is_block_header(lines[i]):
begin = i
break
if begin >= 0:
for i in range(begin + 1, len(lines)):
if not lines[i].startswith(indent): # indent ended
end = i - 1
break
return (begin, end)
def assert_no_empty_links(links: List[str], line: str):
if links.count('') != 0:
raise SystemExit(f"Invalid link name in: '{line}'")
def encode_special_char(line: str) -> str:
return line.translate({
ord('"'): '\\"',
ord('<'): '\\<',
ord('>'): '\\>',
ord('{'): '\\{',
ord('}'): '\\}',
ord('\\'): '\\\\'
})
# find the line that starts with 'label =', and extract the label name.
def parse_label(lines: List[str]) -> str:
for ln in lines:
if ln.startswith('label'):
start = ln.find('"') + 1
end = ln.rfind('"')
if not 0 < start < end:
raise SystemExit('Invalid label string.')
return ln[start:end]
raise SystemExit("Cannot find a line that starts with 'label'.")
# parse indent from line
def parse_indent(line: str, atline: int) -> str:
line_strp = line.strip()
if line_strp == '':
raise SystemExit(f'#{atline}: Expecting at least one line of code.')
else:
return line[0:line.index(line_strp[0])]
# return (T_link, F_link, do_pop), where the links may be None
def parse_transition_links(line: str) -> Tuple[Optional[str], Optional[str], bool]:
line = line.strip()
if line.startswith('{T<') and line.endswith('>}') and '>|F<' in line:
# double links: {T<...>|F<...>}
T_link, F_link = line.split('>|F<')
T_link = T_link[3:]
F_link = F_link[:-2]
assert_no_empty_links([T_link, F_link], line)
return (encode_special_char(T_link), encode_special_char(F_link), True)
elif line.startswith('{<') and line.endswith('>}'):
# single link: {<name>}
T_link = line[2:-2]
assert_no_empty_links([T_link], line)
return (encode_special_char(T_link), None, True)
else:
idx = line.find('label')
if idx != -1:
# single link in format: label <name>
name = line[idx + 5:].strip().split()[0]
assert_no_empty_links([name], line)
return (encode_special_char(name), None, False)
else:
# no link
return (None, None, False)
# extract each node, and build up their connections.
def parse_nodes(lines: List[str]) -> Dict[str, Node]:
graph = dict()
# determine indentation
global indent
for i, _ in enumerate(lines):
if is_block_header(lines[i]):
indent = parse_indent(lines[i + 1], atline=i + 1)
break
if indent == '':
raise SystemExit('No indentation found.')
# parse each node block
start, end = find_next_node(lines, 0)
while start >= 0:
name = encode_special_char(lines[start].strip(':'))
code = [ln[len(indent):] for ln in lines[start + 1:end + 1]]
if len(code) == 0:
raise SystemExit(f'#{end}: Expecting at least one line of code.')
T_link, F_link, do_pop = parse_transition_links(code[-1])
if do_pop:
code = code[:-1]
code = [encode_special_char(c) for c in code] # encode code block
graph[name] = Node(code, T_link, F_link)
start, end = find_next_node(lines, end)
return graph
def println(level: int, line: str):
print(indent * level + line)
def gen_dot_graph(label: str, graph: Dict[str, Node]):
println(0, 'digraph "%s" {' % label)
println(1, f'label="{label}"\n')
for name in graph:
node = graph[name]
println(1, f'{name} [')
println(2, 'shape=record,')
println(2, 'label="{')
println(3, f'{name}:\\l')
for code in node.code:
println(3, f'{code}\\l')
if node.links_count == 2:
println(3, '|{<%s>T|<%s>F}' % (node.T_link, node.F_link))
println(3, '}"]')
# links
if node.links_count == 2:
println(1, f'{name}:{node.T_link} -> {node.T_link}')
println(1, f'{name}:{node.F_link} -> {node.F_link}')
elif node.links_count == 1:
println(1, f'{name} -> {node.T_link}')
println(1, '')
println(0, '}')
def main(filename: str):
lines = open(filename).read().split('\n')
label = encode_special_char(parse_label(lines))
graph = parse_nodes(lines)
gen_dot_graph(label, graph)
if __name__ == '__main__':
if len(argv) <= 1:
print(f'Usage: {argv[0]} [filename]')
else:
main(argv[1])