Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save davidhcefx/45aab91541fcce6a68a8e37aefa5ac74 to your computer and use it in GitHub Desktop.
Save davidhcefx/45aab91541fcce6a68a8e37aefa5ac74 to your computer and use it in GitHub Desktop.

Prettydot - Human readable dot graph language for CFG

Background

Dot is a graph description language in extensions .dot or .gv, and can be read by programs such as Graphviz (try it online). However, it's syntax is too complicated for the goal of generating a simple control-flow graph, and is not very well human readable. Therefore, I defined a new language, prettydot, for this purpose.

Usage

// Prettydot graph format, for better human readability.
// This can be highlighted via Graphviz(dot) syntax highlighting.

// the 'label' keyword must be on the beginning of its line
label = "CFG for 'main()'"

// each node starts with '<name>:'
// the indentation on the next line will be used to detect further blocks
// create double links using {T<>|F<>} format
Node0:
    %3 = alloca i32, align 4
    %4 = alloca i32, align 4
    %5 = alloca i8**, align 8
    %6 = alloca i32, align 4
    %7 = alloca i32, align 4
    store i32 0, i32* %3, align 4
    store i32 %0, i32* %4, align 4
    store i8** %1, i8*** %5, align 8
    call void @A()
    store i32 -1, i32* %6, align 4
    %8 = load i32, i32* %4, align 4
    %9 = icmp sle i32 %8, 1
    br i1 %9, label Node1, label Node2
    {T<Node1>|F<Node2>}

// create single links via 'label <Name>' format
// alternatively, place '{<name>}' at the end to prevent <name> from showing
Node1:
    %11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16\l... x i8]* @.str, i32 0, i32 0))
    store i32 -1, i32* %3, align 4
    br label Node3

Node2:
    call void @B()
    %14 = load i32, i32* %7, align 4
    br i1 %14, label Node1, label Node3
    {T<Node1>|F<Node3>}

Node3:
    %15 = load i32, i32* %3, align 4
    ret i32 %15

As you can see it's more elegant and consice. The number of characters also reduced 45% in contrast to .dot format.

Compilation

In order to compile our language to .dot format, we use a python script:

#! /usr/bin/python3
"""
Translate a 'prettydot' graph to a dot graph. Written by davidhcefx, 2020.7.8.
A prettydot graph is a more human-readable format for control-flow graphs.
See example.prettydot for its basic syntax.
"""
from sys import argv
from typing import Optional, List, Tuple, Dict

indent = ''

class Node:
    def __init__(self, code: List[str], T_link: Optional[str], F_link: Optional[str]):
        self.code = code
        self.T_link = T_link  # single links will store here
        self.F_link = F_link
        self.links_count = 2 - [T_link, F_link].count(None)

# check if line is in format: '^[^[:space:]]+:$'
def is_block_header(line: str) -> bool:
    return line.endswith(':') and not line[0].isspace()

# find begin and end line numbers of the next node block. Return -1 if not found.
def find_next_node(lines: List[str], start: int) -> Tuple[int, int]:
    begin = -1
    end = len(lines) - 1
    for i in range(start, len(lines)):
        if is_block_header(lines[i]):
            begin = i
            break
    if begin >= 0:
        for i in range(begin + 1, len(lines)):
            if not lines[i].startswith(indent):  # indent ended
                end = i - 1
                break
    return (begin, end)

def assert_no_empty_links(links: List[str], line: str):
    if links.count('') != 0:
        raise SystemExit(f"Invalid link name in: '{line}'")

def encode_special_char(line: str) -> str:
    return line.translate({
        ord('"'): '\\"',
        ord('<'): '\\<',
        ord('>'): '\\>',
        ord('{'): '\\{',
        ord('}'): '\\}',
        ord('\\'): '\\\\'
        })

# find the line that starts with 'label =', and extract the label name.
def parse_label(lines: List[str]) -> str:
    for ln in lines:
        if ln.startswith('label'):
            start = ln.find('"') + 1
            end = ln.rfind('"')
            if not 0 < start < end:
                raise SystemExit('Invalid label string.')
            return ln[start:end]
    raise SystemExit("Cannot find a line that starts with 'label'.")

# parse indent from line
def parse_indent(line: str, atline: int) -> str:
    line_strp = line.strip()
    if line_strp == '':
        raise SystemExit(f'#{atline}: Expecting at least one line of code.')
    else:
        return line[0:line.index(line_strp[0])]

# return (T_link, F_link, do_pop), where the links may be None
def parse_transition_links(line: str) -> Tuple[Optional[str], Optional[str], bool]:
    line = line.strip()
    if line.startswith('{T<') and line.endswith('>}') and '>|F<' in line:
        # double links: {T<...>|F<...>}
        T_link, F_link = line.split('>|F<')
        T_link = T_link[3:]
        F_link = F_link[:-2]
        assert_no_empty_links([T_link, F_link], line)
        return (encode_special_char(T_link), encode_special_char(F_link), True)
    elif line.startswith('{<') and line.endswith('>}'):
        # single link: {<name>}
        T_link = line[2:-2]
        assert_no_empty_links([T_link], line)
        return (encode_special_char(T_link), None, True)
    else:
        idx = line.find('label')
        if idx != -1:
            # single link in format: label <name>
            name = line[idx + 5:].strip().split()[0]
            assert_no_empty_links([name], line)
            return (encode_special_char(name), None, False)
        else:
            # no link
            return (None, None, False)

# extract each node, and build up their connections.
def parse_nodes(lines: List[str]) -> Dict[str, Node]:
    graph = dict()
    # determine indentation
    global indent
    for i, _ in enumerate(lines):
        if is_block_header(lines[i]):
            indent = parse_indent(lines[i + 1], atline=i + 1)
            break
    if indent == '':
        raise SystemExit('No indentation found.')

    # parse each node block
    start, end = find_next_node(lines, 0)
    while start >= 0:
        name = encode_special_char(lines[start].strip(':'))
        code = [ln[len(indent):] for ln in lines[start + 1:end + 1]]
        if len(code) == 0:
            raise SystemExit(f'#{end}: Expecting at least one line of code.')
        T_link, F_link, do_pop = parse_transition_links(code[-1])
        if do_pop:
            code = code[:-1]
        code = [encode_special_char(c) for c in code]  # encode code block
        graph[name] = Node(code, T_link, F_link)
        start, end = find_next_node(lines, end)

    return graph

def println(level: int, line: str):
    print(indent * level + line)

def gen_dot_graph(label: str, graph: Dict[str, Node]):
    println(0, 'digraph "%s" {' % label)
    println(1, f'label="{label}"\n')
    for name in graph:
        node = graph[name]
        println(1, f'{name} [')
        println(2, 'shape=record,')
        println(2, 'label="{')
        println(3, f'{name}:\\l')
        for code in node.code:
            println(3, f'{code}\\l')
        if node.links_count == 2:
            println(3, '|{<%s>T|<%s>F}' % (node.T_link, node.F_link))
        println(3, '}"]')

        # links
        if node.links_count == 2:
            println(1, f'{name}:{node.T_link} -> {node.T_link}')
            println(1, f'{name}:{node.F_link} -> {node.F_link}')
        elif node.links_count == 1:
            println(1, f'{name} -> {node.T_link}')

        println(1, '')

    println(0, '}')

def main(filename: str):
    lines = open(filename).read().split('\n')
    label = encode_special_char(parse_label(lines))
    graph = parse_nodes(lines)
    gen_dot_graph(label, graph)


if __name__ == '__main__':
    if len(argv) <= 1:
        print(f'Usage: {argv[0]} [filename]')
    else:
        main(argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment