Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Show the AST of a translation unit with libclang and python
# inspired by http://eli.thegreenplace.net/2011/07/03/parsing-c-in-python-with-clang/
import sys
import clang.cindex
def verbose(*args, **kwargs):
'''filter predicate for show_ast: show all'''
return True
def no_system_includes(cursor, level):
'''filter predicate for show_ast: filter out verbose stuff from system include files'''
return (level!= 1) or (cursor.location.file is not None and not cursor.location.file.name.startswith('/usr/include'))
# A function show(level, *args) would have been simpler but less fun
# and you'd need a separate parameter for the AST walkers if you want it to be exchangeable.
class Level(int):
'''represent currently visited level of a tree'''
def show(self, *args):
'''pretty print an indented line'''
print '\t'*self + ' '.join(map(str, args))
def __add__(self, inc):
'''increase level'''
return Level(super(Level, self).__add__(inc))
def is_valid_type(t):
'''used to check if a cursor has a type'''
return t.kind != clang.cindex.TypeKind.INVALID
def qualifiers(t):
'''set of qualifiers of a type'''
q = set()
if t.is_const_qualified(): q.add('const')
if t.is_volatile_qualified(): q.add('volatile')
if t.is_restrict_qualified(): q.add('restrict')
return q
def show_type(t, level, title):
'''pretty print type AST'''
level.show(title, str(t.kind), ' '.join(qualifiers(t)))
if is_valid_type(t.get_pointee()):
show_type(t.get_pointee(), level+1, 'points to:')
def show_ast(cursor, filter_pred=verbose, level=Level()):
'''pretty print cursor AST'''
if filter_pred(cursor, level):
level.show(cursor.kind, cursor.spelling, cursor.displayname, cursor.location)
if is_valid_type(cursor.type):
show_type(cursor.type, level+1, 'type:')
show_type(cursor.type.get_canonical(), level+1, 'canonical type:')
for c in cursor.get_children():
show_ast(c, filter_pred, level+1)
if __name__ == '__main__':
index = clang.cindex.Index.create()
tu = index.parse(sys.argv[1])
print 'Translation unit:', tu.spelling
for f in tu.get_includes():
print '\t'*f.depth, f.include.name
show_ast(tu.cursor, no_system_includes)
@pmokri01

This comment has been minimized.

Copy link

commented May 12, 2017

Hi
Can I parse a code into expressions (and not cindex) using this parse function? I want to parse a c code and add the Opcode of operations to AST. I figured out how to extract opcode from expressions but don't know how to parse a code to get to expressions.

Thanks in advance!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.