Skip to content

Instantly share code, notes, and snippets.

@YosukeM
Last active April 1, 2016 00:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save YosukeM/1603874ab12e864cb7d1 to your computer and use it in GitHub Desktop.
Save YosukeM/1603874ab12e864cb7d1 to your computer and use it in GitHub Desktop.
C++のヘッダファイルにつけられた属性マクロを解析
# coding: utf-8
# @file
# @brief C++リフレクション
# @author Yosuke Morimoto
# @license Public Domain
import sys
import clang.cindex
import os
import re
from clang.cindex import Index
from clang.cindex import TranslationUnit
from clang.cindex import Cursor
from clang.cindex import CursorKind
from clang.cindex import Type
from clang.cindex import TokenKind
from clang.cindex import SourceRange
from clang.cindex import Diagnostic
#
# 属性付き・簡易抽象構文木
#
class Directive(object):
    """A preprocessor directive picked up from the token stream.

    Instances start out empty; whoever creates one fills in both fields.
    """

    def __init__(self):
        # Where the directive starts (None until assigned by the creator).
        self.location = None
        # Text of the directive (empty until assigned by the creator).
        self.content = ""
class AttributeInfo(object):
    """One attribute macro invocation and its raw arguments.

    ``args`` holds positional arguments and ``hash`` holds ``key = value``
    arguments, both stored as source text. ``parse_arg`` / ``parse_hash``
    convert that text to a Python value on demand:

    * string literal          -> str (surrounding quotes removed)
    * character literal       -> str (surrounding quotes removed)
    * floating-point literal  -> float
    * integer literal         -> int
    * ``true`` / ``false``    -> bool
    * anything else           -> None
    """

    # Integer literal: optional sign, then hex / binary / "0o" octal /
    # decimal digits, then an optional C-style suffix made of u and l.
    # Whitespace after the sign is allowed because the sign and the number
    # may have been joined with a space when the argument text was built.
    _INT_RE = re.compile(
        r"(-?)\s*(0x[0-9a-f]+|0b[01]+|0o[0-7]+|[0-9]+)[ul]{0,3}$",
        re.IGNORECASE)

    # Floating literal: must contain a decimal point or an exponent; a
    # trailing f / l suffix is accepted and dropped.
    _FLOAT_RE = re.compile(
        r"(-?)\s*((?:[0-9]+\.[0-9]*|\.[0-9]+)(?:e[+-]?[0-9]+)?"
        r"|[0-9]+e[+-]?[0-9]+)[fl]?$",
        re.IGNORECASE)

    def __init__(self, name):
        self.name = name
        self.args = []
        self.hash = {}
        self.location = None

    def parse_arg(self, index):
        """Return positional argument ``index`` converted to a Python value.

        Raises IndexError when there is no such positional argument.
        """
        return self._parse(self.args[index])

    def parse_hash(self, key):
        """Return keyword argument ``key`` converted, or None if absent."""
        if key not in self.hash:
            return None
        return self._parse(self.hash[key])

    def _parse(self, literal):
        """Convert literal source text to a Python value (None if unknown).

        NaN and Infinity spellings are not recognised.
        """
        lowered = literal.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False

        # String or character literal: drop exactly one quote on each side
        # so quotes that are part of the content (e.g. an escaped \") stay.
        if len(literal) >= 2 and literal[0] in "\"'" \
                and literal[-1] == literal[0]:
            return literal[1:-1]

        int_match = self._INT_RE.match(literal)
        if int_match:
            sign, digits = int_match.groups()
            try:
                if len(digits) > 1 and digits[0] == "0" and digits.isdigit():
                    # A leading zero marks a C-style octal literal, which
                    # int(..., 0) refuses to parse.
                    value = int(digits, 8)
                else:
                    # Base 0 lets int() pick the radix from the prefix.
                    value = int(digits, 0)
            except ValueError:
                # e.g. "09": looks like octal but has invalid digits.
                return None
            return -value if sign else value

        float_match = self._FLOAT_RE.match(literal)
        if float_match:
            sign, body = float_match.groups()
            value = float(body)
            return -value if sign else value

        return None

    def push(self, key, val):
        """Record one argument.

        With an empty ``val`` the text in ``key`` is a positional argument
        (ignored when it is empty too); otherwise it is ``key = val``.
        """
        if val == "":
            if key != "":
                self.args.append(key)
        else:
            self.hash[key] = val
class AstNode(object):
    """Base class for declarations that can carry attributes.

    Keeps the cursor it was built from, plus the cursor's spelling and
    location copied into ``name`` and ``location``.
    """

    def __init__(self, cursor):
        self.cursor = cursor
        self.name = cursor.spelling
        self.location = cursor.location
        self.attributes = []

    def find_attr(self, name):
        """Return the first attached attribute called ``name``, else None."""
        candidates = (attr for attr in self.attributes if attr.name == name)
        return next(candidates, None)
class TypeInfo(AstNode):
    """A named type plus the namespaces and classes that enclose it."""

    def __init__(self, cursor, outer_namespaces, outer_classes):
        super(TypeInfo, self).__init__(cursor)
        self.outer_namespaces = outer_namespaces
        self.outer_classes = outer_classes
        # Cache for get_name_fully_qualified().
        self._name_fq = None

    def get_name_with_outer_classes(self):
        """Return the name prefixed by enclosing classes, e.g. ``A::B``."""
        return "::".join(list(self.outer_classes) + [self.name])

    def get_name_fully_qualified(self):
        """Return the name prefixed by enclosing namespaces and classes.

        The result is cached after the first non-empty computation.
        """
        cached = self._name_fq
        if not cached:
            scopes = self.outer_namespaces + self.outer_classes
            cached = "::".join(list(scopes) + [self.name])
            self._name_fq = cached
        return cached
class RecordInfo(TypeInfo):
    """A class or struct declaration together with its members."""

    def __init__(self, cursor, outer_namespaces, outer_classes):
        super(RecordInfo, self).__init__(
            cursor, outer_namespaces, outer_classes)
        self.fields = []
        self.methods = []
        self.inner_types = []
        self.inner_attrs = []
        self.inner_directives = []
        # Fields, methods, types, attributes and directives merged together
        # and ordered by source offset.
        self.inner_entities = []
        self.base_type_names = []
        self._parser = None

    def read_inner(self, parser, outer_namespaces, outer_classes):
        """Walk the record's children and fill in the member lists.

        ``parser`` supplies the collected attributes/directives and receives
        every nested type in ``parser.type_infos``. ``outer_classes`` is the
        class chain to record on nested types; callers pass it with this
        record's own name already appended.
        """
        self._parser = parser
        # Attributes found between the end of the previous member and the
        # start of the next one are attached to the next one. This must be
        # an integer offset: find_attributes_within compares it against
        # attribute offsets.
        prev_offset = self.cursor.location.offset
        for cursor in self.cursor.get_children():
            kind = cursor.kind
            if kind == CursorKind.CXX_BASE_SPECIFIER:
                self.base_type_names.append(
                    cursor.type.get_canonical().spelling)
            elif kind in (CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL):
                record_info = RecordInfo(
                    cursor, outer_namespaces, outer_classes)
                record_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                parser.type_infos.append(record_info)
                record_info.read_inner(
                    parser, outer_namespaces,
                    outer_classes + [cursor.spelling])
                self.inner_types.append(record_info)
            elif kind == CursorKind.ENUM_DECL:
                enum_info = EnumInfo(cursor, outer_namespaces, outer_classes)
                enum_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                parser.type_infos.append(enum_info)
                enum_info.read_inner(parser)
                self.inner_types.append(enum_info)
            elif kind == CursorKind.FIELD_DECL:
                field_info = FieldInfo(cursor)
                field_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                self.fields.append(field_info)
            elif kind in (CursorKind.CONSTRUCTOR, CursorKind.DESTRUCTOR,
                          CursorKind.CXX_METHOD):
                method_info = MethodInfo(cursor)
                method_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                self.methods.append(method_info)
            prev_offset = cursor.extent.end.offset

        # Attributes and directives are collected only for the target
        # header, so look them up only when the record is defined there.
        source_file = self.cursor.location.file
        if source_file and parser.header_path == source_file.name:
            start = self.cursor.extent.start.offset
            end = self.cursor.extent.end.offset
            self.inner_attrs = parser.find_attributes_within(start, end)
            self.inner_directives = parser.find_directives_within(start, end)

        # Merge everything found inside the record, in source order.
        merged = (self.fields + self.methods + self.inner_types
                  + self.inner_attrs + self.inner_directives)
        self.inner_entities = sorted(
            merged, key=lambda entity: entity.location.offset)

    def is_derived_from(self, type_name_fq):
        """Return True if ``type_name_fq`` is a direct or indirect base.

        Indirect bases are resolved through the parser passed to
        ``read_inner``; bases it does not know about are not followed.
        """
        # Direct bases first, so no lookup is needed for the common case.
        if type_name_fq in self.base_type_names:
            return True
        for base_type_name in self.base_type_names:
            type_info = self._parser.find_type_info(base_type_name)
            if type_info and isinstance(type_info, RecordInfo):
                if type_info.is_derived_from(type_name_fq):
                    return True
        return False
class FieldInfo(AstNode):
    """A data member of a record; adds nothing beyond AstNode."""

    def __init__(self, cursor):
        super(FieldInfo, self).__init__(cursor)
class MethodInfo(AstNode):
    """A constructor, destructor or member function of a record."""

    def __init__(self, cursor):
        super(MethodInfo, self).__init__(cursor)

    def is_static(self):
        """Return what the cursor reports from ``is_static_method()``."""
        return self.cursor.is_static_method()

    def is_const(self):
        """Return True if the method's USR marks it as const.

        Reads the single character after the last ``#`` in the cursor's
        USR, parses it as a decimal digit and tests bit 0.
        NOTE(review): this presumes libclang encodes the method qualifiers
        as that digit with bit 0 meaning const -- confirm for the libclang
        version in use.
        """
        usr = self.cursor.get_usr()
        hash_pos = usr.rfind("#")
        # No '#' at all, or nothing after it: no qualifier digit present.
        if hash_pos == -1 or hash_pos == len(usr) - 1:
            return False
        try:
            flags = int(usr[hash_pos + 1])
        except ValueError:
            # The character after '#' is not a digit.
            return False
        return (flags & 1) != 0

    def is_virtual(self):
        """Return the result of libclang's ``clang_CXXMethod_isVirtual``."""
        return clang.cindex.conf.lib.clang_CXXMethod_isVirtual(self.cursor)

    def is_pure_virtual(self):
        """Return the result of ``clang_CXXMethod_isPureVirtual``."""
        return clang.cindex.conf.lib.clang_CXXMethod_isPureVirtual(
            self.cursor)
class EnumInfo(TypeInfo):
    """An enum declaration together with its constants."""

    def __init__(self, cursor, outer_namespaces, outer_classes):
        super(EnumInfo, self).__init__(cursor, outer_namespaces, outer_classes)
        self.constants = []

    def read_inner(self, parser):
        """Collect the enum's constants and the attributes preceding each.

        ``parser`` supplies the attributes gathered from the token stream.
        """
        # Attributes between the end of the previous child and the start of
        # a constant belong to that constant. This must be an integer
        # offset: find_attributes_within compares it against attribute
        # offsets.
        prev_offset = self.cursor.location.offset
        for cursor in self.cursor.get_children():
            if cursor.kind == CursorKind.ENUM_CONSTANT_DECL:
                constant_info = EnumConstantInfo(cursor)
                constant_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                self.constants.append(constant_info)
            prev_offset = cursor.extent.end.offset
class EnumConstantInfo(AstNode):
    """A single enumerator of an enum; adds nothing beyond AstNode."""

    def __init__(self, cursor):
        super(EnumConstantInfo, self).__init__(cursor)
#
# パーサ
#
class Parser(object):
    """Parses a C++ header with libclang and collects annotated types.

    Identifiers matching ``attr_regex`` are treated as attribute macros.
    Results accumulate in ``attr_infos``, ``directives`` and ``type_infos``.
    """

    def __init__(self, attr_regex):
        self.clang_args = []
        # Pattern an identifier must match to be treated as an attribute.
        self.attr_pattern = re.compile(attr_regex)
        # Path of the header being parsed.
        self.header_path = ""
        # Results are stored here.
        self.attr_infos = []
        self.directives = []
        self.type_infos = []

    def set_clang_library_file(self, library_file):
        """Tell the clang bindings which libclang library file to load."""
        clang.cindex.Config.set_library_file(library_file)

    def set_clang_args(self, args):
        """Set the command-line arguments passed to clang when parsing."""
        self.clang_args = args

    def collect_tokens(self, tokens):
        """Scan ``tokens`` for attribute invocations and directives.

        Finished attributes are appended to ``self.attr_infos`` and
        directives to ``self.directives``.
        """
        depth = 0
        attr = None
        after_equals = False
        tmp_key = ""
        tmp_val = ""
        directive = None
        directive_end = 0
        # NOTE(review): literal tokens are not copied into a directive's
        # text (so "#if 0" is stored as "#if"); confirm that is intended.
        directive_kinds = (TokenKind.KEYWORD, TokenKind.IDENTIFIER,
                           TokenKind.PUNCTUATION)

        for token in tokens:
            is_punct = token.kind == TokenKind.PUNCTUATION

            if directive is not None:
                # A directive is taken to end with its line. Lines
                # continued with a trailing backslash are not handled.
                if token.location.line != directive.location.line:
                    self.directives.append(directive)
                    directive = None
                    # Fall through: this token starts something new.
                elif token.kind in directive_kinds:
                    # Re-create the spacing between tokens from columns.
                    gap = token.extent.start.column - directive_end
                    directive.content += " " * gap
                    directive.content += token.spelling
                    directive_end = token.extent.end.column
                    continue

            if attr is not None:
                if is_punct and token.spelling == "(":
                    depth += 1
                    # The outermost "(" is not part of the result.
                    if depth == 1:
                        continue
                elif is_punct and token.spelling == ")":
                    depth -= 1
                    # Back outside all parentheses: the attribute is done.
                    if depth == 0:
                        attr.push(tmp_key, tmp_val)
                        self.attr_infos.append(attr)
                        attr = None
                        continue

                # "=" and "," separate arguments only at the attribute's
                # own parenthesis level; inside nested parentheses they are
                # part of the argument text (e.g. "v = Vec(1, 2)").
                is_separator = is_punct and depth <= 1
                if is_separator and token.spelling == "=":
                    after_equals = True
                elif is_separator and token.spelling == ",":
                    attr.push(tmp_key, tmp_val)
                    tmp_key = tmp_val = ""
                    after_equals = False
                elif token.kind != TokenKind.COMMENT:
                    # Tokens of one argument are joined by single spaces.
                    if after_equals:
                        if tmp_val != "":
                            tmp_val += " "
                        tmp_val += token.spelling
                    else:
                        if tmp_key != "":
                            tmp_key += " "
                        tmp_key += token.spelling
            elif token.kind == TokenKind.IDENTIFIER:
                if self.attr_pattern.match(token.spelling):
                    attr = AttributeInfo(token.spelling)
                    attr.location = token.location
                    depth = 0
                    after_equals = False
                    tmp_key = tmp_val = ""
            elif is_punct:
                if token.spelling == "#":
                    directive = Directive()
                    directive.content = token.spelling
                    directive.location = token.location
                    directive_end = token.extent.end.column

        # A directive on the last line (typically the closing #endif) has
        # no following token to terminate it, so flush it here.
        if directive is not None:
            self.directives.append(directive)

    def print_node_tree(self, node, indent=""):
        """Print the cursor tree below ``node`` (debugging aid)."""
        print("%s%s : %s" % (indent, node.kind.name, node.spelling))
        for child in node.get_children():
            self.print_node_tree(child, indent + "\t")

    def find_attributes_within(self, start_offset, end_offset):
        """Return attributes located strictly between the two offsets."""
        # NOTE: switch to binary search if this ever becomes a bottleneck.
        return [attr for attr in self.attr_infos
                if start_offset < attr.location.offset < end_offset]

    def find_directives_within(self, start_offset, end_offset):
        """Return directives located strictly between the two offsets."""
        # NOTE: switch to binary search if this ever becomes a bottleneck.
        return [directive for directive in self.directives
                if start_offset < directive.location.offset < end_offset]

    def collect_types(self, cursor, prev_offset=0, outer_namespaces=None,
                      outer_classes=None):
        """Record the type declared by ``cursor``, descending namespaces.

        ``prev_offset`` is the offset where the previous sibling ended;
        attributes between it and ``cursor`` are attached to the new type.
        Classes, structs and enums are appended to ``self.type_infos``.
        """
        # None instead of [] so no list is shared between calls.
        if outer_namespaces is None:
            outer_namespaces = []
        if outer_classes is None:
            outer_classes = []

        kind = cursor.kind
        if kind in (CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL):
            record_info = RecordInfo(cursor, outer_namespaces, outer_classes)
            record_info.attributes = self.find_attributes_within(
                prev_offset, cursor.location.offset)
            self.type_infos.append(record_info)
            record_info.read_inner(
                self, outer_namespaces, outer_classes + [cursor.spelling])
        elif kind == CursorKind.ENUM_DECL:
            enum_info = EnumInfo(cursor, outer_namespaces, outer_classes)
            enum_info.attributes = self.find_attributes_within(
                prev_offset, cursor.location.offset)
            self.type_infos.append(enum_info)
            enum_info.read_inner(self)
        elif kind == CursorKind.NAMESPACE:
            inner_namespaces = outer_namespaces + [cursor.spelling]
            for child in cursor.get_children():
                self.collect_types(
                    child, prev_offset, inner_namespaces, outer_classes)
                prev_offset = child.extent.end.offset

    def find_type_info(self, canonical_name):
        """Return the collected type whose qualified name matches, or None.

        Anything from the first ``<`` onward (template arguments) is
        ignored when comparing.
        """
        name_fq = canonical_name.partition("<")[0]
        for type_info in self.type_infos:
            if name_fq == type_info.get_name_fully_qualified():
                return type_info
        return None

    def parse(self, header_file, inc_path):
        """Parse ``header_file`` and return the types it declares.

        ``header_file`` is joined onto ``inc_path`` to locate the file on
        disk. Returns a list of the collected type infos located in that
        header, or None (after printing the diagnostics) when clang reports
        an error.
        """
        index = Index.create()
        src_header_file = os.path.join(inc_path, header_file)
        self.header_path = src_header_file

        # Always hand clang an in-memory .cpp file that includes the
        # header; otherwise the declarations are not parsed as C++.
        cpp_file = header_file + ".cpp"
        buf = "#include \"" + header_file + "\""
        tu = index.parse(cpp_file, self.clang_args, [(cpp_file, buf)],
                         TranslationUnit.PARSE_SKIP_FUNCTION_BODIES)

        # Gather error-level diagnostics.
        errors = []
        for diagnostic in tu.diagnostics:
            if diagnostic.severity >= Diagnostic.Error:
                if diagnostic.location.file:
                    errors.append("file:" + diagnostic.location.file.name)
                    errors.append("line:" + str(diagnostic.location.line))
                errors.append(diagnostic.spelling)
        if errors:
            print("\n".join(errors))
            return None

        # Build a source range covering exactly the header file.
        size = int(os.path.getsize(src_header_file))
        start = tu.get_location(src_header_file, 0)
        end = tu.get_location(src_header_file, size)
        extent = SourceRange.from_locations(start, end)
        header_name = start.file.name

        # Collect attributes and conditional directives.
        self.collect_tokens(tu.get_tokens(None, extent))

        # Collect type information. Track where the previous declaration in
        # the header ended, so a top-level type only receives the
        # attributes written directly before it. Declarations from other
        # files are skipped because their offsets refer to another file.
        prev_offset = 0
        for child in tu.cursor.get_children():
            self.collect_types(child, prev_offset)
            child_file = child.location.file
            if child_file and child_file.name == header_name:
                prev_offset = child.extent.end.offset

        # Keep only declarations located in the header itself. A real list
        # is returned so the result can be indexed and iterated repeatedly.
        return [type_info for type_info in self.type_infos
                if type_info.location.file
                and type_info.location.file.name == header_name]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment