Last active
April 1, 2016 00:28
-
-
Save YosukeM/1603874ab12e864cb7d1 to your computer and use it in GitHub Desktop.
C++のヘッダファイルにつけられた属性マクロを解析
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# @file | |
# @brief C++リフレクション | |
# @author Yosuke Morimoto | |
# @license Public Domain | |
import sys | |
import clang.cindex | |
import os | |
import re | |
from clang.cindex import Index | |
from clang.cindex import TranslationUnit | |
from clang.cindex import Cursor | |
from clang.cindex import CursorKind | |
from clang.cindex import Type | |
from clang.cindex import TokenKind | |
from clang.cindex import SourceRange | |
from clang.cindex import Diagnostic | |
# | |
# 属性付き・簡易抽象構文木 | |
# | |
class Directive(object):
    """A preprocessor directive line found in the parsed header.

    Instances start out empty; the code that creates them fills in the
    attributes afterwards.
    """

    def __init__(self):
        # Where the directive begins (None until assigned), and the
        # directive's source text (empty until assigned).
        self.location, self.content = None, ""
class AttributeInfo(object):
    """One attribute macro invocation and its raw argument text.

    ``args`` holds positional arguments and ``hash`` holds ``key = value``
    arguments; both store the unparsed argument strings.  ``parse_arg`` and
    ``parse_hash`` convert a raw argument into a Python value (the
    implementation is deliberately simple):

    * string literal          -> str (surrounding quotes removed)
    * character literal       -> str (surrounding quotes removed)
    * floating-point literal  -> float
    * integer literal         -> int
    * ``true`` / ``false``    -> bool
    * anything else           -> None
    """

    # Optional sign, then hex / binary / 0o-octal / plain digits, then an
    # optional C integer suffix (u, l, ul, ll, ...).  Whitespace is allowed
    # after the sign because argument text is built by joining tokens with
    # single spaces.
    _INT_LITERAL = re.compile(
        r'(-?)\s*(0[xX][0-9a-fA-F]+|0[bB][01]+|0[oO][0-7]+|[0-9]+)[uUlL]*$')

    # Optional sign, then digits with a decimal point and/or an exponent,
    # then an optional C floating suffix (f or l).
    _FLOAT_LITERAL = re.compile(
        r'(-?)\s*((?:[0-9]+\.[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?'
        r'|[0-9]+[eE][+-]?[0-9]+)[fFlL]?$')

    def __init__(self, name):
        self.name = name
        # Positional arguments, as raw strings.
        self.args = []
        # Keyword arguments: raw key string -> raw value string.
        self.hash = {}
        self.location = None

    def parse_arg(self, index):
        """Return positional argument ``index`` converted to a Python value.

        Raises IndexError when there is no such argument.
        """
        return self._parse(self.args[index])

    def parse_hash(self, key):
        """Return keyword argument ``key`` converted to a Python value.

        Returns None when the key is absent.
        """
        if key not in self.hash:
            return None
        return self._parse(self.hash[key])

    def _parse(self, literal):
        """Convert one raw literal string; return None if it is not recognised."""
        text = literal.strip()

        # Integer literal.
        match = self._INT_LITERAL.match(text)
        if match:
            sign, digits = match.groups()
            try:
                if len(digits) > 1 and digits[0] == "0" and digits.isdigit():
                    # C-style octal ("010"); int(x, 0) rejects this form
                    # on Python 3, so convert it explicitly.
                    value = int(digits, 8)
                else:
                    # Base 0 lets int() infer the radix from the prefix.
                    value = int(digits, 0)
            except ValueError:
                # e.g. "09": looks like octal but is not a valid number.
                return None
            return -value if sign else value

        # Floating-point literal.  NaN and Infinity are not supported.
        match = self._FLOAT_LITERAL.match(text)
        if match:
            sign, digits = match.groups()
            return float(sign + digits)

        lowered = text.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False

        # String literal: drop exactly one quote from each end so that an
        # escaped quote at the edge of the contents is kept.
        if text.startswith('"') and text.endswith('"'):
            return text[1:-1]
        # Character literal: same treatment.
        if text.startswith("'") and text.endswith("'"):
            return text[1:-1]
        return None

    def push(self, key, val):
        """Record one argument.

        With an empty ``val`` the ``key`` text is stored as a positional
        argument (and ignored if it is empty too); otherwise ``val`` is
        stored under ``key`` in ``hash``.
        """
        if val == "":
            if key != "":
                self.args.append(key)
        else:
            self.hash[key] = val
class AstNode(object):
    """Base class for declarations that can carry attributes.

    Keeps the originating cursor together with its spelling and location.
    """

    def __init__(self, cursor):
        self.cursor = cursor
        self.location = cursor.location
        self.name = cursor.spelling
        # Attributes attached to this node; assigned by whoever builds it.
        self.attributes = []

    def find_attr(self, name):
        """Return the first attached attribute called ``name``, or None."""
        matches = (candidate for candidate in self.attributes
                   if candidate.name == name)
        return next(matches, None)
class TypeInfo(AstNode):
    """A type declaration plus the names of the scopes that enclose it.

    ``outer_namespaces`` and ``outer_classes`` are lists of names, ordered
    from the outermost scope inwards.
    """

    def __init__(self, cursor, outer_namespaces, outer_classes):
        super(TypeInfo, self).__init__(cursor)
        self.outer_namespaces = outer_namespaces
        self.outer_classes = outer_classes
        # Cache for get_name_fully_qualified().
        self._name_fq = None

    def get_name_with_outer_classes(self):
        """Return the name qualified by enclosing classes only."""
        return "::".join(self.outer_classes + [self.name])

    def get_name_fully_qualified(self):
        """Return the name qualified by enclosing namespaces and classes.

        The result is computed once and cached.
        """
        if not self._name_fq:
            scopes = self.outer_namespaces + self.outer_classes
            self._name_fq = "::".join(scopes + [self.name])
        return self._name_fq
class RecordInfo(TypeInfo):
    """A class or struct declaration together with its members."""

    def __init__(self, cursor, outer_namespaces, outer_classes):
        super(RecordInfo, self).__init__(cursor, outer_namespaces, outer_classes)
        self.fields = []
        self.methods = []
        self.inner_types = []
        self.inner_attrs = []
        self.inner_directives = []
        # Fields, methods, types, attributes and directives mixed together,
        # ordered by source offset.
        self.inner_entities = []
        # Canonical spellings of the direct base types.
        self.base_type_names = []
        # Parser used by read_inner(); needed later by is_derived_from().
        self._parser = None

    def read_inner(self, parser, outer_namespaces, outer_classes):
        """Walk the record's children and populate the member lists.

        ``parser`` supplies the collected attributes and directives and
        receives every nested type in ``parser.type_infos``.
        ``outer_classes`` is the class scope for nested types; the callers
        in this file pass it with this record's own name already appended.
        """
        self._parser = parser

        # Attributes of a member are those found between the end of the
        # previous child and the member itself.  Start from this record's
        # own offset; it must be an integer offset, not the location
        # object, because find_attributes_within compares it with offsets.
        prev_offset = self.cursor.location.offset

        method_kinds = {
            CursorKind.CONSTRUCTOR,
            CursorKind.DESTRUCTOR,
            CursorKind.CXX_METHOD,
        }

        for cursor in self.cursor.get_children():
            kind = cursor.kind
            if kind == CursorKind.CXX_BASE_SPECIFIER:
                canonical_name = cursor.type.get_canonical().spelling
                self.base_type_names.append(canonical_name)
            elif kind == CursorKind.CLASS_DECL or kind == CursorKind.STRUCT_DECL:
                record_info = RecordInfo(cursor, outer_namespaces, outer_classes)
                record_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                parser.type_infos.append(record_info)
                record_info.read_inner(
                    parser, outer_namespaces, outer_classes + [cursor.spelling])
                self.inner_types.append(record_info)
            elif kind == CursorKind.ENUM_DECL:
                enum_info = EnumInfo(cursor, outer_namespaces, outer_classes)
                enum_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                parser.type_infos.append(enum_info)
                enum_info.read_inner(parser)
                self.inner_types.append(enum_info)
            elif kind == CursorKind.FIELD_DECL:
                field_info = FieldInfo(cursor)
                field_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                self.fields.append(field_info)
            elif kind in method_kinds:
                method_info = MethodInfo(cursor)
                method_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                self.methods.append(method_info)
            prev_offset = cursor.extent.end.offset

        # Collect attributes and directives only when the record is defined
        # in the header being parsed; offsets from other files would not be
        # comparable with the collected tokens.
        location_file = self.cursor.location.file
        if location_file and parser.header_path == location_file.name:
            start = self.cursor.extent.start.offset
            end = self.cursor.extent.end.offset
            self.inner_attrs = parser.find_attributes_within(start, end)
            self.inner_directives = parser.find_directives_within(start, end)

        # Merge fields, methods, types, attributes and directives and sort
        # them by their position in the source.
        inner_entities = (self.fields + self.methods + self.inner_types
                          + self.inner_attrs + self.inner_directives)
        self.inner_entities = sorted(inner_entities, key=lambda x: x.location.offset)

    def is_derived_from(self, type_name_fq):
        """Return True if ``type_name_fq`` is a direct or indirect base.

        Direct bases are compared by canonical spelling; indirect bases are
        resolved through the parser passed to read_inner(), so only base
        types the parser knows as records are followed.
        """
        if type_name_fq in self.base_type_names:
            return True
        for base_type_name in self.base_type_names:
            type_info = self._parser.find_type_info(base_type_name)
            if type_info and isinstance(type_info, RecordInfo):
                if type_info.is_derived_from(type_name_fq):
                    return True
        return False
class FieldInfo(AstNode):
    """A data member of a record.

    Adds nothing to AstNode; construction is inherited unchanged and takes
    the member's cursor.
    """
class MethodInfo(AstNode):
    """A constructor, destructor or member function of a record."""

    def __init__(self, cursor):
        super(MethodInfo, self).__init__(cursor)

    def is_static(self):
        """Return whether the cursor reports a static method."""
        return self.cursor.is_static_method()

    def is_const(self):
        """Return True when the method's USR marks it as const.

        Reads the single character that follows the last ``#`` in the USR
        as a number and tests its lowest bit.
        NOTE(review): this relies on how clang encodes method qualifiers in
        the USR -- confirm against the libclang version in use.
        """
        usr = self.cursor.get_usr()
        hash_pos = usr.rfind("#")
        if hash_pos == -1 or hash_pos == len(usr) - 1:
            # No '#', or nothing after it: no qualifier flags present.
            return False
        try:
            flags = int(usr[hash_pos + 1])
        except ValueError:
            # The character after '#' is not a digit.
            return False
        return (flags & 1) != 0

    def is_virtual(self):
        """Return libclang's clang_CXXMethod_isVirtual result for the cursor."""
        return clang.cindex.conf.lib.clang_CXXMethod_isVirtual(self.cursor)

    def is_pure_virtual(self):
        """Return libclang's clang_CXXMethod_isPureVirtual result for the cursor."""
        return clang.cindex.conf.lib.clang_CXXMethod_isPureVirtual(self.cursor)
class EnumInfo(TypeInfo):
    """An enum declaration together with its constants."""

    def __init__(self, cursor, outer_namespaces, outer_classes):
        super(EnumInfo, self).__init__(cursor, outer_namespaces, outer_classes)
        self.constants = []

    def read_inner(self, parser):
        """Walk the enum's children and populate ``constants``.

        Each constant receives the attributes that ``parser`` collected
        between the end of the previous child and the constant itself.
        """
        # Must be an integer offset, not the location object, because
        # find_attributes_within compares it with attribute offsets.
        prev_offset = self.cursor.location.offset
        for cursor in self.cursor.get_children():
            if cursor.kind == CursorKind.ENUM_CONSTANT_DECL:
                constant_info = EnumConstantInfo(cursor)
                constant_info.attributes = parser.find_attributes_within(
                    prev_offset, cursor.location.offset)
                self.constants.append(constant_info)
            prev_offset = cursor.extent.end.offset
class EnumConstantInfo(AstNode):
    """A single enumerator of an enum.

    Adds nothing to AstNode; construction is inherited unchanged and takes
    the enumerator's cursor.
    """
# | |
# パーサ | |
# | |
class Parser(object):
    """Collects attribute macros, preprocessor directives and type
    declarations from a C++ header by way of libclang.

    ``attr_regex`` is matched (with ``re.match``) against identifier tokens
    to decide which identifiers are attribute macros.
    """

    def __init__(self, attr_regex):
        self.clang_args = []
        # Pattern identifying attribute macro names.
        self.attr_pattern = re.compile(attr_regex)
        # Path of the header being parsed.
        self.header_path = ""
        # Results are accumulated here.
        self.attr_infos = []
        self.directives = []
        self.type_infos = []

    def set_clang_library_file(self, library_file):
        """Tell the clang bindings which libclang library file to load."""
        clang.cindex.Config.set_library_file(library_file)

    def set_clang_args(self, args):
        """Set the command-line arguments handed to clang when parsing."""
        self.clang_args = args

    def collect_tokens(self, tokens):
        """Scan ``tokens`` for attribute macros and preprocessor directives.

        Completed attributes are appended to ``attr_infos`` and directives
        to ``directives``.
        """
        depth = 0
        attr = None
        after_equals = False
        tmp_key = ""
        tmp_val = ""
        directive = None
        directive_end = 0

        for token in tokens:
            if directive is not None:
                # A line may end with a backslash continuation, so this
                # end-of-directive test is not exact; there is no easy way
                # to detect that, so it is left as is for now.
                if token.location.line != directive.location.line:
                    self.directives.append(directive)
                    directive = None
                    # Fall through: this token still needs normal handling.
                elif token.kind in [TokenKind.KEYWORD, TokenKind.IDENTIFIER,
                                    TokenKind.PUNCTUATION]:
                    # Re-create the original spacing between tokens.
                    space_num = token.extent.start.column - directive_end
                    directive.content += " " * space_num
                    directive.content += token.spelling
                    directive_end = token.extent.end.column
                    continue
                # NOTE(review): other token kinds (e.g. literals) on a
                # directive line are not added to its content and fall
                # through to the handling below -- confirm this is intended.

            is_punct = token.kind == TokenKind.PUNCTUATION

            if attr is not None:
                # Parenthesis tracking.
                if is_punct and token.spelling == "(":
                    depth += 1
                    # The first "(" is not part of the arguments.
                    if depth == 1:
                        continue
                elif is_punct and token.spelling == ")":
                    depth -= 1
                    # Back outside the parentheses: the attribute is done.
                    if depth == 0:
                        attr.push(tmp_key, tmp_val)
                        self.attr_infos.append(attr)
                        attr = None
                        continue

                # "=" and "," separate arguments only at the attribute's
                # own parenthesis level; inside nested parentheses they
                # are part of the argument text.
                at_arg_level = depth <= 1
                if is_punct and at_arg_level and token.spelling == "=":
                    after_equals = True
                elif is_punct and at_arg_level and token.spelling == ",":
                    attr.push(tmp_key, tmp_val)
                    tmp_key = tmp_val = ""
                    after_equals = False
                elif token.kind != TokenKind.COMMENT:
                    # Argument text is the token spellings joined by spaces.
                    if after_equals:
                        if tmp_val != "":
                            tmp_val += " "
                        tmp_val += token.spelling
                    else:
                        if tmp_key != "":
                            tmp_key += " "
                        tmp_key += token.spelling
            elif token.kind == TokenKind.IDENTIFIER:
                if self.attr_pattern.match(token.spelling):
                    attr = AttributeInfo(token.spelling)
                    attr.location = token.location
                    depth = 0
                    after_equals = False
                    tmp_key = tmp_val = ""
            elif is_punct:
                if token.spelling == "#":
                    directive = Directive()
                    directive.content = token.spelling
                    directive.location = token.location
                    directive_end = token.extent.end.column

        # A directive on the last tokenised line has no following token to
        # terminate it; record it here so it is not lost.
        if directive is not None:
            self.directives.append(directive)

    def print_node_tree(self, node, indent = ""):
        """Print the cursor tree rooted at ``node`` (debugging aid)."""
        print("%s%s : %s" % (indent, node.kind.name, node.spelling))
        for child in node.get_children():
            self.print_node_tree(child, indent + "\t")

    def find_attributes_within(self, start_offset, end_offset):
        """Return collected attributes located strictly between the offsets."""
        # note: consider a binary search if speed becomes a problem
        return [attr for attr in self.attr_infos
                if start_offset < attr.location.offset < end_offset]

    def find_directives_within(self, start_offset, end_offset):
        """Return collected directives located strictly between the offsets."""
        # note: consider a binary search if speed becomes a problem
        return [directive for directive in self.directives
                if start_offset < directive.location.offset < end_offset]

    def collect_types(self, cursor, prev_offset = 0, outer_namespaces = None, outer_classes = None):
        """Record the type declared by ``cursor``, descending into namespaces.

        ``prev_offset`` is the offset where the preceding declaration ended;
        attributes found between it and ``cursor`` are attached to the type.
        ``outer_namespaces`` / ``outer_classes`` default to empty lists.
        """
        # Fresh lists per call instead of shared mutable defaults.
        if outer_namespaces is None:
            outer_namespaces = []
        if outer_classes is None:
            outer_classes = []

        if cursor.kind == CursorKind.CLASS_DECL or cursor.kind == CursorKind.STRUCT_DECL:
            record_info = RecordInfo(cursor, outer_namespaces, outer_classes)
            record_info.attributes = self.find_attributes_within(prev_offset, cursor.location.offset)
            self.type_infos.append(record_info)
            record_info.read_inner(self, outer_namespaces, outer_classes + [cursor.spelling])
        elif cursor.kind == CursorKind.ENUM_DECL:
            enum_info = EnumInfo(cursor, outer_namespaces, outer_classes)
            enum_info.attributes = self.find_attributes_within(prev_offset, cursor.location.offset)
            self.type_infos.append(enum_info)
            enum_info.read_inner(self)
        elif cursor.kind == CursorKind.NAMESPACE:
            for child in cursor.get_children():
                self.collect_types(child, prev_offset, outer_namespaces + [cursor.spelling], outer_classes)
                prev_offset = child.extent.end.offset

    def find_type_info(self, canonical_name):
        """Return the collected type whose fully qualified name matches.

        Template arguments (everything from the first ``<``) are ignored.
        Returns None when no collected type matches.
        """
        # Strip template arguments from the type name.
        pos = canonical_name.find("<")
        name_fq = canonical_name if pos == -1 else canonical_name[0:pos]
        # Look for a type with the same name.
        for type_info in self.type_infos:
            if name_fq == type_info.get_name_fully_qualified():
                return type_info
        return None

    def parse(self, header_file, inc_path):
        """Parse ``header_file`` (relative to ``inc_path``).

        Returns a list of the TypeInfo objects declared in that header, or
        None (after printing the diagnostics) when clang reports errors.
        Results accumulate in ``attr_infos``, ``directives`` and
        ``type_infos``; they are not cleared between calls.
        """
        index = Index.create()
        src_header_file = os.path.join(inc_path, header_file)
        self.header_path = src_header_file

        # Whatever the file is, hand it to clang as a .cpp file; otherwise
        # the declarations are not interpreted as C++.
        cpp_file = header_file + ".cpp"
        buf = "#include \"" + header_file + "\""
        tu = index.parse(cpp_file, self.clang_args, [(cpp_file, buf)], TranslationUnit.PARSE_SKIP_FUNCTION_BODIES)

        # Gather errors.
        errors = []
        for diagnostic in tu.diagnostics:
            if diagnostic.severity >= Diagnostic.Error:
                if diagnostic.location.file:
                    errors.append("file:" + diagnostic.location.file.name)
                errors.append("line:" + str(diagnostic.location.line))
                errors.append(diagnostic.spelling)
        if errors:
            print("\n".join(errors))
            return None

        # Build a SourceRange that covers exactly the header file.
        size = int(os.path.getsize(src_header_file))
        start = tu.get_location(src_header_file, 0)
        end = tu.get_location(src_header_file, size)
        extent = SourceRange.from_locations(start, end)

        # Collect attributes and conditional directives.
        self.collect_tokens(tu.get_tokens(None, extent))

        header_name = start.file.name

        # Collect type information.  Track where the previous top-level
        # declaration of the header ended so that each type only receives
        # the attributes written directly before it, not those belonging
        # to earlier declarations.  Offsets of declarations from other
        # files are not comparable, so they do not advance the marker.
        prev_offset = 0
        for child in tu.cursor.get_children():
            self.collect_types(child, prev_offset)
            child_file = child.location.file
            if child_file and child_file.name == header_name:
                prev_offset = child.extent.end.offset

        # Keep only the declarations that live in the header itself.  A real
        # list is returned so the result can be indexed and iterated more
        # than once.
        return [type_info for type_info in self.type_infos
                if type_info.location.file
                and type_info.location.file.name == header_name]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment