Last active
August 2, 2018 06:07
-
-
Save kurtbrose/1a99167917998647a4707b5d475edbbe to your computer and use it in GitHub Desktop.
parser for .thrift files (work in progress)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections | |
import types | |
import parsley | |
from thriftpy.thrift import gen_init, TType, TPayload | |
# Result of the grammar's top-level ``Document`` rule: the parsed header
# strings and the list of parsed definitions.
Document = collections.namedtuple('Document', 'headers definitions')
# Result of the grammar's ``Exception`` rule: the declared name and its list
# of Field records (the trailing underscore avoids shadowing the builtin).
Exception_ = collections.namedtuple('Exception_', 'name fields')
# One service function: name, return ttype, argument fields, the ``oneway``
# marker (or None) and the fields of its ``throws`` clause (or None).
Function = collections.namedtuple('Function', 'name ttype fields oneway throws')
# One field of a struct / union / exception / argument list, in the order the
# grammar's ``Field`` rule produces them.
Field = collections.namedtuple('Field', 'id req ttype name default')
# Placeholder ``__module__`` stamped on generated classes; it is swapped for
# the real module name once a whole document has been parsed.
_DEFERRED_MODULE_NAME = '<string>'
def _make_union(name, fields):
    """Build the payload class for a thrift ``union`` declaration.

    An empty struct class called *name* is created with the default ttype
    and base class, and is then populated from *fields*.
    """
    return _fill_in_struct(_make_empty_struct(name), fields)
def _make_enum(name, kvs):
    """Build a class representing a thrift ``enum`` declaration.

    :param name: name of the enum; becomes the class name.
    :param kvs: sequence of ``(member_name, value)`` pairs in declaration
        order, where ``value`` is ``None`` when the member was declared
        without an explicit ``= N``.  May be empty or ``None``.
    :returns: a new class with one attribute per member plus the
        ``_NAMES_TO_VALUES`` / ``_VALUE_TO_NAMES`` lookup dicts.

    A member without an explicit value gets the previous member's value
    plus one; a leading member without a value gets 0.
    """
    attrs = {'__module__': _DEFERRED_MODULE_NAME, '_ttype': TType.I32}
    cls = type(name, (object,), attrs)
    values_to_names = {}
    names_to_values = {}
    # Starting at -1 makes an implicit first member resolve to 0.
    previous = -1
    for key, explicit in kvs or ():
        # Resolve the value *before* storing it; storing the raw parsed value
        # would leave implicit members set to None.
        value = previous + 1 if explicit is None else explicit
        setattr(cls, key, value)
        values_to_names[value] = key
        names_to_values[key] = value
        previous = value
    cls._VALUE_TO_NAMES = values_to_names
    # NOTE(review): thriftpy's own parser spells this attribute
    # ``_VALUES_TO_NAMES``; both spellings are exposed so existing readers of
    # either name keep working -- confirm which one consumers rely on.
    cls._VALUES_TO_NAMES = values_to_names
    cls._NAMES_TO_VALUES = names_to_values
    return cls
def _make_empty_struct(name, ttype=TType.STRUCT, base_cls=TPayload):
    """Create a field-less payload class called *name*.

    The class derives from *base_cls*, records *ttype* as ``_ttype`` and
    carries the deferred-module placeholder as its ``__module__``.
    """
    namespace = {'_ttype': ttype, '__module__': _DEFERRED_MODULE_NAME}
    bases = (base_cls,)
    return type(name, bases, namespace)
def _fill_in_struct(cls, fields, _gen_init=True):
    """Attach thrift field metadata to *cls* and return it.

    :param cls: the payload class to populate.
    :param fields: iterable of ``Field`` records.
    :param _gen_init: when true, call ``gen_init`` so the class gets an
        ``__init__`` built from the collected specs.
    :returns: *cls*, now carrying ``thrift_spec`` (field id -> spec tuple),
        ``default_spec`` (list of ``(name, default)``) and ``_tspec``
        (field name -> ``Field``).
    :raises DuplicateField: if two fields share an id or a name.
    """
    thrift_spec = {}
    default_spec = []
    _tspec = {}
    for field in fields:
        if field.id in thrift_spec:
            raise DuplicateField('offset {} used by fields {} and {}'.format(
                field.id, field.name, thrift_spec[field.id][1]))
        if field.name in _tspec:
            raise DuplicateField('name {} used twice'.format(field.name))
        # The grammar yields the requiredness keyword as a string
        # ('required' / 'optional' / 'default'); every one of those is truthy,
        # so collapse it to a real boolean before it enters the spec tuple.
        # A literal True is still honoured for callers that already pass one.
        required = field.req is True or field.req == 'required'
        thrift_spec[field.id] = _ttype_spec(field.ttype, field.name, required)
        default_spec.append((field.name, field.default))
        _tspec[field.name] = field
    cls.thrift_spec = thrift_spec
    cls.default_spec = default_spec
    cls._tspec = _tspec
    if _gen_init:
        gen_init(cls, thrift_spec, default_spec)
    return cls  # remember -- assign __module__ after these come back
def _make_struct(name, fields, ttype=TType.STRUCT, base_cls=TPayload,
                 _gen_init=True):
    """Create a payload class called *name* and populate it from *fields*.

    A falsy *fields* (``None`` or an empty sequence) is treated as "no
    fields".  *ttype* and *base_cls* are forwarded to the empty-class
    factory and *_gen_init* to the field-filling step.
    """
    struct_cls = _make_empty_struct(name, ttype=ttype, base_cls=base_cls)
    if not fields:
        fields = ()
    return _fill_in_struct(struct_cls, fields, _gen_init=_gen_init)
# TODO: how to get concrete object here? -- some kinda registry object that is passed around?
def _make_service(name, funcs, extends):
    """Build the class representing a thrift ``service`` declaration.

    For every function in *funcs* two payload classes are attached to the
    service class: ``<func>_args`` (built from the function's argument
    fields) and ``<func>_result`` (built from its ``throws`` fields, plus a
    ``success`` slot at id 0 unless the function returns void).

    :param name: service name; becomes the class name.
    :param funcs: iterable of ``Function`` records.
    :param extends: base class of the service, or ``None`` for ``object``.
    :returns: the new class; its ``thrift_services`` lists this service's
        function names followed by any inherited from *extends*.
    """
    base = object if extends is None else extends
    service_cls = type(name, (base,), {'__module__': _DEFERRED_MODULE_NAME})
    service_names = []
    for func in funcs:
        # Payload class carrying the call arguments.
        args_attr = '%s_args' % func.name
        setattr(service_cls, args_attr, _make_struct(args_attr, func.fields))

        # Payload class carrying the reply; __init__ generation is postponed
        # until the optional ``success`` slot has been added.
        result_attr = '%s_result' % func.name
        result_cls = _make_struct(result_attr, func.throws, _gen_init=False)
        result_cls.oneway = func.oneway
        if func.ttype != TType.VOID:
            result_cls.thrift_spec[0] = _ttype_spec(func.ttype, 'success')
            result_cls.default_spec.insert(0, ('success', None))
        gen_init(result_cls, result_cls.thrift_spec, result_cls.default_spec)
        setattr(service_cls, result_attr, result_cls)

        service_names.append(func.name)
    # ``object`` has no thrift_services, so a base-less service adds nothing.
    service_names.extend(getattr(base, 'thrift_services', ()))
    service_cls.thrift_services = service_names
    return service_cls
def _ttype_spec(ttype, name, required=False): | |
if isinstance(ttype, int): | |
return ttype, name, required | |
else: | |
return ttype[0], name, ttype[1], required | |
# Thrift base-type keyword -> TType constant.  The grammar's ``BaseType`` rule
# looks keywords up here through the ``BaseTType`` binding
# (``BASE_TYPE_MAP.get``).  ``byte`` and ``i8`` both map to TType.BYTE.
BASE_TYPE_MAP = {
    'bool': TType.BOOL,
    'byte': TType.BYTE,
    'i8': TType.BYTE,
    'i16': TType.I16,
    'i32': TType.I32,
    'i64': TType.I64,
    'double': TType.DOUBLE,
    'string': TType.STRING,
    'binary': TType.BINARY
}
# Parsley grammar for .thrift IDL files.  Rule names follow the Thrift IDL
# grammar; the dict passed as the second argument binds the names that the
# ``-> ...`` actions call (record types and the class-building helpers).
#
# Rule values worth knowing:
#   * Header / Include / Namespace evaluate to the matched source text.
#   * Const and Typedef evaluate to plain tuples tagged 'const' / 'typedef'.
#   * Enum / Struct / Union / Service evaluate to freshly built classes.
#   * Container types evaluate to ``(TType.X, element_spec)`` pairs and struct
#     references to ``(TType.STRUCT, name)``.
#
# Details that are easy to get wrong when editing:
#   * DoubleConstant must bind its captured text (``:val``) for ``float(val)``
#     to work, and must be tried before IntConstant in ConstValue -- ordered
#     choice would otherwise stop at the integer part of ``1.5``.  It requires
#     a fraction or an exponent so plain integers still reach IntConstant.
#   * A ``-> action`` belongs to a single alternative, so each alternative of
#     Literal carries its own.
#
# NOTE(review): Service has no ``brk`` between 'extends' and the parent
# identifier or before '{', and the parent arrives as a plain identifier
# string rather than a class.  Reserved is an ordered choice, so a keyword
# that has an earlier, shorter keyword as a prefix (e.g. 'enddeclare' after
# 'end') is never rejected by not_reserved.  Both left as-is pending a decision.
_GRAMMAR = parsley.makeGrammar('''
Document = (brk Header)*:hs (brk Definition)*:ds brk -> Document(hs, ds)
Header = <Include | Namespace>
Include = brk <'include' brk Literal>
Namespace = brk <'namespace' brk ((NamespaceScope ('.' Identifier)?)| unsupported_namespacescope) brk Identifier>:hdr brk uri? -> hdr
uri = '(' ws 'uri' ws '=' ws Literal:uri ws ')' -> uri
NamespaceScope = '*' | 'cpp' | 'java' | 'py.twisted' | 'py' | 'perl' | 'rb' | 'cocoa' | 'csharp' | 'xsd' | 'c_glib' | 'js' | 'st' | 'go' | 'php' | 'delphi' | 'lua'
unsupported_namespacescope = Identifier
Definition = brk (Const | Typedef | Enum | Struct | Union | Exception | Service)
Const = 'const' brk FieldType:type brk Identifier:name brk '=' brk ConstValue:val brk ListSeparator? -> 'const', type, name, val
Typedef = 'typedef' brk DefinitionType:type brk Identifier:alias -> 'typedef', type, alias
Enum = 'enum' brk Identifier:name brk '{' enum_item*:vals '}' -> Enum(name, vals)
enum_item = brk Identifier:name brk ('=' brk IntConstant)?:value brk ListSeparator? brk -> name, value
Struct = 'struct' brk name_fields:nf brk immutable? -> Struct(nf[0], nf[1])
Union = 'union' brk name_fields:nf -> Union(nf[0], nf[1])
Exception = 'exception' brk name_fields:nf -> Exception_(nf[0], nf[1])
name_fields = Identifier:name brk '{' (brk Field)*:fields brk '}' -> name, fields
Service = 'service' brk Identifier:name brk ('extends' Identifier)?:extends '{' (brk Function)*:funcs brk '}' -> Service(name, funcs, extends)
Field = brk FieldID:id brk FieldReq?:req brk FieldType:ttype brk Identifier:name brk ('=' brk ConstValue)?:default brk ListSeparator? -> Field(id, req, ttype, name, default)
FieldID = IntConstant:val ':' -> val
FieldReq = 'required' | 'optional' | !('default')
# Functions
Function = 'oneway'?:oneway brk FunctionType:ft brk Identifier:name '(' (brk Field*):fs ')' brk Throws?:throws brk ListSeparator? -> Function(name, ft, fs, oneway, throws)
FunctionType = ('void' !(TType.VOID)) | FieldType
Throws = 'throws' '(' (brk Field)*:fs ')' -> fs
# Types
FieldType = ContainerType | BaseType | StructType
DefinitionType = BaseType | ContainerType
BaseType = ('bool' | 'byte' | 'i8' | 'i16' | 'i32' | 'i64' | 'double' | 'string' | 'binary'):ttype -> BaseTType(ttype)
ContainerType = (MapType | SetType | ListType):type brk immutable? -> type
MapType = 'map' CppType? brk '<' brk FieldType:keyt brk ',' brk FieldType:valt brk '>' -> TType.MAP, (keyt, valt)
SetType = 'set' CppType? brk '<' brk FieldType:valt brk '>' -> TType.SET, valt
ListType = 'list' brk '<' brk FieldType:valt brk '>' brk CppType? -> TType.LIST, valt
StructType = Identifier:name -> TType.STRUCT, name
CppType = 'cpp_type' Literal -> None
# Constant Values
ConstValue = DoubleConstant | IntConstant | ConstList | ConstMap | Literal | Identifier
IntConstant = <('+' | '-')? Digit+>:val -> int(val)
DoubleConstant = <('+' | '-')? ((Digit* '.' Digit+ Exponent?) | (Digit+ Exponent))>:val -> float(val)
Exponent = ('E' | 'e') IntConstant
ConstList = '[' (ConstValue:val ListSeparator? -> val)*:vals ']' -> vals
ConstMap = '{' (ConstValue:key ':' ConstValue:val ListSeparator? -> key, val)*:items '}' -> dict(items)
# Basic Definitions
Literal = ('"' <(~'"' anything)*>:val '"' -> val) | ("'" <(~"'" anything)*>:val "'" -> val)
Identifier = not_reserved <(Letter | '_') (Letter | Digit | '.' | '_')*>
ListSeparator = ',' | ';'
Letter = letter # parsley built-in
Digit = digit # parsley built-in
Comment = cpp_comment | c_comment
brk = <(' ' | '\t' | '\n' | '\r' | c_comment | cpp_comment)*>
cpp_comment = '//' <('\\\n' | (~'\n' anything))*>
c_comment = '/*' <(~'*/' anything)*>:body '*/' -> body
immutable = '(' brk 'python.immutable' brk '=' brk '""' brk ')'
Reserved = ('__CLASS__' | '__DIR__' | '__FILE__' | '__FUNCTION__' | '__LINE__' | '__METHOD__' |
    '__NAMESPACE__' | 'abstract' | 'alias' | 'and' | 'args' | 'as' | 'assert' | 'BEGIN' |
    'begin' | 'binary' | 'bool' | 'break' | 'byte' | 'case' | 'catch' | 'class' | 'clone' |
    'const' | 'continue' | 'declare' | 'def' | 'default' | 'del' | 'delete' | 'do' |
    'double' | 'dynamic' | 'elif' | 'else' | 'elseif' | 'elsif' | 'END' | 'end' |
    'enddeclare' | 'endfor' | 'endforeach' | 'endif' | 'endswitch' | 'endwhile' | 'ensure' |
    'enum' | 'except' | 'exception' | 'exec' | 'extends' | 'finally' | 'float' | 'for' |
    'foreach' | 'from' | 'function' | 'global' | 'goto' | 'i16' | 'i32' | 'i64' | 'if' |
    'implements' | 'import' | 'in' | 'include' | 'inline' | 'instanceof' | 'interface' |
    'is' | 'lambda' | 'list' | 'map' | 'module' | 'namespace' | 'native' | 'new' | 'next' |
    'nil' | 'not' | 'oneway' | 'optional' | 'or' | 'pass' | 'print' | 'private' |
    'protected' | 'public' | 'public' | 'raise' | 'redo' | 'register' | 'required' |
    'rescue' | 'retry' | 'return' | 'self' | 'service' | 'set' | 'sizeof' | 'static' |
    'string' | 'struct' | 'super' | 'switch' | 'synchronized' | 'then' | 'this' |
    'throw' | 'throws' | 'transient' | 'try' | 'typedef' | 'undef' | 'union' | 'union' |
    'unless' | 'unsigned' | 'until' | 'use' | 'var' | 'virtual' | 'void' | 'volatile' |
    'when' | 'while' | 'with' | 'xor' | 'yield')
not_reserved = ~(Reserved (' ' | '\t' | '\n'))
''',
{
    'Document': Document,
    'Enum': _make_enum,
    'Struct': _make_struct,
    'Union': _make_union,
    'Exception_': Exception_,
    'Service': _make_service,
    'Function': Function,
    'Field': Field,
    'BaseTType': BASE_TYPE_MAP.get,
    'TType': TType
})
class ParserError(ValueError):
    """Raised when a path handed to :class:`Parser` cannot be parsed."""


class Parser(object):
    """File-level entry point: validate a path, read it and parse it."""

    def __init__(self, include_dirs=('.',), enable_cache=True):
        """Accept parser configuration.

        Both arguments are accepted for interface compatibility but are not
        used yet.
        """

    def parse(self, path, module_name=None):
        """Parse the .thrift file at *path* and return its document.

        :param path: filesystem path; must end in ``.thrift`` and exist.
        :param module_name: value assigned to ``__module__`` of every
            generated class; defaults to *path* when omitted.
        :returns: the parsed document.
        :raises ParserError: if *path* has the wrong suffix or does not exist.
        """
        import os  # function-scope import: the module header does not import os

        if not path.endswith('.thrift'):
            raise ParserError(
                'path {!r} does not have a valid .thrift suffix'.format(path))
        if not os.path.exists(path):
            raise ParserError('path {!r} does not exist'.format(path))
        # A context manager closes the handle even if reading fails.
        with open(path) as source:  # TODO: encoding on python 3
            data = source.read()
        if module_name is None:
            module_name = path
        return self._parse_document(data, module_name)

    def _parse_document(self, data, module_name):
        """Parse *data* and stamp *module_name* on the generated classes.

        Only definitions whose ``__module__`` is still the deferred
        placeholder are touched; definitions without a ``__module__``
        attribute (plain tuples) are skipped.
        """
        document = _GRAMMAR(data).Document()
        for defn in document.definitions:
            if getattr(defn, '__module__', None) == _DEFERRED_MODULE_NAME:
                defn.__module__ = module_name
        return document
class GrammarError(ValueError):
    """An error in the .thrift format spec being parsed."""
class DuplicateField(GrammarError):
    """A field id or field name was used more than once in one declaration."""
def test():
    """Smoke-test individual grammar rules against small inputs.

    Every input in the first table only has to parse without raising; every
    input in the second table must evaluate to the stated value.  The parsed
    ``enum Foo`` class is returned.
    """
    # (source text, grammar rule) -- just check that these valid things
    # don't raise an exception
    must_parse = (
        ('union Foo { 1: bar.Bar first 2: string second }', 'Union'),
        ('namespace /* */ cpp.noexist /* */ ThriftTest', 'Namespace'),
        ('Numberz.ONE', 'Identifier'),
        ('Numberz.ONE', 'ConstValue'),
        ('const Numberz myNumberz = Numberz.ONE', 'Const'),
        ('typedef i64 UserId', 'Typedef'),
        ('typedef map<string,Bonk> MapType', 'Typedef'),
        ('Bools {\n1: bool im_true,\n2: bool im_false,\n}', 'name_fields'),
        ('string_thing', 'Identifier'),
    )
    for text, rule in must_parse:
        getattr(_GRAMMAR(text), rule)()

    # (source text, grammar rule, expected value) -- assert specific values here
    must_equal = (
        ('hello', 'Identifier', 'hello'),
        ('namespace py a.b.foo', 'Namespace', 'namespace py a.b.foo'),
        ('namespace /* */ cpp.noexist /* */ ThriftTest', 'Namespace',
         'namespace /* */ cpp.noexist /* */ ThriftTest'),
        ('include "../bar.thrift"', 'Include', 'include "../bar.thrift"'),
    )
    for text, rule, expected in must_equal:
        assert getattr(_GRAMMAR(text), rule)() == expected

    enum_cls = _GRAMMAR('enum Foo { VAL1 = 8 VAL2 = 10 }').Enum()
    # == Enum(name='Foo', vals=[('VAL1', 8), ('VAL2', 10)])
    return enum_cls
    # Checks written but never reached because of the return above; ListType
    # and Service are not names defined in this module, so the last two
    # cannot run as written:
    #   return _GRAMMAR('struct Foo { 1: bar.Bar first 2: string second }').Struct()
    #       # == Struct(name='Foo', fields=[(1, None, 'bar.Bar', 'first', None), (2, None, 'string', 'second', None)])
    #   assert _GRAMMAR('list<Bar>').ListType() == ListType(valt='Bar')
    #   assert _GRAMMAR('service Foo { void foo() }').Service() == Service(name='Foo', parent=None, funcs=[Function(name='foo', type='void', fields=[])])
def parse_data(data, module):
    """Parse thrift source text and return the resulting document.

    Every definition whose ``__module__`` is still the deferred placeholder
    object gets *module* assigned instead; definitions without a
    ``__module__`` attribute are left alone.
    """
    parsed = _GRAMMAR(data).Document()
    for definition in parsed.definitions:
        current = getattr(definition, '__module__', None)
        if current is not _DEFERRED_MODULE_NAME:
            continue
        definition.__module__ = module
    return parsed
def parse_file(path):
    """Read the file at *path* and parse it, using *path* as the module name.

    :param path: filesystem path of the thrift source file.
    :returns: the parsed document.
    """
    # A context manager releases the file handle deterministically instead of
    # leaving it to garbage collection.
    with open(path) as source:
        data = source.read()
    return parse_data(data, path)
# Executed whenever this module is run or imported: first the grammar
# self-test, then a parse of ThriftTest.thrift, which is looked up relative to
# the current working directory.
test()
parse_file('ThriftTest.thrift')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment