Skip to content

Instantly share code, notes, and snippets.

@kurtbrose
Last active August 2, 2018 06:07
Show Gist options
  • Save kurtbrose/1a99167917998647a4707b5d475edbbe to your computer and use it in GitHub Desktop.
Save kurtbrose/1a99167917998647a4707b5d475edbbe to your computer and use it in GitHub Desktop.
parser for .thrift files (work in progress)
import collections
import types
import parsley
from thriftpy.thrift import gen_init, TType, TPayload
# Lightweight records built by the grammar actions (see the bindings passed
# to ``parsley.makeGrammar``).
Document = collections.namedtuple('Document', ['headers', 'definitions'])
Exception_ = collections.namedtuple('Exception_', ['name', 'fields'])
Function = collections.namedtuple(
    'Function', ['name', 'ttype', 'fields', 'oneway', 'throws'])
Field = collections.namedtuple(
    'Field', ['id', 'req', 'ttype', 'name', 'default'])

# Placeholder ``__module__`` stamped on every generated class; it is swapped
# for the real module name once a whole document has been parsed.
_DEFERRED_MODULE_NAME = '<string>'
def _make_union(name, fields):
    """Build a payload class called *name* and populate it from *fields*.

    An empty struct class is created with ``_make_empty_struct`` (default
    ttype and base class) and then handed to ``_fill_in_struct``.  Unlike
    ``_make_struct``, *fields* is passed through untouched.
    """
    return _fill_in_struct(_make_empty_struct(name), fields)
def _make_enum(name, kvs):
    """Create an enum-like class named *name* from ``(key, value)`` pairs.

    Each key becomes a class attribute holding its integer value.  A pair
    whose value is ``None`` is auto-numbered: it receives the previous
    member's value plus one, or ``0`` when it is the first member.

    The original computed that running value but never stored it, so
    implicit members ended up as ``None``.

    :param name: name of the generated class.
    :param kvs: sequence of ``(key, value)`` pairs; may be empty or ``None``.
    :returns: the new class, carrying ``_ttype`` (``TType.I32``), one
        attribute per key, and the ``_VALUES_TO_NAMES`` /
        ``_NAMES_TO_VALUES`` lookup dicts.
    """
    attrs = {'__module__': _DEFERRED_MODULE_NAME, '_ttype': TType.I32}
    cls = type(name, (object,), attrs)

    values_to_names = {}
    names_to_values = {}
    previous = -1  # so that an implicit first member is numbered 0
    for key, explicit in kvs or ():
        value = previous + 1 if explicit is None else explicit
        previous = value
        setattr(cls, key, value)
        values_to_names[value] = key
        names_to_values[key] = value

    # ``_VALUES_TO_NAMES`` matches the plural used by ``_NAMES_TO_VALUES``
    # (and, as far as I can tell, thriftpy's own spelling -- confirm).  The
    # misspelled ``_VALUE_TO_NAMES`` is kept as an alias for existing readers.
    cls._VALUES_TO_NAMES = values_to_names
    cls._VALUE_TO_NAMES = values_to_names
    cls._NAMES_TO_VALUES = names_to_values
    return cls
def _make_empty_struct(name, ttype=TType.STRUCT, base_cls=TPayload):
    """Return a new, field-less class *name* deriving from *base_cls*.

    The class carries ``_ttype`` set to *ttype* and the placeholder
    ``__module__``; its fields are attached later by ``_fill_in_struct``.
    """
    namespace = dict(_ttype=ttype, __module__=_DEFERRED_MODULE_NAME)
    bases = (base_cls,)
    return type(name, bases, namespace)
def _fill_in_struct(cls, fields, _gen_init=True):
    """Attach thrift field metadata to *cls* and return it.

    Sets three attributes on *cls*:

    * ``thrift_spec``  -- ``{field id: spec tuple}`` built by ``_ttype_spec``
    * ``default_spec`` -- ``[(field name, default), ...]`` in field order
    * ``_tspec``       -- ``{field name: Field}`` keeping the parsed record

    :param cls: class to populate (normally from ``_make_empty_struct``).
    :param fields: iterable of ``Field`` records.
    :param _gen_init: when true, ``gen_init`` is called to generate the
        class's ``__init__`` from the two specs.
    :raises DuplicateField: if two fields share an id or a name.
    """
    thrift_spec = {}
    default_spec = []
    _tspec = {}
    for field in fields:
        if field.id in thrift_spec:
            raise DuplicateField('offset {} used by fields {} and {}'.format(
                field.id, field.name, thrift_spec[field.id][1]))
        if field.name in _tspec:
            raise DuplicateField('name {} used twice'.format(field.name))
        # The grammar yields ``req`` as 'required', 'optional' or 'default',
        # all of which are truthy; reduce it to a real boolean so optional
        # and default fields are not recorded as required.
        required = field.req == 'required'
        thrift_spec[field.id] = _ttype_spec(field.ttype, field.name, required)
        default_spec.append((field.name, field.default))
        _tspec[field.name] = field
    cls.thrift_spec = thrift_spec
    cls.default_spec = default_spec
    cls._tspec = _tspec
    if _gen_init:
        gen_init(cls, thrift_spec, default_spec)
    return cls  # remember -- assign __module__ after these come back
def _make_struct(name, fields, ttype=TType.STRUCT, base_cls=TPayload,
                 _gen_init=True):
    """Create a struct class *name* and fill it in from *fields*.

    A falsy *fields* (``None`` or empty) is treated as "no fields".  The
    remaining arguments are forwarded to ``_make_empty_struct`` and
    ``_fill_in_struct``.
    """
    if not fields:
        fields = ()
    empty = _make_empty_struct(name, ttype=ttype, base_cls=base_cls)
    return _fill_in_struct(empty, fields, _gen_init=_gen_init)
# TODO: how to get concrete object here? -- some kinda registry object that is passed around?
def _make_service(name, funcs, extends):
    """Build a service class *name* with per-function payload classes.

    For every function in *funcs* two struct classes are attached to the
    service class: ``<func>_args`` (from the function's fields) and
    ``<func>_result`` (from its ``throws`` fields, plus a ``success`` slot
    at id 0 unless the function returns void).

    :param name: name of the generated class.
    :param funcs: iterable of ``Function`` records.
    :param extends: base class, or ``None`` to derive from ``object``.
    :returns: the service class; ``thrift_services`` lists its own function
        names followed by those of the base, when the base defines them.
    """
    base = object if extends is None else extends
    cls = type(name, (base,), {'__module__': _DEFERRED_MODULE_NAME})

    service_names = []
    for func in funcs:
        # Request payload: a struct built from the declared arguments.
        args_attr = '%s_args' % func.name
        setattr(cls, args_attr, _make_struct(args_attr, func.fields))

        # Response payload: starts from the declared exceptions; __init__
        # generation is postponed until the success slot has been added.
        result_attr = '%s_result' % func.name
        result_cls = _make_struct(result_attr, func.throws, _gen_init=False)
        result_cls.oneway = func.oneway
        if func.ttype != TType.VOID:
            result_cls.thrift_spec[0] = _ttype_spec(func.ttype, 'success')
            result_cls.default_spec.insert(0, ('success', None))
        gen_init(result_cls, result_cls.thrift_spec, result_cls.default_spec)
        setattr(cls, result_attr, result_cls)

        service_names.append(func.name)

    # Inherit the base service's function names, if it has any.
    if hasattr(base, 'thrift_services'):
        service_names.extend(base.thrift_services)
    cls.thrift_services = service_names
    return cls
def _ttype_spec(ttype, name, required=False):
    """Return the ``thrift_spec`` tuple for one field.

    A plain integer *ttype* gives ``(ttype, name, required)``.  Anything
    else is indexed as a pair ``(kind, inner)`` and gives
    ``(kind, name, inner, required)``.
    """
    if not isinstance(ttype, int):
        kind, inner = ttype[0], ttype[1]
        return kind, name, inner, required
    return ttype, name, required
# Thrift base-type keyword -> TType constant.  ``byte`` and ``i8`` both map
# to ``TType.BYTE``.  Its ``.get`` is bound as ``BaseTType`` in the grammar.
BASE_TYPE_MAP = dict(
    bool=TType.BOOL,
    byte=TType.BYTE,
    i8=TType.BYTE,
    i16=TType.I16,
    i32=TType.I32,
    i64=TType.I64,
    double=TType.DOUBLE,
    string=TType.STRING,
    binary=TType.BINARY,
)
# Parsley grammar for .thrift documents.  The dict passed as the second
# argument binds the names used inside ``-> ...`` actions: the record types,
# the class factories (``Enum``/``Struct``/``Union``/``Service``) and ``TType``.
#
# Changes relative to the previous revision of this grammar:
#   * DoubleConstant now binds the matched text to ``val`` (the action used an
#     unbound name), requires a '.' or an exponent, and is tried before
#     IntConstant so that "1.5" is no longer consumed as the integer 1.
#   * Literal attaches ``-> val`` to both alternatives; previously the action
#     applied only to the single-quoted form, so a double-quoted literal
#     evaluated to its closing quote character.
#   * Service and Throws skip whitespace/comments after 'extends' and 'throws'
#     so "extends Base" and "throws (...)" parse.  Note that ``extends`` still
#     arrives in ``_make_service`` as an identifier string, not a class.
_GRAMMAR = parsley.makeGrammar('''
Document = (brk Header)*:hs (brk Definition)*:ds brk -> Document(hs, ds)
Header = <Include | Namespace>
Include = brk <'include' brk Literal>
Namespace = brk <'namespace' brk ((NamespaceScope ('.' Identifier)?)| unsupported_namespacescope) brk Identifier>:hdr brk uri? -> hdr
uri = '(' ws 'uri' ws '=' ws Literal:uri ws ')' -> uri
NamespaceScope = '*' | 'cpp' | 'java' | 'py.twisted' | 'py' | 'perl' | 'rb' | 'cocoa' | 'csharp' | 'xsd' | 'c_glib' | 'js' | 'st' | 'go' | 'php' | 'delphi' | 'lua'
unsupported_namespacescope = Identifier
Definition = brk (Const | Typedef | Enum | Struct | Union | Exception | Service)
Const = 'const' brk FieldType:type brk Identifier:name brk '=' brk ConstValue:val brk ListSeparator? -> 'const', type, name, val
Typedef = 'typedef' brk DefinitionType:type brk Identifier:alias -> 'typedef', type, alias
Enum = 'enum' brk Identifier:name brk '{' enum_item*:vals '}' -> Enum(name, vals)
enum_item = brk Identifier:name brk ('=' brk IntConstant)?:value brk ListSeparator? brk -> name, value
Struct = 'struct' brk name_fields:nf brk immutable? -> Struct(nf[0], nf[1])
Union = 'union' brk name_fields:nf -> Union(nf[0], nf[1])
Exception = 'exception' brk name_fields:nf -> Exception_(nf[0], nf[1])
name_fields = Identifier:name brk '{' (brk Field)*:fields brk '}' -> name, fields
Service = 'service' brk Identifier:name brk ('extends' brk Identifier:base brk -> base)?:extends '{' (brk Function)*:funcs brk '}' -> Service(name, funcs, extends)
Field = brk FieldID:id brk FieldReq?:req brk FieldType:ttype brk Identifier:name brk ('=' brk ConstValue)?:default brk ListSeparator? -> Field(id, req, ttype, name, default)
FieldID = IntConstant:val ':' -> val
FieldReq = 'required' | 'optional' | !('default')
# Functions
Function = 'oneway'?:oneway brk FunctionType:ft brk Identifier:name '(' (brk Field*):fs ')' brk Throws?:throws brk ListSeparator? -> Function(name, ft, fs, oneway, throws)
FunctionType = ('void' !(TType.VOID)) | FieldType
Throws = 'throws' brk '(' (brk Field)*:fs brk ')' -> fs
# Types
FieldType = ContainerType | BaseType | StructType
DefinitionType = BaseType | ContainerType
BaseType = ('bool' | 'byte' | 'i8' | 'i16' | 'i32' | 'i64' | 'double' | 'string' | 'binary'):ttype -> BaseTType(ttype)
ContainerType = (MapType | SetType | ListType):type brk immutable? -> type
MapType = 'map' CppType? brk '<' brk FieldType:keyt brk ',' brk FieldType:valt brk '>' -> TType.MAP, (keyt, valt)
SetType = 'set' CppType? brk '<' brk FieldType:valt brk '>' -> TType.SET, valt
ListType = 'list' brk '<' brk FieldType:valt brk '>' brk CppType? -> TType.LIST, valt
StructType = Identifier:name -> TType.STRUCT, name
CppType = 'cpp_type' Literal -> None
# Constant Values
ConstValue = DoubleConstant | IntConstant | ConstList | ConstMap | Literal | Identifier
IntConstant = <('+' | '-')? Digit+>:val -> int(val)
DoubleConstant = <('+' | '-')? (Digit* '.' Digit+ (('E' | 'e') IntConstant)? | Digit+ ('E' | 'e') IntConstant)>:val -> float(val)
ConstList = '[' (ConstValue:val ListSeparator? -> val)*:vals ']' -> vals
ConstMap = '{' (ConstValue:key ':' ConstValue:val ListSeparator? -> key, val)*:items '}' -> dict(items)
# Basic Definitions
Literal = ('"' <(~'"' anything)*>:val '"' -> val) | ("'" <(~"'" anything)*>:val "'" -> val)
Identifier = not_reserved <(Letter | '_') (Letter | Digit | '.' | '_')*>
ListSeparator = ',' | ';'
Letter = letter # parsley built-in
Digit = digit # parsley built-in
Comment = cpp_comment | c_comment
brk = <(' ' | '\t' | '\n' | '\r' | c_comment | cpp_comment)*>
cpp_comment = '//' <('\\\n' | (~'\n' anything))*>
c_comment = '/*' <(~'*/' anything)*>:body '*/' -> body
immutable = '(' brk 'python.immutable' brk '=' brk '""' brk ')'
Reserved = ('__CLASS__' | '__DIR__' | '__FILE__' | '__FUNCTION__' | '__LINE__' | '__METHOD__' |
    '__NAMESPACE__' | 'abstract' | 'alias' | 'and' | 'args' | 'as' | 'assert' | 'BEGIN' |
    'begin' | 'binary' | 'bool' | 'break' | 'byte' | 'case' | 'catch' | 'class' | 'clone' |
    'const' | 'continue' | 'declare' | 'def' | 'default' | 'del' | 'delete' | 'do' |
    'double' | 'dynamic' | 'elif' | 'else' | 'elseif' | 'elsif' | 'END' | 'end' |
    'enddeclare' | 'endfor' | 'endforeach' | 'endif' | 'endswitch' | 'endwhile' | 'ensure' |
    'enum' | 'except' | 'exception' | 'exec' | 'extends' | 'finally' | 'float' | 'for' |
    'foreach' | 'from' | 'function' | 'global' | 'goto' | 'i16' | 'i32' | 'i64' | 'if' |
    'implements' | 'import' | 'in' | 'include' | 'inline' | 'instanceof' | 'interface' |
    'is' | 'lambda' | 'list' | 'map' | 'module' | 'namespace' | 'native' | 'new' | 'next' |
    'nil' | 'not' | 'oneway' | 'optional' | 'or' | 'pass' | 'print' | 'private' |
    'protected' | 'public' | 'public' | 'raise' | 'redo' | 'register' | 'required' |
    'rescue' | 'retry' | 'return' | 'self' | 'service' | 'set' | 'sizeof' | 'static' |
    'string' | 'struct' | 'super' | 'switch' | 'synchronized' | 'then' | 'this' |
    'throw' | 'throws' | 'transient' | 'try' | 'typedef' | 'undef' | 'union' | 'union' |
    'unless' | 'unsigned' | 'until' | 'use' | 'var' | 'virtual' | 'void' | 'volatile' |
    'when' | 'while' | 'with' | 'xor' | 'yield')
not_reserved = ~(Reserved (' ' | '\t' | '\n'))
''',
{
    'Document': Document,
    'Enum': _make_enum,
    'Struct': _make_struct,
    'Union': _make_union,
    'Exception_': Exception_,
    'Service': _make_service,
    'Function': Function,
    'Field': Field,
    'BaseTType': BASE_TYPE_MAP.get,
    'TType': TType
})
class ParserError(Exception):
    """Raised by ``Parser.parse`` when the given path cannot be used."""


class Parser(object):
    """Object-style front end for parsing ``.thrift`` files (work in progress)."""

    def __init__(self, include_dirs=('.',), enable_cache=True):
        # Kept on the instance for later use; nothing in this class reads
        # them yet.
        self.include_dirs = include_dirs
        self.enable_cache = enable_cache

    def parse(self, path, module_name=None):
        """Parse the ``.thrift`` file at *path* and return its ``Document``.

        :param path: file system path; must end in ``.thrift`` and exist.
        :param module_name: value assigned to ``__module__`` on generated
            classes; defaults to *path*, as ``parse_file`` does.
        :raises ParserError: if the suffix is wrong or the file is missing.
        """
        import os  # local import: ``os`` is not among the file's imports

        # ``{!r}`` is the repr conversion; the former ``{:r}`` was an invalid
        # format spec and raised ValueError instead of building the message.
        if not path.endswith('.thrift'):
            raise ParserError('path {!r} does not have a valid .thrift suffix'.format(path))
        if not os.path.exists(path):
            raise ParserError('path {!r} does not exist'.format(path))
        if module_name is None:
            module_name = path
        with open(path) as source:  # TODO: encoding on python 3
            data = source.read()
        return self._parse_document(data, module_name)

    def _parse_document(self, data, module_name):
        """Parse *data* and stamp *module_name* onto the generated classes."""
        document = _GRAMMAR(data).Document()
        for defn in document.definitions:
            # Plain-tuple definitions (const/typedef) have no ``__module__``
            # attribute, hence the default.
            if getattr(defn, '__module__', None) == _DEFERRED_MODULE_NAME:
                defn.__module__ = module_name
        return document
class GrammarError(ValueError):
    """Base class for errors in the content of a .thrift definition."""
class DuplicateField(GrammarError):
    """A struct reused a field id or a field name."""
def test():
    """Smoke-test the grammar on small snippets.

    The first group only has to parse without raising; the second group
    checks concrete results.  Previously an early ``return`` after the enum
    check left the struct, list and service checks unreachable, and those
    checks compared against names (``ListType``, ``Service``) that do not
    exist in this module.  They now run and inspect the generated objects.

    :returns: the parsed ``Foo`` enum class, as before.
    """
    # just check that these valid things don't raise an exception
    _GRAMMAR('union Foo { 1: bar.Bar first 2: string second }').Union()
    _GRAMMAR('namespace /* */ cpp.noexist /* */ ThriftTest').Namespace()
    _GRAMMAR('Numberz.ONE').Identifier()
    _GRAMMAR('Numberz.ONE').ConstValue()
    _GRAMMAR('const Numberz myNumberz = Numberz.ONE').Const()
    _GRAMMAR('typedef i64 UserId').Typedef()
    _GRAMMAR('typedef map<string,Bonk> MapType').Typedef()
    _GRAMMAR('''Bools {
      1: bool im_true,
      2: bool im_false,
    }''').name_fields()
    _GRAMMAR('string_thing').Identifier()

    # assert specific values here
    assert _GRAMMAR('hello').Identifier() == 'hello'
    assert _GRAMMAR('namespace py a.b.foo').Namespace() == 'namespace py a.b.foo'
    assert _GRAMMAR('namespace /* */ cpp.noexist /* */ ThriftTest').Namespace() == 'namespace /* */ cpp.noexist /* */ ThriftTest'
    assert _GRAMMAR('include "../bar.thrift"').Include() == 'include "../bar.thrift"'

    enum_cls = _GRAMMAR('enum Foo { VAL1 = 8 VAL2 = 10 }').Enum()
    assert (enum_cls.VAL1, enum_cls.VAL2) == (8, 10)

    struct_cls = _GRAMMAR('struct Foo { 1: bar.Bar first 2: string second }').Struct()
    assert sorted(struct_cls.thrift_spec) == [1, 2]

    # Container rules yield (container ttype, element type) tuples.
    assert _GRAMMAR('list<Bar>').ListType() == (TType.LIST, (TType.STRUCT, 'Bar'))

    service_cls = _GRAMMAR('service Foo { void foo() }').Service()
    assert service_cls.thrift_services == ['foo']

    return enum_cls
def parse_data(data, module):
    """Parse thrift source text and return the resulting ``Document``.

    Every definition whose ``__module__`` is still the deferred placeholder
    (i.e. a class generated during parsing) gets *module* assigned instead.
    """
    document = _GRAMMAR(data).Document()
    pending = (
        defn for defn in document.definitions
        if getattr(defn, '__module__', None) is _DEFERRED_MODULE_NAME
    )
    for generated in pending:
        generated.__module__ = module
    return document
def parse_file(path):
    """Read the file at *path* and parse it, using *path* as the module name.

    The file is opened in a ``with`` block so the handle is closed even if
    reading fails (it was previously left for the garbage collector).

    :returns: the ``Document`` produced by ``parse_data``.
    """
    with open(path) as source:
        data = source.read()
    return parse_data(data, path)
# NOTE(review): both calls run whenever this module is executed or imported.
# The second opens 'ThriftTest.thrift' relative to the current working
# directory and raises if it is missing -- consider an
# ``if __name__ == '__main__':`` guard once this is no longer a scratch file.
test()
parse_file('ThriftTest.thrift')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment