Skip to content

Instantly share code, notes, and snippets.

@MartinNowak
Created March 12, 2012 05:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MartinNowak/2019921 to your computer and use it in GitHub Desktop.
Save MartinNowak/2019921 to your computer and use it in GitHub Desktop.
module lexer;
import std.algorithm, std.conv, std.exception, std.functional, std.range, std.string, std.traits, std.typetuple;
/*
* TokenDeclaration
* name - used to define enum member
* pattern - used as lexer pattern
* action - expression to evaluate when pattern is matched
*/
// One entry of a lexer specification; R is the type of the pattern
// (and of the lexer input).
struct TokenDeclaration(R)
{
// Name of the generated enum member. May be empty, in which case no
// enum member is generated and an action must be given.
string _name;
// Sequence to match. An empty pattern is never inserted into the trie.
R _pattern;
// Expression naming the callable to invoke on a match. When empty,
// the lexer's defaultAction is used instead.
string _action;
}
/*
* First stage of lexer. Defines an enum from the token names.
* Template Parameters:
* R - type of input range
* TokenSpec - An array which is used to declare an enum and
* define matching patterns. Entries are sequential. Each
* entry should consist of a name of type string, a pattern
* of type R and optionally an action of type string. A
* pattern might be empty in which case an enum member is
* declared but it is never matched. Names can be duplicated
* to declare multiple patterns that match the same token.
* Actions need to be a string expression of a callable with
* the signature Token(Tok id, ref R start, size_t len).
Example:
----
alias TokenDeclaration!string tok;
enum TokenSpec = [
tok("gt" , ">" , ),
tok("ge" , ">=", ),
tok("lt" , "<" , ),
tok("le" , "<=", ),
tok("plus" , "a+", q{binary!'+'}),
tok("plus" , "+a", q{unary!'+'} ),
tok("tilde", "~" , ),
];
----
*/
// Evaluates to the token enum generated from the `_name` of every entry
// in `decls`. The parameter R is not referenced by the body.
template Lexer1(R, alias decls)
{
alias defineToken!(eagerMap!q{a._name}(decls)) Lexer1;
}
/*
* Second stage of lexer. Declares lexer fields, methods and range interface.
* This mixin is intended to be instantiated at struct scope.
* Template Parameters:
* R - type of input range
* defaultAction - called when a token pattern is matched
* skipAction - called once when input is set and then after each match
* recoverAction - called when input could not be matched
* TokenSpec - same as for Lexer1
*
Example:
----
module beanlexer;
import std.conv, std.stdio, std.range, std.typetuple, lexer;
struct Bean
{
int opCmp(in Bean rhs) const
{
return c - rhs.c;
}
dchar c;
}
struct BeanLexer
{
alias TokenDeclaration!(Bean[]) tok;
enum TokenSpec = [
tok("hello" , [Bean('H'), Bean('e'), Bean('l'), Bean('l'), Bean('o')]),
tok("world" , [Bean('w'), Bean('o'), Bean('r'), Bean('l'), Bean('d')]),
tok("escape", [Bean('$')], q{onEscape}),
tok("error" , null),
tok("", [Bean('A')], q{onA}),
];
alias Lexer1!(Bean[], TokenSpec) Tok;
struct Token
{
this(Tok id, Bean[] match)
{
assert(id != Tok.error);
_id = id;
_match = match;
}
this(Tok id, string text)
{
assert(id == Tok.error);
_id = id;
_text = text;
}
string toString()
{
if (_id == Tok.error)
return "Error: " ~ _text;
else
return to!string(_match);
}
Tok _id;
union
{
Bean[] _match;
string _text;
}
}
// Hook for constructing a custom user value
Token defaultMatch(Tok id, ref Bean[] input, size_t len)
{
auto tok = Token(id, input[0 .. len]);
input.popFrontN(len);
return tok;
}
// Hook for skipping input
void skip(ref Bean[] input)
{
// capture is empty so we can skip right away
while (!input.empty && input.front == Bean(' '))
input.popFront;
}
// Fallthrough hook
Token noMatch(ref Bean[] input)
{
auto saved = input.save;
// consume input up to the next space or until end
while (!input.empty && input.front != Bean(' '))
input.popFront;
// return an error token
return Token(Tok.error, "Can't match input: " ~ to!string(saved[0 .. $ - input.length]));
}
Token onEscape(Tok id, ref Bean[] input, size_t len)
{
input.popFrontN(len);
// skip opening braces '{'
if (input.empty || input.front != Bean('{'))
return Token(Tok.error, "Missing '{' after escape '$'.");
input.popFront;
auto start = input.save;
// save everything from front to the next '}'
while (!input.empty && input.front != Bean('}'))
input.popFront;
if (input.empty)
return Token(Tok.error, "Missing '}' after escape sequence.");
auto res = Token(Tok.escape, start[0 .. $ - input.length]);
input.popFront; // skip '}'
return res;
}
Token onA(ref Bean[] input, size_t len)
{
auto tok = Token(Tok.hello, input[0 .. len]);
input.popFrontN(len);
return tok;
}
mixin Lexer2!(Bean[], defaultMatch, skip, noMatch, TokenSpec);
}
void main()
{
BeanLexer lexer;
alias BeanLexer.Tok Tok;
auto input = [
Bean('H'), Bean('e'), Bean('l'), Bean('l'), Bean('o'), Bean(' '),
Bean('$'), Bean('{'), Bean('n'), Bean('a'), Bean('m'), Bean('e'), Bean('}'),
Bean('A'),
];
lexer.input = input;
foreach(tok; lexer)
{
writeln(tok);
}
}
----
*/
mixin template Lexer2
(R, alias defaultAction, alias skipAction, alias recoverAction, alias decls) if(decls.length)
{
    import std.range, std.traits;

    // Token enum generated from the declaration names.
    alias defineToken!(eagerMap!q{a._name}(decls)) _TokId;
    // The token type is whatever defaultAction returns.
    alias typeof(defaultAction(_TokId.init, _input, 0)) Token;

    this(R input)
    {
        this.input = input;
    }

    /*
     * Reset the input of the lexer. Will initially call skipAction
     * and clear any buffered token.
     */
    @property void input(R input)
    {
        _input = input;
        skipAction(_input);
        _token = Token.init;
        _hasFront = false;
    }

    /// Returns the input that has not been consumed yet.
    @property R input()
    {
        return _input;
    }

    // Range interface

    /// True when no token is buffered and the remaining input is empty.
    bool empty()
    {
        return !_hasFront && _input.empty;
    }

    /*
     * Drops the current token. Tokens are lexed lazily by front, so if
     * the current token has not been requested yet it is lexed here
     * first; otherwise popFront would not advance the input at all.
     */
    void popFront()
    {
        if (!_hasFront)
            front();
        _hasFront = false;
    }

    /*
     * Returns the current token. The token is lexed on first access,
     * followed by a call to skipAction, and buffered until popFront.
     */
    Token front()
    {
        if (!_hasFront)
        {
            _token = nextToken(_input);
            skipAction(_input);
            _hasFront = true;
        }
        return _token;
    }

private:
    // Lexes one token from input. The actions advance `saved`, which is
    // written back to input afterwards.
    Token nextToken(ref R input)
    {
        auto saved = input.save;
        auto tok = nextTokenImpl(saved, input.save);
        input = saved;
        return tok;
    }

    // Body is generated from the trie: `input` is a scratch copy used for
    // matching, `saved` is what the chosen action receives.
    Token nextTokenImpl(ref R saved, R input)
    {
        enum _trie = makeTrie(decls);
        enum _elems = _trie.elems; // needed by mixin code
        mixin(_trie.makeCode());
    }

    R _input;
    Token _token;
    bool _hasFront;
}
/*
* Turns sorted pattern definitions into a trie like switch function.
*/
Trie!R makeTrie(R)(TokenDeclaration!R[] decls)
// public to be accessible from mixin
{
    Trie!R result;
    foreach (decl; decls)
    {
        // declarations without a pattern are never matched
        if (decl._pattern.empty)
            continue;

        immutable named = !decl._name.empty;
        string call;
        if (!decl._action.empty)
        {
            // custom action; the token id is only passed along when the
            // declaration has a name
            call = named
                ? decl._action ~ "(_TokId."~decl._name~", saved, "~to!string(walkLength(decl._pattern))~")"
                : decl._action ~ "(saved, "~to!string(walkLength(decl._pattern))~")";
        }
        else
        {
            assert(named, "Cannot have empty name without action.");
            call = "defaultAction(_TokId."~decl._name~", saved, "~to!string(walkLength(decl._pattern))~")";
        }
        result.insert(decl._pattern, call);
    }
    return result;
}
/*
 * Applies fun to every element of r and returns the results as a
 * freshly allocated array.
 * Params:
 *   fun - unary function, either a callable or a string such as q{a + 1}
 *   r   - any range or array that can be iterated with foreach
 */
auto eagerMap(alias fun, R)(R r)
// public to be accessible from mixin
{
    alias unaryFun!fun _fun;
    alias typeof(_fun(ElementType!(R).init)) E;
    E[] result;
    static if (hasLength!R)
    {
        // Size the result once. The position is counted by hand because
        // foreach with an index variable only works for arrays, not for
        // arbitrary ranges that happen to have a length.
        result.length = r.length;
        size_t i = 0;
        foreach(e; r)
            result[i++] = _fun(e);
    }
    else
    {
        foreach(e; r)
            result ~= _fun(e);
    }
    return result;
}
private:
// Mixes in the enum source produced by enumMixin. That source declares an
// enum called `defineToken`, so the template evaluates to that enum.
template defineToken(alias names) if(names.length)
{
mixin(enumMixin(names));
}
// Builds the source text of `enum defineToken { ... }` from `names`.
// Duplicate names produce a single member; empty names produce none.
string enumMixin()(string[] names)
{
/*
* Make enum members unique. Use index sort to preserve the
* declared order.
*/
// `sorted` holds indices into `names`, ordered by the names they refer to,
// so equal names end up next to each other.
auto sorted = makeIndex(names);
// `src` reads and `tgt` writes through the same array: the kept indices are
// compacted to the front of `sorted`.
auto src = sorted;
auto tgt = sorted;
while (!src.empty)
{
immutable idx = src.front;
src.popFront;
// skip the remaining indices that refer to the same name
while (!src.empty && names[idx] == names[src.front])
src.popFront;
// unnamed declarations do not become enum members
if (names[idx].empty)
continue;
tgt.front = idx;
tgt.popFront;
}
// The kept indices are sorted[0 .. $-tgt.length]; ordering them numerically
// restores declaration order.
// NOTE(review): sort below is not stable, so for a duplicated name the
// surviving index is not necessarily that of its first occurrence.
auto onames = sort!compare(sorted[0 .. $-tgt.length]);
string res = "enum defineToken\n{\n";
foreach(idx; onames)
{
res ~= " " ~ names[idx] ~ ",\n";
}
res ~= "}\n";
return res;
}
/*
* IR for sorting patterns and associating actions.
*/
struct Trie(R)
{
    alias ElementType!R E;

    // One trie node. `children` is kept sorted by `elem`.
    static struct Node
    {
        // Returns the child for element e, inserting it at its sorted
        // position when it does not exist yet.
        ref Node getNode(E e)
        {
            auto idx = transitionIndex!q{a.elem < b}(children, e);
            if (idx == children.length || children[idx].elem != e)
                children = children[0 .. idx] ~ Node(e) ~ children[idx .. $];
            return children[idx];
        }
        E elem;
        string payload; // action expression; null for non-accepting nodes
        Node[] children;
    }

    // Inserts pattern into the trie and stores payload at its final node.
    // Every element seen is also recorded once in the sorted `elems`.
    void insert(R pattern, string payload)
    {
        Node *p = &root;
        foreach(e; pattern)
        {
            auto eidx = transitionIndex(elems, e);
            if (eidx == elems.length || elems[eidx] != e)
            {
                version (none) // BUG 6815
                {
                    elems = elems[0 .. eidx] ~ e ~ elems[eidx .. $];
                }
                else
                {
                    E[] nelems = [];
                    if (eidx)
                        nelems = elems[0 .. eidx];
                    nelems ~= e;
                    if (eidx < elems.length)
                        nelems ~= elems[eidx .. $];
                    elems = nelems;
                }
            }
            p = &p.getNode(e);
        }
        p.payload = payload;
    }

    // Generates the body of the matching function as D source. The code
    // refers to the names `input`, `saved`, `recoverAction` and, for
    // element types without a literal form, `_elems`.
    string makeCode() const
    {
        // imported here because this module does not import std.ascii
        import std.ascii : newline;

        enum tabw = 4;
        enum spaces = " "; // padding chunk; any non-zero length works below
        string code;
        size_t indent;

        void write(string s, size_t add=0)
        {
            add += tabw * indent;
            foreach(_; 0 .. add / spaces.length)
                code ~= spaces;
            code ~= spaces[0 .. add % spaces.length];
            code ~= s;
        }

        void writeln(string s, size_t add=0)
        {
            if (s.length)
                write(s, add);
            code ~= newline;
        }

        string elemString(E e)
        {
            // try to provide readable elements
            static if (is(E == dchar))
                return charLit(e);
            else static if (isIntegral!E)
                return to!string(e);
            else
            { // fall back on index into constant _elems
                immutable idx = transitionIndex(elems, e);
                assert(elems[idx] == e);
                return "_elems["~to!string(idx)~"]";
            }
        }

        // Emits the decision code for a non-empty, sorted list of siblings.
        void emit(in Node[] children)
        {
            auto cmp = elemString(children[$/2].elem);
            if (children.length >= 2)
            { // narrow interval using a binary search
                writeln("if (e < "~cmp~")");
                writeln("{");
                ++indent;
                emit(children[0 .. $/2]);
                --indent;
                writeln("}");
                writeln("else");
                writeln("{");
                ++indent;
                emit(children[$/2 .. $]);
                --indent;
                writeln("}");
            }
            else
            { // recurse on pattern if not exhausted, add
              // fallthrough return for accepting nodes
                auto node = children.front;
                writeln("if (e == "~cmp~")");
                writeln("{");
                if (!node.children.empty)
                {
                    ++indent;
                    writeln("if (!input.empty)");
                    writeln("{");
                    ++indent;
                    writeln("e = input.front;");
                    writeln("input.popFront;");
                    emit(node.children);
                    --indent;
                    writeln("}");
                    --indent;
                }
                if (node.payload)
                {
                    ++indent;
                    writeln("return "~node.payload~";");
                    --indent;
                }
                writeln("}");
            }
        }

        // save input, get first element and recurse on root, recover
        // on fallthrough; a trie without any pattern has nothing to
        // match, so only the recover call is generated for it
        if (root.children.length)
        {
            writeln("if (!input.empty)");
            writeln("{");
            ++indent;
            writeln("Unqual!(ElementType!(typeof(input))) e = input.front;");
            writeln("input.popFront;");
            emit(root.children);
            --indent;
            writeln("}");
        }
        writeln("return recoverAction(saved);");
        return code;
    }

    E[] elems; // sorted set of all elements used by any pattern
    Node root;
}
/*
 * Returns a string that will be lexed as $(D_Param c), i.e. the source
 * text of a character literal including the surrounding single quotes.
 * Graphical characters are written as themselves (quotes and backslash
 * get a backslash prefix); everything else is written as a named escape
 * or as a \x, \u or \U hex escape depending on the magnitude of c.
 */
public string charLit()(dchar c)
{
    // imported here because this module does not import std.uni
    import std.uni : isGraphical;

    static char toHex(uint v)
    {
        assert(v < 16);
        return cast(char)(v < 10 ? '0' + v : 'a' + v - 10);
    }

    string res = "'";
    if (isGraphical(c))
    {
        if (c == '\"' || c == '\\' || c == '\'')
            res ~= "\\";
        res ~= to!string(c);
    }
    else
    {
        res ~= "\\";
        switch (c)
        {
        case '\a': res ~= "a"; break;
        case '\b': res ~= "b"; break;
        case '\f': res ~= "f"; break;
        case '\n': res ~= "n"; break;
        case '\r': res ~= "r"; break;
        case '\t': res ~= "t"; break;
        case '\v': res ~= "v"; break;
        default:
            char[8] buf;
            size_t cnt;
            if (c <= 0xFF)
            {
                res ~= "x";
                cnt = 2;
            }
            else if (c <= 0xFFFF)
            {
                res ~= "u";
                cnt = 4;
            }
            else
            {
                res ~= "U";
                cnt = 8;
            }
            // The digits are extracted arithmetically, least significant
            // nibble first, which is independent of the byte order of the
            // machine; no endianness conversion is needed.
            foreach_reverse(i; 0 .. cnt)
            {
                buf[i] = toHex(c & 0xF);
                c >>= 4;
            }
            res ~= buf[0 .. cnt];
        }
    }
    res ~= "'";
    return res;
}
// Lexicographic three-way comparison of two ranges.
// Returns -1, 0 or 1; a range that is a proper prefix of the other
// orders first.
int compare(R)(R lhs, R rhs) if(isInputRange!R)
{
    // walk both ranges in lock-step until one of them runs out
    while (!lhs.empty && !rhs.empty)
    {
        if (lhs.front < rhs.front)
            return -1;
        if (!(lhs.front == rhs.front))
            return 1;
        lhs.popFront();
        rhs.popFront();
    }
    // at least one side is exhausted here
    if (lhs.empty)
        return rhs.empty ? 0 : -1;
    return 1;
}
// Three-way comparison of two non-range values: -1, 0 or 1.
int compare(R)(R lhs, R rhs) if(!isInputRange!R)
{
    return lhs < rhs ? -1
         : lhs == rhs ? 0
         : 1;
}
// Sorts range with a simple quicksort and returns the sorted array.
// pred is a three-way comparison; only a result of 1 ("greater") places
// an element after the pivot.
R[] sort(alias pred, R)(R[] range)
{
    // zero or one element is already sorted
    if (range.length < 2)
        return range;

    // the first element serves as the pivot
    auto pivot = range.front;
    range.popFront();

    R[] lower, upper;
    foreach (item; range)
    {
        if (pred(item, pivot) != 1)
            lower ~= item;
        else
            upper ~= item;
    }
    return sort!pred(lower) ~ pivot ~ sort!pred(upper);
}
// Checks sort together with compare on a scrambled alphabet.
unittest {
auto toSort = "qwertzuiopasdfghjklyxcvbnm"d.dup;
assert(sort!compare(toSort) == "abcdefghijklmnopqrstuvwxyz"d);
}
// Returns the indices of range ordered by the elements they refer to;
// range itself is left untouched.
size_t[] makeIndex(R)(R[] range)
{
    // start from the identity permutation 0, 1, .., length - 1
    size_t[] order;
    order.length = range.length;
    foreach (pos, ref slot; order)
        slot = pos;

    // compares two positions by the elements stored there
    int byElement(size_t lhs, size_t rhs)
    {
        return compare(range[lhs], range[rhs]);
    }
    return sort!byElement(order);
}
// Binary search over haystack, which must be ordered by pred. Returns
// the index of the first element for which pred(element, needle) is
// false, or haystack.length when there is none.
size_t transitionIndex(alias pred="a < b", R, E)(R haystack, E needle)
{
    alias binaryFun!pred precedes;
    size_t begin = 0;
    size_t end = haystack.length;
    while (begin != end)
    {
        immutable mid = begin + (end - begin) / 2;
        if (!precedes(haystack[mid], needle))
            end = mid;
        else
            begin = mid + 1;
    }
    return begin;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* Contains some contributions under the Thrift Software License.
* Please see doc/old-thrift-license.txt in the Thrift distribution for
* details.
*/
// Per-language namespace declarations; the last entry uses the scope '*'.
namespace c_glib TTest
namespace java thrift.test
namespace cpp thrift.test
namespace rb Thrift.Test
namespace perl ThriftTest
namespace csharp Thrift.Test
namespace js ThriftTest
namespace st ThriftTest
namespace py ThriftTest
namespace py.twisted ThriftTest
namespace go ThriftTest
namespace php ThriftTest
namespace delphi Thrift.Test
namespace cocoa ThriftTest
namespace * thrift.test
/**
* Docstring!
*/
// Enum mixing explicit values with members that have none.
enum Numberz
{
ONE = 1,
TWO,
THREE,
FIVE = 5,
SIX,
EIGHT = 8
}
// Constant initialised from a qualified enum member.
const Numberz myNumberz = Numberz.ONE;
// the following is expected to fail:
// const Numberz urNumberz = ONE;
// Type alias that is used by several definitions below.
typedef i64 UserId
// Basic structs and exceptions; field ids are not always contiguous.
struct Bonk
{
1: string message,
2: i32 type
}
// Note the separator after the last field.
struct Bools {
1: bool im_true,
2: bool im_false,
}
struct Xtruct
{
1: string string_thing,
4: byte byte_thing,
9: i32 i32_thing,
11: i64 i64_thing
}
// Contains another struct as a field.
struct Xtruct2
{
1: byte byte_thing,
2: Xtruct struct_thing,
3: i32 i32_thing
}
struct Xtruct3
{
1: string string_thing,
4: i32 changed,
9: i32 i32_thing,
11: i64 i64_thing
}
// Container fields keyed by and containing user-defined types.
struct Insanity
{
1: map<Numberz, UserId> userMap,
2: list<Xtruct> xtructs
}
// Deeply nested container types plus optional/required qualifiers.
struct CrazyNesting {
1: string string_field,
2: optional set<Insanity> set_field,
3: required list< map<set<i32>,map<i32,set<list<map<Insanity,string>>>>>> list_field,
4: binary binary_field
}
exception Xception {
1: i32 errorCode,
2: string message
}
exception Xception2 {
1: i32 errorCode,
2: Xtruct struct_thing
}
// Struct without any fields.
struct EmptyStruct {}
struct OneField {
1: EmptyStruct field
}
// Service with one function per base type, container type and
// user-defined type, followed by multi-argument, throwing and oneway
// functions.
service ThriftTest
{
void testVoid(),
string testString(1: string thing),
byte testByte(1: byte thing),
i32 testI32(1: i32 thing),
i64 testI64(1: i64 thing),
double testDouble(1: double thing),
Xtruct testStruct(1: Xtruct thing),
Xtruct2 testNest(1: Xtruct2 thing),
map<i32,i32> testMap(1: map<i32,i32> thing),
map<string,string> testStringMap(1: map<string,string> thing),
set<i32> testSet(1: set<i32> thing),
list<i32> testList(1: list<i32> thing),
Numberz testEnum(1: Numberz thing),
UserId testTypedef(1: UserId thing),
map<i32,map<i32,i32>> testMapMap(1: i32 hello),
/* So you think you've got this all worked, out eh? */
map<UserId, map<Numberz,Insanity>> testInsanity(1: Insanity argument),
/* Multiple parameters */
Xtruct testMulti(1: byte arg0, 2: i32 arg1, 3: i64 arg2, 4: map<i16, string> arg3, 5: Numberz arg4, 6: UserId arg5),
/* Exception specifier */
void testException(1: string arg) throws(1: Xception err1),
/* Multiple exceptions specifier */
// No separator follows this function.
Xtruct testMultiException(1: string arg0, 2: string arg1) throws(1: Xception err1, 2: Xception2 err2)
/* Test oneway void */
oneway void testOneway(1:i32 secondsToSleep)
}
// Second, minimal service.
service SecondService
{
void blahBlah()
}
// Two versions of a struct sharing field ids 1 and 12.
struct VersioningTestV1 {
1: i32 begin_in_both,
3: string old_string,
12: i32 end_in_both
}
struct VersioningTestV2 {
1: i32 begin_in_both,
2: i32 newint,
3: byte newbyte,
4: i16 newshort,
5: i64 newlong,
// No separator follows the next field.
6: double newdouble
7: Bonk newstruct,
8: list<i32> newlist,
9: set<i32> newset,
10: map<i32, i32> newmap,
11: string newstring,
12: i32 end_in_both
}
// Same field ids, different list element type; fields end in ';'.
struct ListTypeVersioningV1 {
1: list<i32> myints;
2: string hello;
}
struct ListTypeVersioningV2 {
1: list<string> strings;
2: string hello;
}
struct GuessProtocolStruct {
7: map<string,string> map_field,
}
// Field ids with large gaps between them.
struct LargeDeltas {
1: Bools b1,
10: Bools b10,
100: Bools b100,
500: bool check_true,
1000: Bools b1000,
1500: bool check_false,
2000: VersioningTestV2 vertwo2000,
2500: set<string> a_set2500,
3000: VersioningTestV2 vertwo3000,
4000: list<i32> big_numbers
}
struct NestedListsI32x2 {
1: list<list<i32>> integerlist
}
struct NestedListsI32x3 {
1: list<list<list<i32>>> integerlist
}
// Fields are separated by line breaks only.
struct NestedMixedx2 {
1: list<set<i32>> int_set_list
2: map<i32,set<string>> map_int_strset
3: list<map<i32,set<string>>> map_int_strset_list
}
struct ListBonks {
1: list<Bonk> bonk
}
struct NestedListsBonk {
1: list<list<list<Bonk>>> bonk
}
// Optional fields with default values.
struct BoolTest {
1: optional bool b = true;
2: optional string s = "true";
}
struct StructA {
1: required string s;
}
struct StructB {
1: optional StructA aa;
2: required StructA ab;
}
module thrift_idl;
import lexer, std.algorithm, std.conv, std.functional,
std.range, std.typetuple, std.typecons;
// D types standing in for the Thrift type names i16, i32, i64 and binary.
alias short i16;
alias int i32;
alias long i64;
alias ubyte[] binary;
// Zero-sized type.
// NOTE(review): presumably the value type of an associative array that
// represents a Thrift set; its use is not visible in this part of the file.
alias void[0] SetDummy;
// Mixes in the D code that DEmitter.codegen produces for the Thrift IDL
// source `text`; since the result is mixed in, codegen runs at compile time.
template ThriftIDL(string text)
{
mixin(DEmitter.codegen(text));
}
// Advances input to the first position matching pred and returns the
// part that was skipped. pred is either a string to search for or a
// predicate on single characters. When nothing matches, input ends up
// empty and the whole original text is returned.
string takeUntil(alias pred)(ref string input)
{
    auto whole = input.save;
    immutable before = input.length;
    static if (is(typeof(pred) == string))
        input = find(input, pred);
    else
        input = find!(pred)(input);
    return whole[0 .. before - input.length];
}
// Compile-time tuple of the values 0, 1, .., i - 1 (empty for i == 0).
template SIota(size_t i)
{
    static if (i == 0)
        alias TypeTuple!() SIota;
    else
        alias TypeTuple!(SIota!(i - 1), i - 1) SIota;
}
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Lexer
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/*
 * Lexer for Thrift IDL source. Punctuation, string literals and comments
 * are matched through the generated trie; identifiers, keywords and
 * integers are recognised by lexFallThrough.
 */
struct ThriftLexer
{
    import std.ascii;

    alias TokenDeclaration!string tok;

    // Tokens with a pattern. String and comment tokens use `upto` to
    // consume everything up to and including their terminator.
    enum Tokens = [
        tok("Lcurly"    , "{"  ),
        tok("Rcurly"    , "}"  ),
        tok("Lparen"    , "("  ),
        tok("Rparen"    , ")"  ),
        tok("Lbracket"  , "["  ),
        tok("Rbracket"  , "]"  ),
        tok("Semicolon" , ";"  ),
        tok("Colon"     , ":"  ),
        tok("Comma"     , ","  ),
        tok("Dot"       , "."  ),
        tok("Assign"    , "="  ),
        tok("Lt"        , "<"  ),
        tok("Gt"        , ">"  ),
        tok("Star"      , "*"  ),
        tok("String"    , "\"" , q{upto!"\""}       ),
        tok("String"    , "'"  , q{upto!"'"}        ),
        tok("Comment"   , "#"  , q{upto!isNewLine}  ),
        tok("Comment"   , "//" , q{upto!isNewLine}  ),
        tok("Comment"   , "/*" , q{upto!"*/"}       ),
    ];

    // Declared without a pattern: these only contribute enum members.
    enum BaseTypes = [
        tok("Bool"),
        tok("Byte"),
        tok("i16"),
        tok("i32"),
        tok("i64"),
        tok("Double"),
        tok("String"),
        tok("Binary"),
        tok("SList"),
        tok("Void"),
    ];

    // Declared without a pattern; recognised through the `keywords`
    // table, which maps the lower-cased name to the enum member.
    enum Keywords = [
        tok("Include"),
        tok("CPP_Include"),
        tok("Namespace"),
        tok("XSD_Namespace"),
        tok("PHP_Namespace"),
        tok("Const"),
        tok("TypeDef"),
        tok("Enum"),
        tok("SEnum"),
        tok("Struct"),
        tok("XSD_ANY"),
        tok("Required"),
        tok("Optional"),
        tok("Exception"),
        tok("Service"),
        tok("Extends"),
        tok("Oneway"),
        tok("Throws"),
    ];

    enum Other = [
        tok("Identifier"),
        tok("Integral"),
        tok("Floating"),
    ];

    enum TokenSpec = Tokens ~ BaseTypes ~ Keywords ~ Other;

    alias Lexer1!(string, TokenSpec) Tok;

    static struct Token
    {
        Tok _id;
        string _text;
    }

    /*
     * Action for tokens that extend to a terminator. Consumes the
     * opening pattern of length len, then everything up to and including
     * the terminator described by pred (a string or a character
     * predicate). The token text covers the whole consumed span.
     * Throws: Exception when a string terminator is missing. A missing
     * character terminator (e.g. a line comment on the last line without
     * a trailing newline) simply ends the token at the end of input.
     */
    static Token upto(alias pred)(Tok id, ref string input, size_t len)
    {
        auto text = input.save;
        input.popFrontN(len);
        input.takeUntil!pred();
        static if (is(typeof(pred) == string))
        {
            // takeUntil leaves input empty when pred was not found
            if (input.empty)
                throw new Exception("Unterminated token, missing closing `" ~ pred ~ "`.");
            input = input[pred.length .. $];
        }
        else
        {
            if (!input.empty)
                input.popFront;
        }
        text = text[0 .. $ - input.length];
        return Token(id, text);
    }

    static bool isNewLine(dchar c)
    {
        // imported here because the enclosing module does not import std.uni
        import std.uni : lineSep, paraSep;
        switch (c)
        {
        case '\n', '\r', lineSep, paraSep:
            return true;
        default:
            return false;
        }
    }

    // Action for tokens without a custom action: consume the pattern,
    // the token carries no text.
    Token defaultAction(Tok id, ref string input, size_t len)
    {
        auto tok = Token(id, null);
        popFrontN(input, len);
        return tok;
    }

    // Skips white space between tokens.
    void skip(ref string input)
    {
        input.takeUntil!(not!isWhite)();
    }

    // Called when no pattern matches: lexes identifiers (which may
    // contain '.' and '_'), keywords and unsigned integers.
    Token lexFallThrough(ref string input)
    {
        auto c = input.front;
        if (isAlpha(c) || c == '_')
        {
            auto id = input.takeUntil!((ch) => !isAlphaNum(ch) && ch != '_' && ch != '.')();
            if (auto pid = id in keywords)
                return Token(*pid, null);
            return Token(Tok.Identifier, id);
        }
        else if (isDigit(c))
        {
            auto id = input.takeUntil!(not!isDigit)();
            return Token(Tok.Integral, id);
        }
        else
        {
            assert(0, "Lexical error\n" ~ input);
        }
    }

    /*
     * Second stage of lexer template. Provides the actual definitions
     * and methods. It adds a range compatible interface for token
     * lexing. It will also instantiate our actions which is fine now,
     * because the functions are defined.
     */
    mixin Lexer2!(string, defaultAction, skip, lexFallThrough, TokenSpec);
    string _input;

    // Builds the source of an associative array literal mapping the
    // lower-cased keyword names to their enum members.
    static string kwTab()
    {
        // imported here because the enclosing module does not import std.string
        import std.string : toLower;
        string res = "[\n";
        foreach(decl; Keywords)
        {
            res ~= "\"" ~ toLower(decl._name) ~ "\" : Tok." ~ decl._name ~ ",\n";
        }
        res ~= "]";
        return res;
    }

    enum keywords = mixin(kwTab());
}
// Module-level shorthands for the lexer's token id enum and token struct.
alias ThriftLexer.Tok Tok;
alias ThriftLexer.Token Token;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Parser
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Parses Thrift IDL source into an AST. Comment tokens are dropped,
// then headers are parsed until the first non-header token, and
// everything after that is parsed as definitions.
Root parseThrift(string text)
{
    version (none)
    { // nested struct with ctor not supported by CTFE
        auto lexer = filter!(a => a._id != Tok.Comment)(ThriftLexer(text));
    }
    else
    {
        // Range over the lexer that never exposes a comment token.
        static struct Filter
        {
            this(ThriftLexer inp)
            {
                _inp = inp;
                dropComments();
            }
            @property bool empty() { return _inp.empty; }
            Token front() { return _inp.front; }
            void popFront()
            {
                _inp.popFront;
                dropComments();
            }
            // advance the lexer past any run of comment tokens
            private void dropComments()
            {
                while (!_inp.empty && _inp.front._id == Tok.Comment)
                    _inp.popFront;
            }
            ThriftLexer _inp;
        }
        auto lexer = Filter(ThriftLexer(text));
    }

    Node[] nodes;
    // header section: stops at the first token that is not a header
    for (;;)
    {
        if (lexer.empty)
            break;
        auto header = parseHeader(lexer);
        if (header is null)
            break;
        nodes ~= header;
    }
    // definition section: everything that is left
    while (!lexer.empty)
        nodes ~= parseDefinition(lexer);
    return new Root(nodes);
}
//................................................................................
// Parse utils
// Consumes the current token, which must have the given id, and returns
// it. Asserts that the lexer is not empty and that the id matches.
Token expect(R)(ref R lexer, Tok id)
{
    assert(!lexer.empty);
    Token current = lexer.front;
    // on mismatch the message names the id that was found instead
    assert(current._id == id, to!string(current._id));
    lexer.popFront;
    return current;
}
// Tells whether the upcoming tokens have exactly the given ids, in
// order, without consuming anything from lexer.
bool peekEquals(R)(ref R lexer, Tok[] ids...)
{
    // look ahead on a copy so the caller's lexer stays where it is
    auto probe = lexer/*.save*/;
    foreach (wanted; ids)
    {
        immutable matches = !probe.empty && probe.front._id == wanted;
        if (!matches)
            return false;
        probe.popFront;
    }
    return true;
}
// Consumes the current token if its id is one of ids. Returns whether a
// token was consumed.
bool skipAny(R)(ref R lexer, Tok[] ids...)
{
    if (lexer.empty || !ids.canFind(lexer.front._id))
        return false;
    lexer.popFront;
    return true;
}
// Parses one header (include, cpp_include or namespace). Returns null,
// without consuming anything, when the current token does not start a
// header. Throws for the deprecated php_namespace and xsd_namespace.
Node parseHeader(R)(ref R lexer)
{
    switch (lexer.front._id)
    {
    case Tok.Include:
    {
        lexer.popFront;
        auto path = expect(lexer, Tok.String)._text;
        return new Include(path);
    }
    case Tok.CPP_Include:
    {
        lexer.popFront;
        auto path = expect(lexer, Tok.String)._text;
        return new CppInclude(path);
    }
    case Tok.Namespace:
    {
        lexer.popFront;
        auto language = parseNamespaceScope(lexer);
        auto name = expect(lexer, Tok.Identifier)._text;
        return new Namespace(language, name);
    }
    case Tok.XSD_Namespace:
        throw new Exception("'xsd_namespace' is deprecated, use 'namespace xsd' instead.");
    case Tok.PHP_Namespace:
        throw new Exception("'php_namespace' is deprecated, use 'namespace php' instead.");
    default:
        return null;
    }
}
// Parses one top-level definition: const, typedef, enum, senum, struct,
// exception or service. Any other token is a fatal error.
Node parseDefinition(R)(ref R lexer)
{
    auto first = lexer.front;
    switch (first._id)
    {
    case Tok.Const:
    {
        lexer.popFront;
        auto type = parseType(lexer);
        auto name = expect(lexer, Tok.Identifier)._text;
        expect(lexer, Tok.Assign);
        auto value = parseConstValue(lexer);
        // an optional list separator may follow the value
        skipAny(lexer, Tok.Comma, Tok.Semicolon);
        return new Const(type, name, value);
    }
    case Tok.TypeDef:
    {
        lexer.popFront;
        auto type = parseType(lexer);
        auto name = expect(lexer, Tok.Identifier)._text;
        return new Typedef_(type, name);
    }
    case Tok.Enum:
    {
        lexer.popFront;
        auto name = expect(lexer, Tok.Identifier)._text;
        expect(lexer, Tok.Lcurly);
        auto members = parseEnum(lexer);
        expect(lexer, Tok.Rcurly);
        return new Enum(name, members);
    }
    case Tok.SEnum:
    {
        lexer.popFront;
        auto name = expect(lexer, Tok.Identifier)._text;
        expect(lexer, Tok.Lcurly);
        auto members = parseSEnum(lexer);
        expect(lexer, Tok.Rcurly);
        return new Senum(name, members);
    }
    case Tok.Struct:
    {
        lexer.popFront;
        auto name = expect(lexer, Tok.Identifier)._text;
        expect(lexer, Tok.Lcurly);
        // parseFields also consumes the closing brace
        auto members = parseFields(lexer, Tok.Rcurly);
        return new Struct(name, members);
    }
    case Tok.Exception:
    {
        lexer.popFront;
        auto name = expect(lexer, Tok.Identifier)._text;
        expect(lexer, Tok.Lcurly);
        // parseFields also consumes the closing brace
        auto members = parseFields(lexer, Tok.Rcurly);
        return new Exception_(name, members);
    }
    case Tok.Service:
    {
        lexer.popFront;
        auto name = expect(lexer, Tok.Identifier)._text;
        string parent;
        if (peekEquals(lexer, Tok.Extends))
        {
            lexer.popFront;
            parent = expect(lexer, Tok.Identifier)._text;
        }
        expect(lexer, Tok.Lcurly);
        auto functions = parseFunctions(lexer);
        expect(lexer, Tok.Rcurly);
        return new Service(name, parent, functions);
    }
    default:
        assert(0, to!string(first._id));
    }
}
// Parses a type: a named type, one of the containers map/set/list (each
// with an optional cpp_type annotation), or a base type token.
Node parseType(R)(ref R lexer)
{
auto tok = lexer.front;
switch (tok._id)
{
case Tok.Identifier:
string cppType;
// the container names arrive as plain identifiers and are told apart
// by their text; every branch of the inner switch returns
switch (tok._text)
{
default:
return new TypeIdent(expect(lexer, Tok.Identifier)._text);
case "map":
lexer.popFront;
// for map and set the cpp_type annotation precedes '<'
cppType = optCppType(lexer);
expect(lexer, Tok.Lt);
auto kt = parseType(lexer);
expect(lexer, Tok.Comma);
auto vt = parseType(lexer);
expect(lexer, Tok.Gt);
return new MapType(kt, vt, cppType);
case "set":
lexer.popFront;
cppType = optCppType(lexer);
expect(lexer, Tok.Lt);
auto kt = parseType(lexer);
expect(lexer, Tok.Gt);
return new SetType(kt, cppType);
case "list":
lexer.popFront;
expect(lexer, Tok.Lt);
auto vt = parseType(lexer);
expect(lexer, Tok.Gt);
// for list the cpp_type annotation follows '>'
cppType = optCppType(lexer);
return new ListType(vt, cppType);
}
// SIota yields a compile-time tuple, so this foreach is unrolled and
// contributes one case label per entry of ThriftLexer.BaseTypes; all of
// these labels lead to the two statements that follow the last label.
foreach(i; SIota!(ThriftLexer.BaseTypes.length))
case mixin(`Tok.`~ThriftLexer.BaseTypes[i]._name):
lexer.popFront;
return new BaseType(tok._id);
default:
assert(0, to!string(tok._id));
}
}
// Parses an optional `cpp_type "..."` annotation. Returns the text of
// the string token, or null (consuming nothing) when the annotation is
// absent.
string optCppType(R)(ref R lexer)
{
    if (lexer.empty)
        return null;
    auto next = lexer.front;
    if (next._id != Tok.Identifier || next._text != "cpp_type")
        return null;
    lexer.popFront;
    return expect(lexer, Tok.String)._text;
}
// Parses the scope of a namespace header: either '*' or an identifier.
string parseNamespaceScope(R)(ref R lexer)
{
    if (!peekEquals(lexer, Tok.Star))
        return expect(lexer, Tok.Identifier)._text;
    lexer.popFront;
    return "*";
}
// Parses a constant value: integer, float, string, identifier, list
// `[...]` or map `{key: value ...}`. Returns null, consuming nothing,
// when the current token cannot start a value.
Node parseConstValue(R)(ref R lexer)
{
    auto tok = lexer.front;
    switch (tok._id)
    {
    case Tok.Integral:
    {
        Node number = new Integral(parse!long(tok._text));
        lexer.popFront;
        return number;
    }
    case Tok.Floating:
    {
        Node number = new Floating(parse!double(tok._text));
        lexer.popFront;
        return number;
    }
    case Tok.String:
    {
        Node str = new String(tok._text);
        lexer.popFront;
        return str;
    }
    case Tok.Identifier:
        return new ValueIdent(expect(lexer, Tok.Identifier)._text);
    case Tok.Lbracket:
    {
        lexer.popFront;
        Node[] items;
        for (;;)
        {
            auto item = parseConstValue(lexer);
            skipAny(lexer, Tok.Comma, Tok.Semicolon);
            // a null value marks the end of the element list
            if (item is null)
                break;
            items ~= item;
        }
        expect(lexer, Tok.Rbracket);
        return new List(items);
    }
    case Tok.Lcurly:
    {
        lexer.popFront;
        Node[] keys;
        Node[] values;
        for (;;)
        {
            auto key = parseConstValue(lexer);
            // a null key marks the end of the entries
            if (key is null)
                break;
            expect(lexer, Tok.Colon);
            auto value = parseConstValue(lexer);
            assert(value !is null);
            keys ~= key;
            values ~= value;
            skipAny(lexer, Tok.Comma, Tok.Semicolon);
        }
        expect(lexer, Tok.Rcurly);
        return new Map(keys, values);
    }
    default:
        return null;
    }
}
// Parses the members of an enum body, up to but excluding the closing
// brace. Returns (name, value) pairs. A member without an explicit value
// gets the previous member's value plus one, starting at 0.
Tuple!(string, long)[] parseEnum(R)(ref R lexer)
{
    typeof(return) result;
    long val; // value for the next member that has no explicit one
    while (peekEquals(lexer, Tok.Identifier))
    {
        auto id = lexer.front._text;
        lexer.popFront;
        // to!long is used rather than parse!long because parse takes its
        // source by ref and the token text here is a temporary
        if (skipAny(lexer, Tok.Assign))
            val = to!long(expect(lexer, Tok.Integral)._text);
        result ~= tuple(id, val++);
        skipAny(lexer, Tok.Comma, Tok.Semicolon);
    }
    return result;
}
// Parses the members of an senum body, up to but excluding the closing
// brace. Returns the text of each string token; separators between
// members are optional.
string[] parseSEnum(R)(ref R lexer)
{
    typeof(return) result;
    while (peekEquals(lexer, Tok.String))
    {
        result ~= lexer.front._text;
        lexer.popFront;
        skipAny(lexer, Tok.Comma, Tok.Semicolon);
    }
    return result;
}
// Parses a list of fields (struct members, function parameters or
// throws clauses) up to and including the token `end`.
// Each field is: [id ':'] [required|optional] type name ['=' value],
// optionally followed by ',' or ';'. A field without an id gets id 0.
Field[] parseFields(R)(ref R lexer, Tok end)
{
    typeof(return) result;
    while (!peekEquals(lexer, end))
    {
        int fid;
        if (peekEquals(lexer, Tok.Integral))
        {
            // to!int is used rather than parse!int because parse takes
            // its source by ref and the token text here is a temporary
            fid = to!int(lexer.front._text);
            lexer.popFront;
            expect(lexer, Tok.Colon);
        }

        // optional requiredness keyword; defaults to FieldReq.none
        FieldReq req;
        if (peekEquals(lexer, Tok.Required))
        {
            req = FieldReq.required;
            lexer.popFront;
        }
        else if (peekEquals(lexer, Tok.Optional))
        {
            req = FieldReq.optional;
            lexer.popFront;
        }

        auto ft = parseType(lexer);
        auto id = expect(lexer, Tok.Identifier)._text;

        // optional default value
        Node val;
        if (peekEquals(lexer, Tok.Assign))
        {
            lexer.popFront;
            val = parseConstValue(lexer);
        }
        result ~= new Field(fid, req, ft, id, val);
        skipAny(lexer, Tok.Comma, Tok.Semicolon);
    }
    // consume the closing token
    expect(lexer, end);
    return result;
}
// Parses the functions of a service body, up to but excluding the
// closing brace. Each function is:
// [oneway] type name '(' fields ')' [throws '(' fields ')'],
// optionally followed by ',' or ';'.
Node[] parseFunctions(R)(ref R lexer)
{
    Node[] functions;
    while (!peekEquals(lexer, Tok.Rcurly))
    {
        immutable isOneway = skipAny(lexer, Tok.Oneway);
        auto returnType = parseType(lexer);
        auto name = expect(lexer, Tok.Identifier)._text;
        expect(lexer, Tok.Lparen);
        auto parameters = parseFields(lexer, Tok.Rparen);

        Field[] raised;
        if (peekEquals(lexer, Tok.Throws))
        {
            lexer.popFront;
            expect(lexer, Tok.Lparen);
            raised = parseFields(lexer, Tok.Rparen);
        }
        functions ~= new Function(returnType, name, parameters, raised, isOneway);
        skipAny(lexer, Tok.Comma, Tok.Semicolon);
    }
    return functions;
}
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Top of the AST: holds all headers and definitions of one IDL source.
class Root
{
    Node[] _members;

    this(Node[] members)
    {
        this._members = members;
    }

    override string toString()
    {
        return to!string(_members);
    }

    // Lets the visitor visit every member, in order.
    void apply(Visitor vistor)
    {
        foreach (member; _members)
            member.apply(vistor);
    }
}
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// AST
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Common interface of all AST nodes.
interface Node
{
// Dispatches to the visitor.
void apply(Visitor v);
}
// Generic AST node whose payload is a Tuple described by TupleSpecs
// (alternating types and field names). `name` is not referenced in the
// body.
// NOTE(review): `name` presumably serves to make nodes with identical
// field layouts distinct types; confirm.
class AST(string name, TupleSpecs...) : Node
{
// construct from the individual field values
this(typeof(_tup).Types args)
{
this(typeof(_tup)(args));
}
// construct from a ready-made tuple
this(typeof(_tup) tup)
{
_tup = tup;
}
// passes this node, with its concrete type, to the visitor
void apply(Visitor v) { v.visit(this); }
Tuple!TupleSpecs _tup;
// exposes the tuple's members directly on the node
alias _tup this;
}
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Headers
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Header nodes; each alias lists the node name followed by its fields.
alias AST!("Include", string, "mod") Include;
alias AST!("CppInclude", string, "mod") CppInclude;
alias AST!("Namespace", string, "namescope", string, "ident") Namespace;
// All header node types.
alias TypeTuple!(Include, CppInclude, Namespace) Headers;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Definitions
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Requiredness of a field; `none` is the default when no keyword is given.
// NOTE(review): `ss` is not referenced in the visible part of this file.
enum FieldReq { none, required, optional, ss}
// Definition nodes; each alias lists the node name followed by its fields.
alias AST!("Const", Node, "type", string, "ident", Node, "value") Const;
alias AST!("Field", int, "fid", FieldReq, "req", Node, "type", string, "ident", Node, "value") Field;
alias AST!("Typedef_", Node, "type", string, "ident") Typedef_;
alias AST!("Enum", string, "ident", Tuple!(string, long)[], "members") Enum;
alias AST!("Senum", string, "ident", string[], "members") Senum;
alias AST!("Struct", string, "ident", Field[], "fields") Struct;
alias AST!("Exception_", string, "ident", Field[], "fields") Exception_;
alias AST!("Function", Node, "rtype", string, "ident", Field[], "args", Field[], "exceptions", bool, "oneway") Function;
alias AST!("Service", string, "ident", string, "base", Node[], "functions") Service;
// All definition node types.
alias TypeTuple!(Const, Field, Typedef_, Enum, Senum, Struct, Exception_, Function, Service) Definitions;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Types
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Type nodes; each alias lists the node name followed by its fields.
alias AST!("TypeIdent", string, "ident") TypeIdent;
alias AST!("BaseType", Tok, "tid") BaseType;
alias AST!("MapType", Node, "keyType", Node, "valueType", string, "cppType") MapType;
alias AST!("SetType", Node, "keyType", string, "cppType") SetType;
alias AST!("ListType", Node, "valueType", string, "cppType") ListType;
// All type node types.
alias TypeTuple!(TypeIdent, BaseType, MapType, SetType, ListType) Types;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Values
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Constant value nodes; each alias lists the node name followed by its
// fields.
alias AST!("Integral", long, "value") Integral;
alias AST!("Floating", double, "value") Floating;
alias AST!("String", string, "value") String;
alias AST!("ValueIdent", string, "ident") ValueIdent;
alias AST!("Map", Node[], "keys", Node[], "values") Map;
alias AST!("List", Node[], "values") List;
// All value node types.
alias TypeTuple!(Integral, Floating, String, ValueIdent, Map, List) Values;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Visitor
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Flat tuple of every AST node type; Visitor declares one visit overload per entry.
// Headers is declared outside this section of the file.
alias TypeTuple!(Headers, Definitions, Types, Values) ASTNodes;
// Visitor interface exposing one `void visit(T)` overload for each type in ASTNodes.
interface Visitor
{
// Recursive mixin template: declares visit for the first type in Nodes, then
// mixes itself in again for the remaining types (if any).
mixin template _visit(Nodes...) if(Nodes.length)
{
void visit(Nodes[0] n);
static if (Nodes.length > 1)
{
// The nested mixin is given the name `vi` so that its visit declarations can be
// aliased into this scope alongside the overload declared above.
mixin _visit!(Nodes[1 .. $]) vi;
alias vi.visit visit;
}
}
mixin _visit!(ASTNodes);
}
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Util
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/*
 * Minimal "%s"-only formatter.
 * Replaces successive occurrences of "%s" in s, left to right, with
 * to!string of the corresponding argument and returns the result.
 * It is also evaluated at compile time (DEmitter.codegen is run inside
 * pragma(msg) by the unittest in this file).
 * Parameters:
 * s - format string; only the literal placeholder "%s" is recognised
 * args - values to substitute, at least one
 * Returns: the formatted string. Placeholders without a matching argument
 * are left in place; arguments without a matching placeholder are ignored.
 */
string format(Args...)(string s, Args args) if(Args.length)
{
    string result;
    foreach(a; args)
    {
        auto tail = s.find("%s");
        // No placeholder left: find returned an empty range, so slicing
        // tail[2 .. $] would be out of bounds. Stop substituting instead.
        if (!tail.length)
            break;
        // Copy the text preceding the placeholder, then the argument.
        result ~= s[0 .. $ - tail.length];
        result ~= to!string(a);
        // Continue scanning after the two-character "%s".
        s = tail[2 .. $];
    }
    result ~= s;
    return result;
}
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// D code emitter
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/*
 * Visitor that renders a parsed Thrift AST as D source text.
 * Every visit overload appends to the `code` member; child nodes are
 * rendered by calling apply(this) on them. Node types that have no
 * override here (see the "CppInclude, Namespace" note below) fall back to
 * whatever BlackHole!Visitor provides for them.
 */
class DEmitter : BlackHole!Visitor
{
    /*
     * Parses idl with parseThrift and returns the D code generated for it.
     */
    static string codegen(string idl)
    {
        return codegen(parseThrift(idl));
    }

    /*
     * Applies a fresh DEmitter to root and returns the accumulated code.
     */
    static string codegen(Root root)
    {
        auto self = new DEmitter;
        root.apply(self);
        return self.code;
    }

    // Emits a ThriftIDL mixin that imports the included module.
    // NOTE(review): inc.mod is inserted verbatim, so it presumably already
    // carries its quotes - confirm against the lexer.
    override void visit(Include inc)
    {
        code ~= format("mixin ThriftIDL!(import(%s));\n", inc.mod);
    }
    // CppInclude, Namespace
    // /Headers

    // Emits `immutable <type> <ident> = <value>;`.
    override void visit(Const constdef)
    {
        code ~= "immutable ";
        constdef.type.apply(this);
        code ~= format(" %s = ", constdef.ident);
        constdef.value.apply(this);
        code ~= ";\n";
    }

    // Emits `<type> <ident>` followed by ` = <value>` when a default is set.
    // No terminator is written; callers add ";" or "," as needed.
    // field.fid and field.req are not emitted.
    override void visit(Field field)
    {
        field.type.apply(this);
        code ~= " " ~ field.ident;
        if (field.value !is null)
        {
            code ~= " = ";
            field.value.apply(this);
        }
    }

    // Emits `alias <type> <ident>;`.
    override void visit(Typedef_ td)
    {
        code ~= "alias ";
        td.type.apply(this);
        code ~= format(" %s;\n", td.ident);
    }

    // Emits a D enum with one `name = value,` line per member.
    override void visit(Enum en)
    {
        code ~= format("enum %s\n{\n", en.ident);
        foreach(entry; en.members)
            code ~= format(" %s = %s,\n", entry[0], entry[1]);
        code ~= "}\n";
    }

    // Emits a string-based enum; members are named _0, _1, ... by index.
    override void visit(Senum en)
    {
        code ~= format("enum %s : string\n{\n", en.ident);
        foreach(i, entry; en.members)
            code ~= format(" _%s = %s,\n", i, entry);
        code ~= "}\n";
    }

    // Emits a D struct with one field declaration per line.
    override void visit(Struct s)
    {
        code ~= format("struct %s\n{\n", s.ident);
        foreach(f; s.fields)
        {
            code ~= " ";
            f.apply(this);
            code ~= ";\n";
        }
        code ~="}\n";
    }

    // Emits a class derived from Exception with a default constructor and
    // one field declaration per line.
    override void visit(Exception_ e)
    {
        // TODO: ctor
        code ~= format("class %s : Exception\n{\n", e.ident);
        code ~= " this() { super(null); }\n";
        foreach(f; e.fields)
        {
            code ~= " ";
            f.apply(this);
            code ~= ";\n";
        }
        code ~="}\n";
    }

    // Emits `<rtype> <ident>(<args>)` without a trailing terminator.
    // f.exceptions and f.oneway are not emitted.
    override void visit(Function f)
    {
        f.rtype.apply(this);
        code ~= format(" %s(", f.ident);
        foreach(i, a; f.args)
        {
            if (i) code ~= ", ";
            a.apply(this);
        }
        code ~= ")";
    }

    // Emits a D interface, optionally deriving from the base service, with
    // one function declaration per line.
    override void visit(Service s)
    {
        code ~= "interface " ~ s.ident;
        // Test the length rather than the array itself: a bare `if (s.base)`
        // checks the pointer, so an empty but non-null string would produce
        // "interface X : " with no base name.
        if (s.base.length)
            code ~= " : " ~ s.base;
        code ~= "\n{\n";
        foreach(f; s.functions)
        {
            code ~= " ";
            f.apply(this);
            code ~= ";\n";
        }
        code ~= "}\n";
    }
    // /Definitions

    // Emits the referenced type name unchanged.
    override void visit(TypeIdent ti)
    {
        code ~= ti.ident;
    }

    // Emits the lower-cased name of the token enum member.
    override void visit(BaseType bt)
    {
        code ~= std.string.toLower(to!string(bt.tid));
    }

    // Emits an associative array type: `<valueType>[<keyType>]`.
    override void visit(MapType mt)
    {
        mt.valueType.apply(this);
        code ~= "[";
        mt.keyType.apply(this);
        code ~= "]";
    }

    // Emits `SetDummy[<keyType>]`.
    override void visit(SetType st)
    {
        code ~= "SetDummy[";
        st.keyType.apply(this);
        code ~= "]";
    }

    // Emits a dynamic array type: `<valueType>[]`.
    override void visit(ListType lt)
    {
        lt.valueType.apply(this);
        code ~= "[]";
    }
    // /Types

    // Emits the integer value as text.
    override void visit(Integral i)
    {
        code ~= to!string(i.value);
    }

    // Emits the floating-point value as text.
    override void visit(Floating f)
    {
        code ~= to!string(f.value);
    }

    // Emits the string value verbatim.
    // NOTE(review): no quoting or escaping is added here, so the value
    // presumably still includes its delimiters - confirm against the lexer.
    override void visit(String s)
    {
        code ~= s.value;
    }

    // Emits the referenced value name unchanged.
    override void visit(ValueIdent vi)
    {
        code ~= vi.ident;
    }

    // Emits an associative array literal: `[k0 : v0, k1 : v1, ...]`.
    override void visit(Map m)
    {
        assert(m.keys.length == m.values.length);
        code ~= "[";
        foreach(i; 0 .. m.keys.length)
        {
            if (i) code ~= ", ";
            m.keys[i].apply(this);
            code ~= " : ";
            m.values[i].apply(this);
        }
        code ~= "]";
    }

    // Emits an array literal: `[v0, v1, ...]`.
    override void visit(List l)
    {
        code ~= "[";
        foreach(i; 0 .. l.values.length)
        {
            if (i) code ~= ", ";
            l.values[i].apply(this);
        }
        code ~= "]";
    }

    // Accumulated output; every visit overload appends to it.
    string code;
}
unittest
{
    // Compile-time pass: run the emitter through CTFE, print its output
    // during compilation, then mix the IDL into this scope.
    pragma(msg, ":::::::::::::::::::: CTFE ::::::::::::::::::::");
    pragma(msg, DEmitter.codegen(import("test.thrift")));
    pragma(msg, ":::::::::::::::::::: /CTFE ::::::::::::::::::::");
    mixin ThriftIDL!(import("test.thrift"));

    // Runtime pass: read the same file from disk and print the generated code.
    import std.file : readText;
    import std.stdio : writeln;

    string idlText = readText("test.thrift");
    writeln(":::::::::::::::::::: Runtime ::::::::::::::::::::");
    writeln(DEmitter.codegen(idlText));
    writeln(":::::::::::::::::::: /Runtime ::::::::::::::::::::");
}
@MartinNowak
Copy link
Author

rdmd -J. -unittest --main thrift_idl.d

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment