Created
December 15, 2014 00:46
-
-
Save seancribbs/9f7ac5bf2bbaa50617aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% @doc Implements code-generation using syntax_tools. | |
-module(neotoma_generate). | |
-include("neotoma.hrl"). | |
-compile(export_all). | |
-import(erl_syntax, [ | |
application/3, | |
abstract/1, | |
case_expr/2, | |
clause/3, | |
variable/1, | |
atom/1, | |
binary/1, | |
binary_field/2, | |
underscore/0, | |
list/2, | |
tuple/1, | |
match_expr/2 | |
]). | |
%% Continuation types used by generate/4.
%%
%% failfun(): called with the syntax tree of the input variable and a
%% reason string when a match cannot succeed.
%%
%% successfun(): called with the syntax trees of the captured value and
%% of the remaining input when a match succeeds.
%%
%% NOTE(review): generate/4 uses the results of both continuations as
%% clause bodies (lists of expressions), while a single syntax tree is
%% declared here -- confirm which is intended.
-type failfun() ::
        fun((InputVar::erl_syntax:syntaxTree(), Reason::string()) ->
                   erl_syntax:syntaxTree()).
-type successfun() ::
        fun((CaptureVariable::erl_syntax:syntaxTree(),
             RemainderVariable::erl_syntax:syntaxTree()) ->
                   erl_syntax:syntaxTree()).
%% @doc Generates the syntax tree of the code that matches one grammar
%% node against the input bound to `InputName'.
%%
%% Generation is written in continuation-passing style:
%% `Success(Capture, Rest)' receives the syntax trees of the captured
%% value and of the remaining input; `Fail(Input, Reason)' receives the
%% input variable and a reason string built by error_reason/1. Their
%% results are spliced into the bodies of the generated `case' clauses.
%%
%% NOTE(review): the results of `Success' and `Fail' are used below as
%% clause bodies (lists of expressions), although successfun() and
%% failfun() declare a single syntax tree -- confirm which is intended.
%%
%% TODO: Fix spec to only allow proper metagrammar syntax nodes
-spec generate(tuple(), InputVar::erl_syntax:syntaxTree(),
               successfun(), failfun()) ->
                      erl_syntax:syntaxTree().
generate(#primary{expr=E, label=L}, InputName, Success0, Fail) ->
    %% NOTE: We assume that repetition has been optimized out here and
    %% simply apply the label if it exists.
    Success = case L of
                  undefined ->
                      Success0;
                  _ ->
                      %% Wrap the capture as {Label, Capture}.
                      fun(Capture, Rest) ->
                              Success0(tuple([atom(L), Capture]), Rest)
                      end
              end,
    generate(E, InputName, Success, Fail);
generate(#charclass{charclass=C, index=I}, InputName, Success, Fail) ->
    %% TODO: For now, treating character class as a regexp. In the
    %% future, this can be exploded into a more efficient case
    %% statement.
    generate(#regexp{regexp=C, index=I}, InputName, Success, Fail);
generate(#regexp{regexp=R}, InputName, Success, Fail) ->
    %% Template:
    %%
    %% case re:run(Input0, {{R}}, [anchored]) of
    %%     {match, [{0, Length0}|_]} ->
    %%         {Match0, Rest0} = erlang:split_binary(Input0, Length0),
    %%         {{Success(Match0, Rest0)}};
    %%     _ -> {{Fail}}
    %% end
    %%
    %% Only a match starting at offset 0 is accepted, so the search is
    %% anchored: without it re:run would scan the rest of the input for
    %% a later match before the catch-all clause fails.
    Regexp = abstract(R),
    MatchName = variable(new_name("Match")),
    LengthName = variable(new_name("Length")),
    RestName = variable(new_name("Rest")),
    Run = application(atom("re"), atom("run"),
                      [InputName, Regexp, abstract([anchored])]),
    MatchPattern = tuple([atom("match"),
                          list([tuple([abstract(0), LengthName])],
                               underscore())]),
    Split = match_expr(tuple([MatchName, RestName]),
                       application(atom("erlang"), atom("split_binary"),
                                   [InputName, LengthName])),
    case_expr(Run,
              [clause([MatchPattern], none,
                      [Split | Success(MatchName, RestName)]),
               clause([underscore()], none,
                      Fail(InputName, error_reason({regexp, R})))]);
generate(#string{string=S}, InputName, Success, Fail) ->
    %% Template:
    %%
    %% case Input0 of
    %%     <<{{S}}/binary, Rest0/binary>> -> {{Success(S, 'Rest0')}};
    %%     _ -> {{Fail}}
    %% end
    Literal = abstract(S),
    RestName = variable(new_name("Rest")),
    Prefix = binary([binary_field(Literal, [atom("binary")]),
                     binary_field(RestName, [atom("binary")])]),
    case_expr(InputName,
              [clause([Prefix], none, Success(Literal, RestName)),
               clause([underscore()], none,
                      Fail(InputName, error_reason({string, S})))]);
generate(#epsilon{}, InputName, Success, _Fail) ->
    %% Passes through, because epsilon always succeeds.
    %% TODO: Do we need to create a capture here? This doesn't feel right.
    Success(abstract([]), InputName);
generate(#anything{}, InputName, Success, Fail) ->
    %% TODO: accept non-utf8 characters based on grammar settings.
    %%
    %% Template:
    %%
    %% case Input0 of
    %%     <<>> ->
    %%         {{Fail(anything)}};
    %%     <<Char0/utf8, Rest0/binary>> ->
    %%         {{Success('Char0', 'Rest0')}}
    %% end
    CharName = variable(new_name("Char")),
    RestName = variable(new_name("Rest")),
    OneChar = binary([binary_field(CharName, [atom("utf8")]),
                      binary_field(RestName, [atom("binary")])]),
    case_expr(InputName,
              [clause([abstract(<<>>)], none,
                      Fail(InputName, error_reason(anything))),
               clause([OneChar], none, Success(CharName, RestName))]).
%% @doc Parsing error reasons.
%%
%% Builds the reason text that generate/4 passes to its failure
%% continuation. Accepts `{regexp, Pattern}', `{string, Literal}' or the
%% atom `anything'; any other argument fails with function_clause.
%%
%% The literal is formatted with `~ts' rather than `~s' so that text with
%% code points above 255 is rendered instead of raising badarg.
%% NOTE(review): this assumes ?FMT (from neotoma.hrl, not visible here)
%% wraps io_lib:format/2 -- confirm.
error_reason({regexp, Literal}) ->
    ?FMT("expected text matching pattern '~ts'", [Literal]);
error_reason({string, Literal}) ->
    ?FMT("expected '~ts'", [Literal]);
error_reason(anything) ->
    "expected any character but reached end of input".
%% @doc Returns a fresh name of the form `Key_N'.
%%
%% A separate counter per prefix is kept in the process dictionary under
%% `{namegen, Key}'. The first call for a prefix yields index 0 and each
%% later call in the same process yields the next integer. The result is
%% meant to be usable as a variable or function name.
-spec new_name(string()) -> string().
new_name(Key) ->
    Slot = {namegen, Key},
    %% No counter yet means this is the first name for the prefix.
    Next = case get(Slot) of
               undefined -> 0;
               Previous -> Previous + 1
           end,
    put(Slot, Next),
    lists:flatten([Key, $_, integer_to_list(Next)]).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment