Skip to content

Instantly share code, notes, and snippets.

@seancribbs
Created December 15, 2014 00:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seancribbs/9f7ac5bf2bbaa50617aa to your computer and use it in GitHub Desktop.
Save seancribbs/9f7ac5bf2bbaa50617aa to your computer and use it in GitHub Desktop.
%% @doc Implements code-generation using syntax_tools.
-module(neotoma_generate).
-include("neotoma.hrl").
-compile(export_all).
-import(erl_syntax, [
application/3,
abstract/1,
case_expr/2,
clause/3,
variable/1,
atom/1,
binary/1,
binary_field/2,
underscore/0,
list/2,
tuple/1,
match_expr/2
]).
%% Failure continuation handed to generate/4. It receives the syntax
%% tree of the input variable and a textual reason, and returns the
%% syntax tree to emit on the non-matching branch.
-type failfun() :: fun((InputVar::erl_syntax:syntaxTree(),
Reason::string()) -> erl_syntax:syntaxTree()).
%% Success continuation handed to generate/4. It receives the syntax
%% tree of the captured value and the syntax tree of the remaining
%% input, and returns the syntax tree to emit on the matching branch.
%% NOTE: despite the argument name, generate/4 does not always pass a
%% variable as the capture (the string clause passes the literal and
%% the epsilon clause passes an abstract empty list).
-type successfun() :: fun((CaptureVariable::erl_syntax:syntaxTree(),
RemainderVariable::erl_syntax:syntaxTree()) ->
erl_syntax:syntaxTree()).
%% TODO: Fix spec to only allow proper metagrammar syntax nodes

%% @doc Builds the syntax tree of an expression that matches one
%% metagrammar node against the input held in `InputName'.
%%
%% `Success' and `Fail' are continuations that produce the code for
%% the matching and non-matching branches. Each returns a single
%% syntax tree (see `successfun()' and `failfun()'), so every clause
%% body built here wraps that tree in a list, which is the shape
%% `erl_syntax:clause/3' requires for its body argument.
%%
%% Fresh variable names are obtained from `new_name/1', which keeps
%% its counters in the process dictionary.
-spec generate(tuple(), InputVar::erl_syntax:syntaxTree(),
               successfun(), failfun()) ->
                      erl_syntax:syntaxTree().
generate(#primary{expr=E, label=Label}, InputName, Success0, Fail) ->
    %% NOTE: We assume that repetition has been optimized out here and
    %% simply apply the label if it exists.
    Success = case Label of
                  undefined ->
                      Success0;
                  _ ->
                      %% Tag the capture as {Label, Capture} before
                      %% handing it to the original continuation.
                      fun(Capture, Rest) ->
                              Success0(tuple([atom(Label), Capture]), Rest)
                      end
              end,
    generate(E, InputName, Success, Fail);
generate(#charclass{charclass=C, index=I}, InputName, Success, Fail) ->
    %% TODO: For now, treating character class as a regexp. In the
    %% future, this can be exploded into a more efficient case
    %% statement.
    generate(#regexp{regexp=C, index=I}, InputName, Success, Fail);
generate(#regexp{regexp=R}, InputName, Success, Fail) ->
    %% Template:
    %%
    %% case re:run(Input0, {{R}}) of
    %%     {match, [{0, Length0}|_]} ->
    %%         {Match0, Rest0} = erlang:split_binary(Input0, Length0),
    %%         {{Success(Match0, Rest0)}};
    %%     _ -> {{Fail}}
    %% end
    Regexp = abstract(R),
    MatchName = variable(new_name("Match")),
    LengthName = variable(new_name("Length")),
    RestName = variable(new_name("Rest")),
    %% Only a match that starts at offset 0 is accepted by the pattern.
    MatchPattern = tuple([atom("match"),
                          list([tuple([abstract(0), LengthName])],
                               underscore())]),
    Split = match_expr(tuple([MatchName, RestName]),
                       application(atom("erlang"), atom("split_binary"),
                                   [InputName, LengthName])),
    case_expr(application(atom("re"), atom("run"), [InputName, Regexp]),
              [clause([MatchPattern], none,
                      %% A proper two-element body: the split, then the
                      %% success continuation's code.
                      [Split, Success(MatchName, RestName)]),
               clause([underscore()], none,
                      [Fail(InputName, error_reason({regexp, R}))])]);
generate(#string{string=S}, InputName, Success, Fail) ->
    %% Template:
    %%
    %% case Input0 of
    %%     <<{{S}}/binary, Rest0/binary>> -> {{Success(S, 'Rest0')}};
    %%     _ -> {{Fail}}
    %% end
    Literal = abstract(S),
    RestName = variable(new_name("Rest")),
    case_expr(InputName,
              [clause([binary([binary_field(Literal, [atom("binary")]),
                               binary_field(RestName, [atom("binary")])])],
                      none,
                      [Success(Literal, RestName)]),
               clause([underscore()], none,
                      [Fail(InputName, error_reason({string, S}))])]);
generate(#epsilon{}, InputName, Success, _Fail) ->
    %% Passes through, because epsilon always succeeds.
    %% TODO: Do we need to create a capture here? This doesn't feel right.
    Success(abstract([]), InputName);
generate(#anything{}, InputName, Success, Fail) ->
    %% TODO: accept non-utf8 characters based on grammar settings.
    %%
    %% Template:
    %%
    %% case Input0 of
    %%     <<>> ->
    %%         {{Fail(anything)}};
    %%     <<Char0/utf8, Rest0/binary>> ->
    %%         {{Success('Char0', 'Rest0')}}
    %% end
    CharName = variable(new_name("Char")),
    RestName = variable(new_name("Rest")),
    case_expr(InputName,
              [clause([abstract(<<>>)], none,
                      [Fail(InputName, error_reason(anything))]),
               clause([binary([binary_field(CharName, [atom("utf8")]),
                               binary_field(RestName, [atom("binary")])])],
                      none,
                      [Success(CharName, RestName)])]).
%% @doc Produces the human-readable message describing why a parse
%% step failed. The argument identifies what was expected:
%% `anything', `{string, Text}' or `{regexp, Pattern}'.
error_reason(anything) ->
    "expected any character but reached end of input";
error_reason({string, Text}) ->
    ?FMT("expected '~s'", [Text]);
error_reason({regexp, Pattern}) ->
    ?FMT("expected text matching pattern '~s'", [Pattern]).
%% @doc Returns a fresh name of the form `Prefix_N'. A separate counter
%% per prefix is kept in the process dictionary under
%% `{namegen, Prefix}'; the first call for a prefix yields `Prefix_0'
%% and each later call in the same process increments the suffix.
%% Intended to be usable for variable names and function names.
-spec new_name(string()) -> string().
new_name(Key) ->
    CounterKey = {namegen, Key},
    Next = case get(CounterKey) of
               undefined -> 0;
               Previous -> Previous + 1
           end,
    %% Remember the value just handed out so the next call continues
    %% from it.
    put(CounterKey, Next),
    lists:flatten([Key, $_, integer_to_list(Next)]).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment