json_grammar.yrl
Nonterminals array element elements arrobj object members member.
 
Terminals '{' '}' '[' ']' string ',' ':' integer float true false null.
 
Rootsymbol element.
 
arrobj -> array : '$1'.
arrobj -> object : '$1'.
 
object -> '{' members '}' : {obj, '$2'}.
object -> '{' '}' : {obj, []}.
% object -> '{' member '}' : {'$2'}. % results in a shift/reduce conflict:
% after '{' member with '}' as lookahead, yecc cannot decide between
% shifting '}' for this rule and reducing via "members -> member".
 
members -> member ',' members : ['$1' | '$3'].
members -> member : ['$1'].
 
member -> string ':' element : {element(3, '$1'),'$3'}.
 
array -> '[' elements ']' : list_to_tuple('$2').
 
elements -> element ',' elements : ['$1' | '$3'].
elements -> element : ['$1'].
elements -> '$empty' : [].
 
element -> string : element(3, '$1').
element -> arrobj : '$1'.
element -> integer : element(3, '$1').
element -> float : element(3, '$1').
element -> true : element(1, '$1').
element -> false : element(1, '$1').
element -> null : element(1, '$1').
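
For reference, a sketch of the term shape these actions build for a small document: keys stay as plain strings, objects become {obj, Proplist} pairs, and arrays become tuples via list_to_tuple:

JSON input:   {"a": [1, 2.5, true], "b": null}
Parsed term:  {obj, [{"a", {1, 2.5, true}}, {"b", null}]}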
json_lex.xrl
Definitions.
 
ST = [^"]
L = [A-Za-z]
WS = ([\000-\s]|%.*)
D = [0-9]
H = [0-9a-fA-F]
 
Rules.
 
{ : {token, {'{', TokenLine}}.
} : {token, {'}', TokenLine}}.
 
\[ : {token, {'[', TokenLine}}.
\] : {token, {']', TokenLine}}.
 
\-?{D}+\.{D}+((E|e)(\+|\-)?{D}+)? : {token,{float,TokenLine,list_to_float(TokenChars)}}.
\-?{D}+(E|e)(\+|\-)?{D}+ : {token,{float,TokenLine,whole_float(TokenChars)}}.
\-?{D}+ : {token,{integer,TokenLine,list_to_integer(TokenChars)}}.
 
% "[^"\\]*(\\[^u][^"\\]*)*" : {token,{string,TokenLine,strip(unicode_string(TokenChars),TokenLen)}}.
"[^"\\]*(\\.[^"\\]*)*" : {token,{string,TokenLine,parse_string(strip(TokenChars,TokenLen))}}.
 
 
% \\u{H}{H}{H}{H} : {token, {unicode, TokenLine,TokenChars}}.
 
true : {token,{'true', TokenLine}}.
false : {token,{'false', TokenLine}}.
null : {token,{'null', TokenLine}}.
 
: : {token, {':', TokenLine}}.
, : {token, {',', TokenLine}}.
 
{WS}+ : skip_token.
 
Erlang code.
% "
 
-define(LOG(Name, Value),
        io:format("{~p:~p}: ~p -> ~s~n", [?MODULE, ?LINE, Name, Value])).
-define(PLOG(Name, Value),
        io:format("{~p:~p}: ~p -> ~p~n", [?MODULE, ?LINE, Name, Value])).
 
% Drop the surrounding double quotes from a string token.
strip(TokenChars, TokenLen) -> lists:sublist(TokenChars, 2, TokenLen - 2).
 
% list_to_float/1 requires a fraction part, so "1e5" is rewritten as "1.0e5"
% before conversion ([eE] also covers the capital-E form the rule accepts).
whole_float(TokenChars) ->
    NowFloat = re:replace(TokenChars, "[eE]", ".0e", [{return, list}]),
    list_to_float(NowFloat).
 
% Drop the backslash from \" \\ and \/ escapes, keeping the escaped character.
unescape_quote(String) ->
    case re:run(String, "\\\\[\\\\\"/]") of
        {match, [{Pos, _}]} ->
            {Before, [_ | After]} = lists:split(Pos, String),
            Before ++ unescape_quote(After);
        nomatch ->
            String
    end.
% Translate \b \f \n \r \t escapes into their control characters.
unescape_control(String) ->
    case re:run(String, "\\\\[bfnrt]") of
        {match, [{Pos, _}]} ->
            {Before, [_, ContC | After]} = lists:split(Pos, String),
            C = case ContC of
                    $b -> $\b;
                    $f -> $\f;
                    $n -> $\n;
                    $r -> $\r;
                    $t -> $\t
                end,
            Before ++ [C] ++ unescape_control(After);
        nomatch ->
            String
    end.
 
% Decode \uXXXX escapes into the corresponding Unicode code point.
unescape_unicode(String) ->
    case re:run(String, "\\\\u[0-9a-fA-F]{4}") of
        {match, [{Pos, _}]} ->
            {Before, After} = lists:split(Pos, String),
            {Code, Remain} = lists:split(6, After),
            [_, _, C3, C2, C1, C0] = Code,
            C = dehex(C0) bor
                (dehex(C1) bsl 4) bor
                (dehex(C2) bsl 8) bor
                (dehex(C3) bsl 12),
            Before ++ [C] ++ unescape_unicode(Remain);
        nomatch ->
            String
    end.
 
dehex(C) when C >= $0, C =< $9 ->
    C - $0;
dehex(C) when C >= $a, C =< $f ->
    C - $a + 10;
dehex(C) when C >= $A, C =< $F ->
    C - $A + 10.
 
% The unescape passes run in sequence, so an escaped backslash followed by a
% control letter (e.g. the four characters \\n) is mis-read as a control
% escape by the later pass; a known limitation of this multi-pass approach.
parse_string(StringChars) ->
    QuotesUnescaped = unescape_quote(StringChars),
    ControlUnescaped = unescape_control(QuotesUnescaped),
    unescape_unicode(ControlUnescaped).
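
A minimal sketch of wiring the two files together from an Erlang shell, assuming they are saved under the names shown above (leex and yecc generate json_lex.erl and json_grammar.erl, which compile to the json_lex and json_grammar modules):

1> leex:file("json_lex.xrl"), yecc:file("json_grammar.yrl").
2> c(json_lex), c(json_grammar).
3> {ok, Tokens, _EndLine} = json_lex:string("[\"hi\", {\"n\": 1}]").
4> json_grammar:parse(Tokens).
{ok,{"hi",{obj,[{"n",1}]}}}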
