Skip to content

Instantly share code, notes, and snippets.

@sile
Created May 20, 2014
Embed
What would you like to do?
Erlangコード最適化メモ: JSONデコード処理(3): JSON文字列パースの効率化 ref: http://qiita.com/sile/items/a29a2ab4124ff3c8ae92
{
"id": 1,
"jsonrpc": "2.0",
"total": 1,
"result": [
{
"id": 1,
"avatar": "images/user_1.png",
"age": 38,
"admin": false,
"name": "Феликс Швец",
"company": "Genland",
"phone": "+70955600298",
"email": "feliks@genland.com",
"registerDate": "Tue, 18 Aug 2009 14:09:40 GMT",
"friends": [
{
"id": 1,
"name": "Яков Олейник",
"phone": "+70950177368"
},
{
"id": 2,
"name": "Антон Коваленко",
"phone": "+70958920708"
},
{
"id": 3,
"name": "Леонид Приходько",
"phone": "+70958423612"
}
],
"field": "field value"
}
]
}
--- json_decode_1.erl 2014-05-16 02:57:15.842341685 +0900
+++ json_decode_1_a.erl 2014-05-21 02:36:30.794384932 +0900
@@ -1,1 +1,1 @@
--module(json_decode_1).
+-module(json_decode_1_a).
@@ -73,1 +73,1 @@
-string(<<$", Bin/binary>>, Acc) -> {list_to_binary(lists:reverse(Acc)), Bin};
+string(<<$", Bin/binary>>, _) -> {<<"">>, Bin}; % ダミー値を返す
@@ -83,1 +83,1 @@
-string(<<0:1, C:7, Bin/binary>>, Acc) -> string(Bin, [C | Acc]);
+string(<<0:1, _:7, Bin/binary>>, Acc) -> string(Bin, Acc); % 文字列パース中間結果は保存しない
$ erl
Erlang R16B03-1 (erts-5.10.4) [source] [64-bit] [smp:2:2] [async-threads:10] [hipe]
Eshell V5.10.4 (abort with ^G)
> json_decode_1_a:decode(<<"{\"a\":10, \"b\":\"c\"}">>).
{[{<<"">>,10},{<<"">>,<<"">>}]}
%% Simplified version of string/2 from json_decode_1.erl.
%% Scans the body of a JSON string (opening quote already consumed), pushing each
%% decoded byte onto a reversed list, and returns {DecodedBinary, RestOfInput}
%% once the closing quote is reached. Only 7-bit bytes are accepted.
string(<<$", Rest/binary>>, Acc) ->
    {list_to_binary(lists:reverse(Acc)), Rest};
string(<<$\\, $", Rest/binary>>, Acc) ->
    string(Rest, [$" | Acc]);
string(<<$\\, $n, Rest/binary>>, Acc) ->
    string(Rest, [$\n | Acc]);
string(<<$\\, $t, Rest/binary>>, Acc) ->
    string(Rest, [$\t | Acc]);
string(<<Char, Rest/binary>>, Acc) when Char < 128 ->
    string(Rest, [Char | Acc]).
%% Parses the body of a JSON string (opening quote already consumed), appending
%% the decoded bytes directly to a binary accumulator instead of a reversed list.
%% Returns {DecodedString, RestOfInput}. Only 7-bit bytes are accepted.
string(Bin) -> string(Bin, <<"">>).

string(<<$", Bin/binary>>, Acc) -> {Acc, Bin};
string(<<$\\, $", Bin/binary>>, Acc) -> string(Bin, <<Acc/binary, $">>);
%% The \n and \t escapes must decode to the control characters themselves
%% ($\n / $\t), not to the letters 'n' and 't'.
string(<<$\\, $n, Bin/binary>>, Acc) -> string(Bin, <<Acc/binary, $\n>>);
string(<<$\\, $t, Bin/binary>>, Acc) -> string(Bin, <<Acc/binary, $\t>>);
string(<<0:1, C:7, Bin/binary>>, Acc) -> string(Bin, <<Acc/binary, C>>).
%% Accバイナリの末尾に直接文字を追加する。
%% もしAccバイナリの末尾部分の(内部的な)空き領域が不足している場合は、自動でリサイズが行われる。
%% http://www.erlang.org/doc/efficiency_guide/binaryhandling.html より引用:
%% Acc will be copied only in the first iteration and extra space will be allocated at the end
%% of the copied binary. In the next iteration, H will be written into the extra space. When
%% the extra space runs out, the binary will be reallocated with more extra space.
%% A buffer binary is added as an extra argument and threaded through every function.
%% string/2 records where the buffer currently ends so that string/3 can later
%% slice out only the bytes appended while parsing this one string.
string(Bin, Buf) -> string(Bin, byte_size(Buf), Buf).

%% Returns {DecodedString, RestOfInput, Buffer}. The buffer is part of the result
%% so that the caller can hand it on to the next parsing step.
string(<<$", Bin/binary>>, BufStart, Buf) ->
    %% Sub-binary holding only the bytes appended for this string: it starts at
    %% BufStart (not 0) and runs to the current end of the buffer.
    Str = binary:part(Buf, BufStart, byte_size(Buf) - BufStart),
    {Str, Bin, Buf};
string(<<$\\, $", Bin/binary>>, BufStart, Buf) -> string(Bin, BufStart, <<Buf/binary, $">>);
%% The \n and \t escapes decode to the control characters, not to the letters.
string(<<$\\, $n, Bin/binary>>, BufStart, Buf) -> string(Bin, BufStart, <<Buf/binary, $\n>>);
string(<<$\\, $t, Bin/binary>>, BufStart, Buf) -> string(Bin, BufStart, <<Buf/binary, $\t>>);
string(<<0:1, C:7, Bin/binary>>, BufStart, Buf) -> string(Bin, BufStart, <<Buf/binary, C>>).
%% Entry point: remembers the position where this string's body begins (Base) so
%% that runs of unescaped bytes can later be sliced straight out of the input.
-spec string(binary(), non_neg_integer(), binary()) -> {json_string(), binary(), binary()}.
string(Input, Start, Buf) ->
    string(Input, Input, Start, Buf).

%% Scans the string body. Plain bytes are not copied while scanning; the pending
%% run between Base and the cursor is only materialised at an escape or at the
%% closing quote. Returns {DecodedString, RestOfInput, Buffer}.
-spec string(binary(), binary(), non_neg_integer(), binary()) -> {json_string(), binary(), binary()}.
string(<<$", Rest/binary>> = Cursor, Base, Start, Buf) ->
    %% Bytes of the input that still have to be added to the result.
    Pending = binary:part(Base, 0, byte_size(Base) - byte_size(Cursor)),
    case byte_size(Buf) of
        Start ->
            %% Nothing was appended to the buffer, i.e. no escape was seen:
            %% the sub-binary of the input can be returned as is.
            {Pending, Rest, Buf};
        _ ->
            Grown = <<Buf/binary, Pending/binary>>,
            {binary:part(Grown, Start, byte_size(Grown) - Start), Rest, Grown}
    end;
string(<<$\\, Escaped/binary>> = Cursor, Base, Start, Buf) ->
    %% Bytes of the input that precede the backslash and are not yet in the buffer.
    Pending = binary:part(Base, 0, byte_size(Base) - byte_size(Cursor)),
    case Escaped of
        <<Same, Rest/binary>> when Same =:= $"; Same =:= $/; Same =:= $\\ ->
            string(Rest, Start, <<Buf/binary, Pending/binary, Same>>);
        <<$b, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\b>>);
        <<$f, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\f>>);
        <<$n, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\n>>);
        <<$r, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\r>>);
        <<$t, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\t>>);
        _ ->
            error(badarg, [Cursor, Base, Start, Buf])
    end;
string(<<Char, Rest/binary>>, Base, Start, Buf) when Char < 128 ->
    %% Nothing is appended at this point; the byte stays part of the pending run.
    string(Rest, Base, Start, Buf);
string(Input, Base, Start, Buf) ->
    error(badarg, [Input, Base, Start, Buf]).
-module(json_decode_1_a).
-export([decode/1]).
-type json_value() :: null | boolean() | json_number() |
json_string() | json_array() | json_object().
-type json_number() :: non_neg_integer().
-type json_string() :: binary().
-type json_array() :: [json_value()].
-type json_object() :: {[json_object_member()]}.
-type json_object_member() :: {json_string(), json_value()}.
%% @doc Decodes a JSON string.
%%
%% Leading whitespace is skipped and a single value is parsed; whatever follows
%% that value is discarded. A badarg error is raised for invalid JSON.
-spec decode(binary()) -> json_value().
decode(Json) ->
    Trimmed = skip_whitespace(Json),
    {Decoded, _Unparsed} = value(Trimmed),
    Decoded.
%% Drops leading space, tab, carriage-return and line-feed bytes and returns
%% what is left of the input.
-spec skip_whitespace(binary()) -> binary().
skip_whitespace(<<Char, Rest/binary>>) when Char =:= $\s; Char =:= $\t; Char =:= $\r; Char =:= $\n ->
    skip_whitespace(Rest);
skip_whitespace(Trimmed) ->
    Trimmed.
%% Parses the single JSON value at the head of the input and returns it together
%% with the unconsumed remainder. Input that starts with none of the recognised
%% tokens raises badarg.
-spec value(binary()) -> {json_value(), binary()}.
value(<<"null", Rest/binary>>) ->
    {null, Rest};
value(<<"false", Rest/binary>>) ->
    {false, Rest};
value(<<"true", Rest/binary>>) ->
    {true, Rest};
value(<<"[", Rest/binary>>) ->
    array(skip_whitespace(Rest));
value(<<"{", Rest/binary>>) ->
    object(skip_whitespace(Rest));
value(<<"\"", Rest/binary>>) ->
    string(Rest, "");
value(<<Digit, Rest/binary>>) when Digit >= $0, Digit =< $9 ->
    number(Digit - $0, Rest);
value(Other) ->
    error(badarg, [Other]).
%% Parses a JSON array whose opening bracket (and any whitespace after it) has
%% already been consumed. Returns {Elements, RestOfInput}.
-spec array(binary()) -> {json_array(), binary()}.
array(<<"]", Rest/binary>>) ->
    {[], Rest};
array(Input) ->
    array(Input, []).

%% Parses the next element and then expects either "]" (done) or "," (more
%% elements follow); anything else raises badarg. Parsed holds the elements
%% read so far in reverse order.
-spec array(binary(), [json_value()]) -> {json_array(), binary()}.
array(Input, Parsed) ->
    {Element, AfterElement} = value(Input),
    case skip_whitespace(AfterElement) of
        <<"]", Rest/binary>> ->
            {lists:reverse([Element | Parsed]), Rest};
        <<",", Rest/binary>> ->
            array(skip_whitespace(Rest), [Element | Parsed]);
        _ ->
            error(badarg, [Input, Parsed])
    end.
%% Parses a JSON object whose opening brace (and any whitespace after it) has
%% already been consumed. Returns {{Members}, RestOfInput}.
-spec object(binary()) -> {json_object(), binary()}.
object(<<"}", Rest/binary>>) ->
    {{[]}, Rest};
object(Input) ->
    object(Input, []).

%% Parses one `"key": value` member and then expects either "}" (done) or ","
%% (more members follow); any other shape raises badarg. Members holds the
%% members read so far in reverse order.
-spec object(binary(), [json_object_member()]) -> {json_object(), binary()}.
object(<<"\"", AfterQuote/binary>> = Input, Members) ->
    {Key, AfterKey} = string(AfterQuote, ""),
    case skip_whitespace(AfterKey) of
        <<":", AfterColon/binary>> ->
            {Value, AfterValue} = value(skip_whitespace(AfterColon)),
            Collected = [{Key, Value} | Members],
            case skip_whitespace(AfterValue) of
                <<"}", Rest/binary>> ->
                    {{lists:reverse(Collected)}, Rest};
                <<",", Rest/binary>> ->
                    object(skip_whitespace(Rest), Collected);
                _ ->
                    error(badarg, [Input, Members])
            end;
        _ ->
            error(badarg, [Input, Members])
    end;
object(Input, Members) ->
    error(badarg, [Input, Members]).
%% Benchmark variant of the string parser: the input is scanned and validated
%% exactly as usual, but the decoded text is thrown away and an empty binary is
%% returned in its place. Unknown escapes and bytes >= 128 raise badarg.
-spec string(binary(), string()) -> {json_string(), binary()}.
string(<<$", Rest/binary>>, _Acc) ->
    {<<>>, Rest}; % return a dummy value
string(<<$\\, Esc, Rest/binary>>, Acc) when Esc =:= $"; Esc =:= $/; Esc =:= $\\ ->
    string(Rest, [Esc | Acc]);
string(<<$\\, $b, Rest/binary>>, Acc) ->
    string(Rest, [$\b | Acc]);
string(<<$\\, $f, Rest/binary>>, Acc) ->
    string(Rest, [$\f | Acc]);
string(<<$\\, $n, Rest/binary>>, Acc) ->
    string(Rest, [$\n | Acc]);
string(<<$\\, $r, Rest/binary>>, Acc) ->
    string(Rest, [$\r | Acc]);
string(<<$\\, $t, Rest/binary>>, Acc) ->
    string(Rest, [$\t | Acc]);
string(<<$\\, _/binary>> = Input, Acc) ->
    error(badarg, [Input, Acc]);
string(<<Char, Rest/binary>>, Acc) when Char < 128 ->
    string(Rest, Acc); % intermediate parse results are not kept
string(Input, Acc) ->
    error(badarg, [Input, Acc]).
%% Accumulates consecutive decimal digits into the running total and returns
%% {Number, RestOfInput} at the first byte that is not a digit.
-spec number(json_number(), binary()) -> {json_number(), binary()}.
number(Total, <<Digit, Rest/binary>>) when Digit >= $0, Digit =< $9 ->
    number(Total * 10 + Digit - $0, Rest);
number(Total, Rest) ->
    {Total, Rest}.
-module(json_decode_2).
-export([decode/1]).
-type json_value() :: null | boolean() | json_number() |
json_string() | json_array() | json_object().
-type json_number() :: non_neg_integer().
-type json_string() :: binary().
-type json_array() :: [json_value()].
-type json_object() :: {[json_object_member()]}.
-type json_object_member() :: {json_string(), json_value()}.
%% @doc Decodes a JSON string.
%%
%% Leading whitespace is skipped and a single value is parsed, starting from an
%% empty string buffer; whatever follows that value is discarded. A badarg error
%% is raised for invalid JSON.
-spec decode(binary()) -> json_value().
decode(Json) ->
    Trimmed = skip_whitespace(Json),
    {Decoded, _Unparsed, _Scratch} = value(Trimmed, <<>>),
    Decoded.
%% Drops leading space, tab, carriage-return and line-feed bytes and returns
%% what is left of the input.
-spec skip_whitespace(binary()) -> binary().
skip_whitespace(<<Char, Rest/binary>>) when Char =:= $\s; Char =:= $\t; Char =:= $\r; Char =:= $\n ->
    skip_whitespace(Rest);
skip_whitespace(Trimmed) ->
    Trimmed.
%% Parses the single JSON value at the head of the input. Returns the value, the
%% unconsumed remainder and the (possibly grown) string buffer. Input that starts
%% with none of the recognised tokens raises badarg.
-spec value(binary(), binary()) -> {json_value(), binary(), binary()}.
value(<<"null", Rest/binary>>, Buf) ->
    {null, Rest, Buf};
value(<<"false", Rest/binary>>, Buf) ->
    {false, Rest, Buf};
value(<<"true", Rest/binary>>, Buf) ->
    {true, Rest, Buf};
value(<<"[", Rest/binary>>, Buf) ->
    array(skip_whitespace(Rest), Buf);
value(<<"{", Rest/binary>>, Buf) ->
    object(skip_whitespace(Rest), Buf);
value(<<"\"", Rest/binary>>, Buf) ->
    string(Rest, byte_size(Buf), Buf);
value(<<Digit, Rest/binary>>, Buf) when Digit >= $0, Digit =< $9 ->
    number(Digit - $0, Rest, Buf);
value(Other, Buf) ->
    error(badarg, [Other, Buf]).
%% Parses a JSON array whose opening bracket (and any whitespace after it) has
%% already been consumed. Returns {Elements, RestOfInput, Buffer}.
-spec array(binary(), binary()) -> {json_array(), binary(), binary()}.
array(<<"]", Rest/binary>>, Buf) ->
    {[], Rest, Buf};
array(Input, Buf) ->
    array(Input, [], Buf).

%% Parses the next element and then expects either "]" (done) or "," (more
%% elements follow); anything else raises badarg. Parsed holds the elements
%% read so far in reverse order; the buffer returned by each element is passed
%% on to the next one.
-spec array(binary(), [json_value()], binary()) -> {json_array(), binary(), binary()}.
array(Input, Parsed, Buf) ->
    {Element, AfterElement, NextBuf} = value(Input, Buf),
    case skip_whitespace(AfterElement) of
        <<"]", Rest/binary>> ->
            {lists:reverse([Element | Parsed]), Rest, NextBuf};
        <<",", Rest/binary>> ->
            array(skip_whitespace(Rest), [Element | Parsed], NextBuf);
        _ ->
            error(badarg, [Input, Parsed, Buf])
    end.
%% Parses a JSON object whose opening brace (and any whitespace after it) has
%% already been consumed. Returns {{Members}, RestOfInput, Buffer}.
-spec object(binary(), binary()) -> {json_object(), binary(), binary()}.
object(<<"}", Rest/binary>>, Buf) ->
    {{[]}, Rest, Buf};
object(Input, Buf) ->
    object(Input, [], Buf).

%% Parses one `"key": value` member and then expects either "}" (done) or ","
%% (more members follow); any other shape raises badarg. Members holds the
%% members read so far in reverse order; the buffer is threaded through the key
%% and the value in turn.
-spec object(binary(), [json_object_member()], binary()) -> {json_object(), binary(), binary()}.
object(<<"\"", AfterQuote/binary>> = Input, Members, Buf) ->
    {Key, AfterKey, KeyBuf} = string(AfterQuote, byte_size(Buf), Buf),
    case skip_whitespace(AfterKey) of
        <<":", AfterColon/binary>> ->
            {Value, AfterValue, ValueBuf} = value(skip_whitespace(AfterColon), KeyBuf),
            Collected = [{Key, Value} | Members],
            case skip_whitespace(AfterValue) of
                <<"}", Rest/binary>> ->
                    {{lists:reverse(Collected)}, Rest, ValueBuf};
                <<",", Rest/binary>> ->
                    object(skip_whitespace(Rest), Collected, ValueBuf);
                _ ->
                    error(badarg, [Input, Members, Buf])
            end;
        _ ->
            error(badarg, [Input, Members, Buf])
    end;
object(Input, Members, Buf) ->
    error(badarg, [Input, Members, Buf]).
%% Entry point: remembers the position where this string's body begins (Base) so
%% that runs of unescaped bytes can later be sliced straight out of the input.
-spec string(binary(), non_neg_integer(), binary()) -> {json_string(), binary(), binary()}.
string(Input, Start, Buf) ->
    string(Input, Input, Start, Buf).

%% Scans the string body. Plain bytes are not copied while scanning; the pending
%% run between Base and the cursor is only materialised at an escape or at the
%% closing quote. Returns {DecodedString, RestOfInput, Buffer}.
-spec string(binary(), binary(), non_neg_integer(), binary()) -> {json_string(), binary(), binary()}.
string(<<$", Rest/binary>> = Cursor, Base, Start, Buf) ->
    Pending = binary:part(Base, 0, byte_size(Base) - byte_size(Cursor)),
    case byte_size(Buf) of
        Start ->
            %% Nothing was appended to the buffer, i.e. no escape was seen:
            %% the sub-binary of the input can be returned as is.
            {Pending, Rest, Buf};
        _ ->
            Grown = <<Buf/binary, Pending/binary>>,
            {binary:part(Grown, Start, byte_size(Grown) - Start), Rest, Grown}
    end;
string(<<$\\, Escaped/binary>> = Cursor, Base, Start, Buf) ->
    %% Bytes of the input that precede the backslash and are not yet in the buffer.
    Pending = binary:part(Base, 0, byte_size(Base) - byte_size(Cursor)),
    case Escaped of
        <<Same, Rest/binary>> when Same =:= $"; Same =:= $/; Same =:= $\\ ->
            string(Rest, Start, <<Buf/binary, Pending/binary, Same>>);
        <<$b, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\b>>);
        <<$f, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\f>>);
        <<$n, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\n>>);
        <<$r, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\r>>);
        <<$t, Rest/binary>> ->
            string(Rest, Start, <<Buf/binary, Pending/binary, $\t>>);
        _ ->
            error(badarg, [Cursor, Base, Start, Buf])
    end;
string(<<Char, Rest/binary>>, Base, Start, Buf) when Char < 128 ->
    %% Nothing is appended here; the byte stays part of the pending run.
    string(Rest, Base, Start, Buf);
string(Input, Base, Start, Buf) ->
    error(badarg, [Input, Base, Start, Buf]).
%% Accumulates consecutive decimal digits into the running total and returns
%% {Number, RestOfInput, Buffer} at the first byte that is not a digit. The
%% buffer is passed through untouched.
-spec number(json_number(), binary(), binary()) -> {json_number(), binary(), binary()}.
number(Total, <<Digit, Rest/binary>>, Buf) when Digit >= $0, Digit =< $9 ->
    number(Total * 10 + Digit - $0, Rest, Buf);
number(Total, Rest, Buf) ->
    {Total, Rest, Buf}.
-module(json_decode_2_b).
-export([decode/1]).
-type json_value() :: null | boolean() | json_number() |
json_string() | json_array() | json_object().
-type json_number() :: non_neg_integer().
-type json_string() :: binary().
-type json_array() :: [json_value()].
-type json_object() :: {[json_object_member()]}.
-type json_object_member() :: {json_string(), json_value()}.
%% @doc Decodes a JSON string.
%%
%% Leading whitespace is skipped and a single value is parsed, starting from an
%% empty string buffer; whatever follows that value is discarded. A badarg error
%% is raised for invalid JSON.
-spec decode(binary()) -> json_value().
decode(Json) ->
    Trimmed = skip_whitespace(Json),
    {Decoded, _Unparsed, _Scratch} = value(Trimmed, <<>>),
    Decoded.
%% Drops leading space, tab, carriage-return and line-feed bytes and returns
%% what is left of the input.
-spec skip_whitespace(binary()) -> binary().
skip_whitespace(<<Char, Rest/binary>>) when Char =:= $\s; Char =:= $\t; Char =:= $\r; Char =:= $\n ->
    skip_whitespace(Rest);
skip_whitespace(Trimmed) ->
    Trimmed.
%% Parses the single JSON value at the head of the input. Returns the value, the
%% unconsumed remainder and the (possibly grown) string buffer. Input that starts
%% with none of the recognised tokens raises badarg.
-spec value(binary(), binary()) -> {json_value(), binary(), binary()}.
value(<<"null", Rest/binary>>, Buf) ->
    {null, Rest, Buf};
value(<<"false", Rest/binary>>, Buf) ->
    {false, Rest, Buf};
value(<<"true", Rest/binary>>, Buf) ->
    {true, Rest, Buf};
value(<<"[", Rest/binary>>, Buf) ->
    array(skip_whitespace(Rest), Buf);
value(<<"{", Rest/binary>>, Buf) ->
    object(skip_whitespace(Rest), Buf);
value(<<"\"", Rest/binary>>, Buf) ->
    string(Rest, byte_size(Buf), Buf);
value(<<Digit, Rest/binary>>, Buf) when Digit >= $0, Digit =< $9 ->
    number(Digit - $0, Rest, Buf);
value(Other, Buf) ->
    error(badarg, [Other, Buf]).
%% Parses a JSON array whose opening bracket (and any whitespace after it) has
%% already been consumed. Returns {Elements, RestOfInput, Buffer}.
-spec array(binary(), binary()) -> {json_array(), binary(), binary()}.
array(<<"]", Rest/binary>>, Buf) ->
    {[], Rest, Buf};
array(Input, Buf) ->
    array(Input, [], Buf).

%% Parses the next element and then expects either "]" (done) or "," (more
%% elements follow); anything else raises badarg. Parsed holds the elements
%% read so far in reverse order; the buffer returned by each element is passed
%% on to the next one.
-spec array(binary(), [json_value()], binary()) -> {json_array(), binary(), binary()}.
array(Input, Parsed, Buf) ->
    {Element, AfterElement, NextBuf} = value(Input, Buf),
    case skip_whitespace(AfterElement) of
        <<"]", Rest/binary>> ->
            {lists:reverse([Element | Parsed]), Rest, NextBuf};
        <<",", Rest/binary>> ->
            array(skip_whitespace(Rest), [Element | Parsed], NextBuf);
        _ ->
            error(badarg, [Input, Parsed, Buf])
    end.
%% Parses a JSON object whose opening brace (and any whitespace after it) has
%% already been consumed. Returns {{Members}, RestOfInput, Buffer}.
-spec object(binary(), binary()) -> {json_object(), binary(), binary()}.
object(<<"}", Rest/binary>>, Buf) ->
    {{[]}, Rest, Buf};
object(Input, Buf) ->
    object(Input, [], Buf).

%% Parses one `"key": value` member and then expects either "}" (done) or ","
%% (more members follow); any other shape raises badarg. Members holds the
%% members read so far in reverse order; the buffer is threaded through the key
%% and the value in turn.
-spec object(binary(), [json_object_member()], binary()) -> {json_object(), binary(), binary()}.
object(<<"\"", AfterQuote/binary>> = Input, Members, Buf) ->
    {Key, AfterKey, KeyBuf} = string(AfterQuote, byte_size(Buf), Buf),
    case skip_whitespace(AfterKey) of
        <<":", AfterColon/binary>> ->
            {Value, AfterValue, ValueBuf} = value(skip_whitespace(AfterColon), KeyBuf),
            Collected = [{Key, Value} | Members],
            case skip_whitespace(AfterValue) of
                <<"}", Rest/binary>> ->
                    {{lists:reverse(Collected)}, Rest, ValueBuf};
                <<",", Rest/binary>> ->
                    object(skip_whitespace(Rest), Collected, ValueBuf);
                _ ->
                    error(badarg, [Input, Members, Buf])
            end;
        _ ->
            error(badarg, [Input, Members, Buf])
    end;
object(Input, Members, Buf) ->
    error(badarg, [Input, Members, Buf]).
%% Parses the body of a JSON string (opening quote already consumed), appending
%% every decoded byte to the shared buffer. Start is the buffer offset at which
%% this string begins; the result is the slice of the buffer from Start to its
%% end. Returns {DecodedString, RestOfInput, Buffer}. Unknown escapes and bytes
%% >= 128 raise badarg.
-spec string(binary(), non_neg_integer(), binary()) -> {json_string(), binary(), binary()}.
string(<<$", Rest/binary>>, Start, Buf) ->
    Length = byte_size(Buf) - Start,
    {binary:part(Buf, Start, Length), Rest, Buf};
string(<<$\\, Esc, Rest/binary>>, Start, Buf) when Esc =:= $"; Esc =:= $/; Esc =:= $\\ ->
    string(Rest, Start, <<Buf/binary, Esc>>);
string(<<$\\, $b, Rest/binary>>, Start, Buf) ->
    string(Rest, Start, <<Buf/binary, $\b>>);
string(<<$\\, $f, Rest/binary>>, Start, Buf) ->
    string(Rest, Start, <<Buf/binary, $\f>>);
string(<<$\\, $n, Rest/binary>>, Start, Buf) ->
    string(Rest, Start, <<Buf/binary, $\n>>);
string(<<$\\, $r, Rest/binary>>, Start, Buf) ->
    string(Rest, Start, <<Buf/binary, $\r>>);
string(<<$\\, $t, Rest/binary>>, Start, Buf) ->
    string(Rest, Start, <<Buf/binary, $\t>>);
string(<<$\\, _/binary>> = Input, Start, Buf) ->
    error(badarg, [Input, Start, Buf]);
string(<<Char, Rest/binary>>, Start, Buf) when Char < 128 ->
    string(Rest, Start, <<Buf/binary, Char>>);
string(Input, Start, Buf) ->
    error(badarg, [Input, Start, Buf]).
%% Accumulates consecutive decimal digits into the running total and returns
%% {Number, RestOfInput, Buffer} at the first byte that is not a digit. The
%% buffer is passed through untouched.
-spec number(json_number(), binary(), binary()) -> {json_number(), binary(), binary()}.
number(Total, <<Digit, Rest/binary>>, Buf) when Digit >= $0, Digit =< $9 ->
    number(Total * 10 + Digit - $0, Rest, Buf);
number(Total, Rest, Buf) ->
    {Total, Rest, Buf}.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment