Skip to content

Instantly share code, notes, and snippets.

@nickva
Last active October 31, 2023 05:53
Show Gist options
  • Save nickva/d784043bee8e28f447bb3e66336712df to your computer and use it in GitHub Desktop.
Save nickva/d784043bee8e28f447bb3e66336712df to your computer and use it in GitHub Desktop.
Reproducer attempt for process_info slowdown in OTP 25
%% Reproducer attempt for https://github.com/erlang/otp/issues/7801
%% It's supposed to model the couch_file.erl from Apache CouchDB
%%
%% Example run:
%% c(ioblock), ioblock:go("./junk.bin", 8192, 60, 4000).
%% - Create a temporary junk.bin file with 8GB of random data
%% - Spawn 60 gen_servers to handle preads from the file
%% - Spawn 4000 caller processes to call the servers with pread commands
%%
%% Note: run the first time without measuring to create the larger file.
%% Then drop the page cache [1] before running the tests. In this way pread(..) will
%% have to do some actual data fetching from disk.
%%
%% [1] # echo 3 > /proc/sys/vm/drop_caches
%%
%% OTP 25
%% ------
%% $ erl +MBacul 0 +MBas aobf -name otp25@127.0.0.1
%% Erlang/OTP 25 [erts-13.2.2.4] [source] [64-bit] [smp:72:72] [ds:72:72:10] [async-threads:1] [jit:ns]
%%
%% > (otp25@127.0.0.1)15> c(ioblock), ioblock:go("./junk.bin", 8192, 60, 4000).
%% File "./junk.bin" already there and >= 8192 MBs
%% started
%% process_info:
%% => 4113 procs 48836 msec
%% process_info:
%% => 4113 procs 34533 msec
%% process_info:
%% (otp25@127.0.0.1)16>
%% (otp25@127.0.0.1)16> ioblock:stop().
%% stopped
%%
%% OTP 24
%% ------
%% $ erl +MBacul 0 +MBas aobf -name otp24@127.0.0.1
%% Erlang/OTP 24 [erts-12.3.2.13] [source] [64-bit] [smp:72:72] [ds:72:72:10] [async-threads:1] [jit]
%%
%% (otp24@127.0.0.1)2> c(ioblock), ioblock:go("./junk.bin", 8192, 60, 4000).
%% process_info:
%% => 4111 procs 28 msec
%% process_info:
%% => 4111 procs 20 msec
%% process_info:
%% => 4112 procs 28 msec
%% ...
-module(ioblock).
-behaviour(gen_server).
-export([
go/0,
go/1,
go/4,
stop/0,
get_stats_int/0
]).
-export([
init/1,
handle_call/3,
handle_cast/2,
terminate/2
]).
% Number of bytes in one megabyte. The body is parenthesised so the macro
% expands as a single term: `bsl` binds looser than `*`, `div` and `rem`, so an
% unparenthesised body would be regrouped by a neighbouring operator.
-define(MB, (1 bsl 20)).
% Run the reproducer against the default scratch file "./junk.bin".
%
go() ->
    DefaultFile = "./junk.bin",
    go(DefaultFile).
% Run the reproducer against File with the default workload:
% an 8192 MB file, 60 server processes and 4000 caller processes.
%
go(File) ->
    LenMBs = 8192,
    ServerProcs = 60,
    CallerProcs = 4000,
    go(File, LenMBs, ServerProcs, CallerProcs).
% Start a full run and return `started`.
%
% Steps, in order: stop any previous run, make sure File holds LenMBs
% megabytes of data, start the linking process and register it under the
% module name, start ServerProcs gen_servers and CallerProcs callers, start the
% stats printer, and finally tell every caller to begin issuing requests.
%
go(File, LenMBs, ServerProcs, CallerProcs) ->
    stop(),
    LenBytes = write(File, LenMBs),
    SupPid = sup_start(),
    register(?MODULE, SupPid),
    Servers = spawn_servers(File, ServerProcs),
    Callers = spawn_callers(Servers, LenBytes, CallerProcs),
    StatsPid = spawn(fun stats/0),
    sup_add(StatsPid),
    % Callers block on this message so that none of them starts before
    % all of them have been spawned.
    lists:foreach(fun(Caller) -> Caller ! start_calling end, Callers),
    started.
% Kill the process registered under the module name, if there is one.
%
% Returns `not_running` when nothing is registered, `stopped` otherwise.
%
% exit/2 only sends a signal and returns immediately, so the target may still
% be alive (and its name still registered) when this function returns. A caller
% that re-registers the name right away would then fail with badarg. To avoid
% that, monitor the process and wait for its 'DOWN' message before returning.
%
stop() ->
    case whereis(?MODULE) of
        undefined ->
            not_running;
        Pid when is_pid(Pid) ->
            % Drop the link first so the kill does not propagate to us.
            unlink(Pid),
            Ref = erlang:monitor(process, Pid),
            exit(Pid, kill),
            receive
                {'DOWN', Ref, process, Pid, _Reason} -> stopped
            end
    end.
% Make sure File contains LenMBs megabytes of random data and return the
% usable length in bytes.
%
% If the file is already large enough it is left untouched and LenMBs
% megabytes (in bytes) is returned. Otherwise the file is created or emptied,
% filled by repeating one random 1 MB block LenMBs times, and the resulting
% file size is returned.
%
% The size check runs before the file is opened for writing, so no descriptor
% is held (and leaked) when there is nothing to write. The descriptor is closed
% in `after` so it is released even if truncating or writing fails.
%
write(File, LenMBs) ->
    case already_there(File, LenMBs) of
        true ->
            LenMBs * ?MB;
        false ->
            {ok, Fd} = file:open(File, [append, raw, binary]),
            try
                ok = file:truncate(Fd),
                write_blocks(Fd, rand:bytes(?MB), LenMBs)
            after
                file:close(Fd)
            end
    end.
% Report whether File can be opened and is at least LenMBs megabytes long,
% so that the slow step of writing a new file can be skipped. Prints a notice
% when the file is big enough. Any failure to open or measure the file counts
% as "not there".
%
already_there(File, LenMBs) ->
    case file:open(File, [read, raw, binary]) of
        {ok, Fd} ->
            try file:position(Fd, eof) of
                {ok, Size} ->
                    MinBytes = LenMBs * ?MB,
                    if
                        Size >= MinBytes ->
                            io:format("File ~p already there and >= ~p MBs~n", [File, LenMBs]),
                            true;
                        true ->
                            false
                    end;
                _PositionFailed ->
                    false
            after
                file:close(Fd)
            end;
        _OpenFailed ->
            false
    end.
% Write N copies of Block to Fd, sync the file and return its size in bytes
% (obtained by seeking to eof).
%
% N must be a non-negative integer. The guard makes any other count fail fast
% with function_clause; without it a negative or float count would never reach
% the 0 base case and the loop would keep writing indefinitely.
%
% The sync result is asserted like the other file operations here, so a failed
% flush is not silently ignored.
%
write_blocks(Fd, _Block, 0) ->
    ok = file:sync(Fd),
    {ok, Bytes} = file:position(Fd, eof),
    Bytes;
write_blocks(Fd, Block, N) when is_integer(N), N > 0 ->
    ok = file:write(Fd, Block),
    write_blocks(Fd, Block, N - 1).
% Start N gen_servers of this module, each given File, and return their pids.
% The servers are started unlinked; see init/1 for how they get linked.
%
spawn_servers(File, N) ->
    StartOne = fun() ->
        {ok, Pid} = gen_server:start(?MODULE, [File], []),
        Pid
    end,
    [StartOne() || _ <- lists:seq(1, N)].
% Start N caller processes that will target Servers and return their pids.
%
spawn_callers(Servers, LenBytes, N) ->
    lists:map(fun(_) -> caller_start(Servers, LenBytes) end, lists:seq(1, N)).
% Spawn one caller, hand it to the linking process and return its pid.
% The caller seeds its own random generator, waits for `start_calling`, and
% then loops over its own shuffled copy of Servers.
%
caller_start(Servers, LenBytes) ->
    Body = fun() ->
        rand:seed(default),
        receive
            start_calling -> ok
        end,
        caller_loop(shuffle(Servers), LenBytes)
    end,
    sup_add(spawn(Body)).
% Endless request loop of a caller process.
%
% Each round picks a random position in 1..LenBytes and a random size between
% 4 KB + 1 and 4 KB + 8 MB, sends a pread call to the server at the head of the
% list, then moves that server to the back so the servers are used round-robin.
% Failures of the call are ignored.
%
caller_loop([Server | Others], LenBytes) ->
    Pos = rand:uniform(LenBytes),
    MinSize = 1 bsl 12,
    Size = MinSize + rand:uniform(1 bsl 23),
    Request = {pread, Pos, Size},
    _ = catch gen_server:call(Server, Request, infinity),
    caller_loop(Others ++ [Server], LenBytes).
% Start the "sup" process: a hack that keeps every process of a run linked
% together, so the whole run can be torn down and restarted easily.
% The process is linked to the caller and traps exits. Returns its pid.
%
sup_start() ->
    Loop = fun() ->
        process_flag(trap_exit, true),
        sup([])
    end,
    spawn_link(Loop).
% Receive loop of the linking process; Pids is the list of tracked pids.
%
% {add, Pid, From}  - link to Pid, acknowledge to From with {ok, self()} and
%                     start tracking Pid.
% {'EXIT', Pid, _}  - stop tracking Pid.
%
sup(Pids) ->
    Tracked =
        receive
            {add, NewPid, ReplyTo} ->
                link(NewPid),
                ReplyTo ! {ok, self()},
                [NewPid | Pids];
            {'EXIT', DeadPid, _Reason} ->
                lists:delete(DeadPid, Pids)
        end,
    sup(Tracked).
% Ask the process registered under the module name to link to Pid, wait for
% its acknowledgement and return Pid.
%
sup_add(Pid) ->
    Sup = whereis(?MODULE),
    Sup ! {add, Pid, self()},
    receive
        {ok, Sup} -> Pid
    end.
% Return the elements of L in random order: tag each element with a random
% float, sort by the tags, then strip the tags off again.
%
shuffle(L) ->
    Tagged = lists:map(fun(X) -> {rand:uniform(), X} end, L),
    [X || {_Tag, X} <- lists:sort(Tagged)].
% This is somewhat similar to the _system Apache CouchDB endpoint:
% it gathers metrics, in this case only process_info-s.
%
% Endless loop: sleep 500 ms, collect the stats, then print how many entries
% came back and how long the collection took in milliseconds.
%
stats() ->
    timer:sleep(500),
    io:format("process_info:~n", []),
    Started = erlang:monotonic_time(millisecond),
    Infos = get_stats(node()),
    Procs = length(Infos),
    Elapsed = erlang:monotonic_time(millisecond) - Started,
    io:format(" => ~p procs ~p msec~n", [Procs, Elapsed]),
    stats().
% Run get_stats_int/0 on Node through erpc and return its result.
%
get_stats(Node) ->
    NoArgs = [],
    erpc:call(Node, ?MODULE, get_stats_int, NoArgs).
% Collect the message queue length and process dictionary of every process on
% the local node. Returns one entry per pid listed by processes/0.
%
get_stats_int() ->
    Items = [message_queue_len, dictionary],
    lists:map(fun(P) -> catch process_info(P, Items) end, processes()).
% gen_server that is supposed to model a couch_file gen_server:
% it only does pread plus some basic steps inside the pread call.
%
% init/1 opens File, registers this server with the linking process and keeps
% the file descriptor in the state map under `fd`.
%
init([File]) ->
    OpenModes = [read, raw, binary, append],
    {ok, Fd} = file:open(File, OpenModes),
    sup_add(self()),
    State = #{fd => Fd},
    {ok, State}.
% Close the file descriptor held in the state. Crashes if closing fails.
%
terminate(_Reason, #{fd := Fd}) ->
    ok = file:close(Fd).
% Serve a {pread, Pos, Len} request.
%
% Reads 4 bytes at Pos and discards them (presumably standing in for a header
% read in couch_file - not confirmed here), then reads Len bytes at Pos + 4,
% strips the block prefixes and replies with the remaining data as one binary.
% Replies with an empty binary when the second read hits end of file.
%
handle_call({pread, Pos, Len}, _From, #{fd := Fd} = St) ->
    _ = file:pread(Fd, Pos, 4),
    Reply =
        case file:pread(Fd, Pos + 4, Len) of
            {ok, Data} ->
                Chunks = remove_block_prefixes(Pos rem 16#1000, Data),
                iolist_to_binary(Chunks);
            eof ->
                <<>>
        end,
    {reply, Reply, St}.
% No casts are expected: stop the server with the offending message as reason.
%
handle_cast(Msg, St) when is_map(St) ->
    Reason = {unexpected_cast, Msg},
    {stop, Reason, St}.
% Copied from couch_file. It removes block prefixes. Not sure if this is
% relevant, but since it is in the function in question, it was easy enough to
% use it to hopefully approximate the conditions better.
%
% The data is treated as a sequence of 16#1000-byte blocks whose first byte is
% a prefix. BlockOffset is the position of the first byte of Bin within its
% block. Returns the list of data chunks with every prefix byte dropped.
%
remove_block_prefixes(_BlockOffset, <<>>) ->
    [];
remove_block_prefixes(0, <<_Prefix, Tail/binary>>) ->
    % At a block boundary: skip the one-byte prefix.
    remove_block_prefixes(1, Tail);
remove_block_prefixes(BlockOffset, Bin) ->
    LeftInBlock = 16#1000 - BlockOffset,
    case size(Bin) > LeftInBlock of
        true ->
            <<Chunk:LeftInBlock/binary, Tail/binary>> = Bin,
            [Chunk | remove_block_prefixes(0, Tail)];
        false ->
            [Bin]
    end.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment