Skip to content

Instantly share code, notes, and snippets.

@potatosalad
Last active August 25, 2018 15:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save potatosalad/a99a2c3ea9a11709930b50ebbe82bfd6 to your computer and use it in GitHub Desktop.
Save potatosalad/a99a2c3ea9a11709930b50ebbe82bfd6 to your computer and use it in GitHub Desktop.
Possible race condition in ETS

The registry_example.ex test spawns two child processes who both attempt to register themselves against a unique Registry.

Only one should succeed. However, there seem to be cases when :ets.insert_new/2 returns true for two independent processes.

Example reproduction of the bug:

# This is on Erlang/OTP 21.0 and Elixir 1.7.2
# However, I was able to confirm the same behavior on master for both.
elixirc registry_example.ex
elixir -e ':registry_example.testn(100000)'

This almost always fails for me (you may have to bump the number of iterations higher in your own experiments):

#Ref<0.1777803745.3065511938.171015> child 1 yes
#Ref<0.1777803745.3065511938.171015> child 2 yes
** (CaseClauseError) no case clause matching: {:EXIT, #PID<0.11957.0>, {{:case_clause, {#Reference<0.1777803745.3065511938.171015>, #PID<0.89.0>, :register}}, [{:registry_example, :"-test/0-fun-1-", 2, [file: 'registry_example.ex', line: 58]}]}}
    registry_example.ex:91: :registry_example.test/0
    registry_example.ex:7: :registry_example.testn/1
    (stdlib) erl_eval.erl:677: :erl_eval.do_apply/6
    (elixir) lib/code.ex:232: Code.eval_string/3

The output shows that both child 1 and child 2 received a :yes answer from calling:

Registry.register_name({Registry.ViaTest, "test"}, self())

This error completely disappears for me when the number of schedulers is set to 1:

elixir --erl '+S1:1' -e ':registry_example.testn(100000)'

The other ets_example.ex test is a condensed form of the bug involving ETS and 3 processes (1 parent and 2 children):

elixirc ets_example.ex
elixir -e ':ets_example.testn(100000)'

The error should look like this:

** (CaseClauseError) no case clause matching: {:EXIT, #PID<0.19797.3>, {{:case_clause, {#Reference<0.142296104.2799173633.138626>, #PID<0.89.0>, :register}}, [{:ets_example, :"-test/0-fun-1-", 2, [file: 'ets_example.ex', line: 48]}]}}
    ets_example.ex:82: :ets_example.test/0
    ets_example.ex:7: :ets_example.testn/1
    (stdlib) erl_eval.erl:677: :erl_eval.do_apply/6
    (elixir) lib/code.ex:232: Code.eval_string/3

The problem is also immediately present when using Concuerror:

./concuerror --pa "$(exenv prefix)/lib/elixir/ebin" -m ets_example -t test --treat_as_normal=killed --treat_as_normal=shutdown --show_races=true --graph=ets_example.dot

See the attached concuerror_report.txt and image of the race condition.

Concuerror 0.20.0+build.2149.refc040b0d started at 25 Aug 2018 09:44:12
Options:
[{after_timeout,infinity},
{assertions_only,false},
{assume_racing,true},
{depth_bound,500},
{disable_sleep_sets,false},
{dpor,optimal},
{entry_point,{ets_example,test,[]}},
{exclude_module,[]},
{first_process_errors_only,false},
{ignore_error,[]},
{instant_delivery,true},
{interleaving_bound,infinity},
{keep_going,false},
{non_racing_system,[]},
{pa,"/Users/andrew.bennett/.exenv/versions/1.7.2/lib/elixir/ebin"},
{print_depth,20},
{scheduling,round_robin},
{scheduling_bound_type,none},
{show_races,true},
{strict_scheduling,false},
{symbolic_names,true},
{timeout,5000},
{treat_as_normal,[killed,shutdown]},
{use_receive_patterns,true}]
################################################################################
Interleaving #1
--------------------------------------------------------------------------------
New races found:
* 6: <P.1>: true = ets:insert_new(reg, {key,<P.1>})
8: <P.2>: false = ets:insert_new(reg, {key,<P.2>})
* 8: <P.2>: false = ets:insert_new(reg, {key,<P.2>})
11: <P>: true = ets:delete(reg, key)
################################################################################
Interleaving #2
--------------------------------------------------------------------------------
Errors found:
* At step 15 process <P.2> exited abnormally
Reason:
{{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},
[{ets_example,'-test/0-fun-1-',2,
[{file,"/Users/andrew.bennett/Work/potatosalad/erlang/iyam/ets_example.ex"},
{line,48}]}]}
Stacktrace:
[{ets_example,'-test/0-fun-1-',2,
[{file,"/Users/andrew.bennett/Work/potatosalad/erlang/iyam/ets_example.ex"},
{line,48}]}]
* At step 21 process <P> exited abnormally
Reason:
{{case_clause,
{'EXIT',<P.2>,
{{case_clause,
{#Ref<0.257385189.114819073.226546>,<P>,register}},
[{ets_example,'-test/0-fun-1-',2,
[{file,
"/Users/andrew.bennett/Work/potatosalad/erlang/iyam/ets_example.ex"},
{line,48}]}]}}},
[{ets_example,test,0,
[{file,
"/Users/andrew.bennett/Work/potatosalad/erlang/iyam/ets_example.ex"},
{line,78}]}]}
Stacktrace:
[{ets_example,test,0,
[{file,"/Users/andrew.bennett/Work/potatosalad/erlang/iyam/ets_example.ex"},
{line,78}]}]
* At step 25 process <P.1> exited abnormally
Reason:
{{case_clause,
{'EXIT',<P.2>,
{{case_clause,
{#Ref<0.257385189.114819073.226546>,<P>,register}},
[{ets_example,'-test/0-fun-1-',2,
[{file,
"/Users/andrew.bennett/Work/potatosalad/erlang/iyam/ets_example.ex"},
{line,48}]}]}}},
[{ets_example,test,0,
[{file,
"/Users/andrew.bennett/Work/potatosalad/erlang/iyam/ets_example.ex"},
{line,78}]}]}
Stacktrace:
[]
--------------------------------------------------------------------------------
Event trace:
1: <P>: false = erlang:process_flag(trap_exit, true)
2: <P>: reg = ets:new(reg, [ordered_set,public,named_table,{read_concurrency,false}])
in ets_example.ex line 13
3: <P>: #Ref<0.257385189.114819073.226546> = erlang:make_ref()
in ets_example.ex line 15
4: <P>: <P.1> = erlang:spawn_link(erlang, apply, [#Fun<ets_example.1.17251196>,[]])
in erlang.erl line 2803
5: <P>: <P.2> = erlang:spawn_link(erlang, apply, [#Fun<ets_example.2.17251196>,[]])
in erlang.erl line 2803
6: <P.1>: true = ets:insert_new(reg, {key,<P.1>})
in ets_example.ex line 19
7: <P.1>: {#Ref<0.257385189.114819073.226546>,<P.1>,yes} = erlang:send(<P>, {#Ref<0.257385189.114819073.226546>,<P.1>,yes})
8: <P>: receives message ({#Ref<0.257385189.114819073.226546>,<P.1>,yes})
in ets_example.ex line 116
9: <P>: [{key,<P.1>}] = ets:lookup(reg, key)
in ets_example.ex line 69
10: <P>: true = ets:delete(reg, key)
in ets_example.ex line 70
11: <P>: {#Ref<0.257385189.114819073.226546>,<P>,register} = erlang:send(<P.2>, {#Ref<0.257385189.114819073.226546>,<P>,register})
12: <P.2>: true = ets:insert_new(reg, {key,<P.2>})
in ets_example.ex line 44
13: <P.2>: {#Ref<0.257385189.114819073.226546>,<P.2>,yes} = erlang:send(<P>, {#Ref<0.257385189.114819073.226546>,<P.2>,yes})
14: <P.2>: receives message ({#Ref<0.257385189.114819073.226546>,<P>,register})
in ets_example.ex line 116
15: <P.2>: exits abnormally ({{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115,101,114,115,47,97,110|...]},{line,48}]}]})
16: <P.2>: true = erlang:exit(<P>, {{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115,101,114,115,47,97,110|...]},{line,48}]}]})
(while exiting)
17: <P>: receives message ({#Ref<0.257385189.114819073.226546>,<P.2>,yes})
in ets_example.ex line 116
18: <P>: [{key,<P.2>}] = ets:lookup(reg, key)
in ets_example.ex line 75
19: <P>: {#Ref<0.257385189.114819073.226546>,stop} = erlang:send(<P.1>, {#Ref<0.257385189.114819073.226546>,stop})
20: <P>: receives message ({'EXIT',<P.2>,{{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115,101,114,115|...]},{line,48}]}]}})
in ets_example.ex line 116
21: <P>: exits abnormally ({{case_clause,{'EXIT',<P.2>,{{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115|...]},{line,48}]}]}}},[{ets_example,test,0,[{file,[47,85,115,101,114,115,47,97,110|...]},{line,78}]}]})
22: <P>: true = ets:delete(reg)
(while exiting)
23: <P>: true = erlang:exit(<P.1>, {{case_clause,{'EXIT',<P.2>,{{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115|...]},{line,48}]}]}}},[{ets_example,test,0,[{file,[47,85,115,101,114,115,47,97,110|...]},{line,78}]}]})
(while exiting)
24: <P>: true = erlang:exit(<P.2>, {{case_clause,{'EXIT',<P.2>,{{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115|...]},{line,48}]}]}}},[{ets_example,test,0,[{file,[47,85,115,101,114,115,47,97,110|...]},{line,78}]}]})
(while exiting)
25: <P.1>: exits abnormally ({{case_clause,{'EXIT',<P.2>,{{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115|...]},{line,48}]}]}}},[{ets_example,test,0,[{file,[47,85,115,101,114,115,47,97,110|...]},{line,78}]}]})
26: <P.1>: true = erlang:exit(<P>, {{case_clause,{'EXIT',<P.2>,{{case_clause,{#Ref<0.257385189.114819073.226546>,<P>,register}},[{ets_example,'-test/0-fun-1-',2,[{file,[47,85,115|...]},{line,48}]}]}}},[{ets_example,test,0,[{file,[47,85,115,101,114,115,47,97,110|...]},{line,78}]}]})
(while exiting)
################################################################################
Exploration completed!
################################################################################
Errors:
--------------------------------------------------------------------------------
* Stop testing on first error. (Check '-h keep_going').
################################################################################
Tips:
--------------------------------------------------------------------------------
* Running without a scheduling_bound corresponds to verification and may take a long time.
* Increase '--print_depth' if output/graph contains "...".
* An abnormal exit signal killed a process. This is probably the worst thing that can happen race-wise, as any other side-effecting operation races with the arrival of the signal. If the test produces too many interleavings consider refactoring your code.
################################################################################
Info:
--------------------------------------------------------------------------------
* Showing progress (-h progress, for details)
* Writing results in concuerror_report.txt
* Writing graph in ets_example.dot
* Automatically instrumented module io_lib
* Showing PIDs as "<symbolic name(/last registered name)>" ('-h symbolic_names').
* Automatically instrumented module ets_example
* Automatically instrumented module erlang
################################################################################
Done at 25 Aug 2018 09:44:23 (Exit status: error)
Summary: 1 errors, 2/3 interleavings explored
defmodule :ets_example do
def testn(0) do
:ok
end
def testn(n) when is_integer(n) and n > 0 do
:ok = test()
testn(n - 1)
end
def test() do
_ = :erlang.process_flag(:trap_exit, true)
:reg = :ets.new(:reg, [:ordered_set, :public, :named_table, {:read_concurrency, false}])
parent = self()
tag = make_ref()
child1_pid =
spawn_link(fn ->
case :ets.insert_new(:reg, {:key, self()}) do
true ->
_ = send(parent, {tag, self(), :yes})
# :io.format('~p child 1 yes~n', [tag])
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
false ->
case receive_once() do
{^tag, ^parent, :register} ->
true = :ets.insert_new(:reg, {:key, self()})
_ = send(parent, {tag, self(), :yes})
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
end
end
end)
child2_pid =
spawn_link(fn ->
case :ets.insert_new(:reg, {:key, self()}) do
true ->
_ = send(parent, {tag, self(), :yes})
# :io.format('~p child 2 yes~n', [tag])
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
false ->
case receive_once() do
{^tag, ^parent, :register} ->
true = :ets.insert_new(:reg, {:key, self()})
_ = send(parent, {tag, self(), :yes})
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
end
end
end)
case receive_once() do
{^tag, ^child1_pid, :yes} ->
[{:key, ^child1_pid}] = :ets.lookup(:reg, :key)
true = :ets.delete(:reg, :key)
_ = send(child2_pid, {tag, self(), :register})
case receive_once() do
{^tag, ^child2_pid, :yes} ->
[{:key, ^child2_pid}] = :ets.lookup(:reg, :key)
_ = send(child1_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child1_pid, :normal} ->
_ = send(child2_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child2_pid, :normal} ->
true = :ets.delete(:reg)
:ok
end
end
end
{^tag, ^child2_pid, :yes} ->
[{:key, ^child2_pid}] = :ets.lookup(:reg, :key)
true = :ets.delete(:reg, :key)
_ = send(child1_pid, {tag, self(), :register})
case receive_once() do
{^tag, ^child1_pid, :yes} ->
[{:key, ^child1_pid}] = :ets.lookup(:reg, :key)
_ = send(child2_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child2_pid, :normal} ->
_ = send(child1_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child1_pid, :normal} ->
true = :ets.delete(:reg)
:ok
end
end
end
end
end
@doc false
defp receive_once() do
receive do
msg ->
msg
end
end
end
defmodule :registry_example do
def testn(0) do
:ok
end
def testn(n) when is_integer(n) and n > 0 do
:ok = test()
testn(n - 1)
end
def test() do
_ = :erlang.process_flag(:trap_exit, true)
parent = self()
tag = make_ref()
{:ok, registry_pid} = Registry.start_link(keys: :unique, name: Registry.ViaTest)
child1_pid =
spawn_link(fn ->
# NOTE: remove this line when testing with Concuerror
:erlang.yield()
case Registry.register_name({Registry.ViaTest, "test"}, self()) do
:yes ->
_ = send(parent, {tag, self(), :yes})
# NOTE: remove this line when testing with Concuerror
:io.format('~p child 1 yes~n', [tag])
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
:no ->
case receive_once() do
{^tag, ^parent, :register} ->
:yes = Registry.register_name({Registry.ViaTest, "test"}, self())
_ = send(parent, {tag, self(), :yes})
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
end
end
end)
child2_pid =
spawn_link(fn ->
# NOTE: remove this line when testing with Concuerror
:erlang.yield()
case Registry.register_name({Registry.ViaTest, "test"}, self()) do
:yes ->
_ = send(parent, {tag, self(), :yes})
# NOTE: remove this line when testing with Concuerror
:io.format('~p child 2 yes~n', [tag])
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
:no ->
case receive_once() do
{^tag, ^parent, :register} ->
:yes = Registry.register_name({Registry.ViaTest, "test"}, self())
_ = send(parent, {tag, self(), :yes})
case receive_once() do
{^tag, :stop} ->
exit(:normal)
end
end
end
end)
case receive_once() do
{^tag, ^child1_pid, :yes} ->
^child1_pid = Registry.whereis_name({Registry.ViaTest, "test"})
_ = send(child1_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child1_pid, :normal} ->
_ = send(child2_pid, {tag, self(), :register})
case receive_once() do
{^tag, ^child2_pid, :yes} ->
^child2_pid = Registry.whereis_name({Registry.ViaTest, "test"})
_ = send(child2_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child2_pid, :normal} ->
_ = :erlang.exit(registry_pid, :shutdown)
case receive_once() do
{:EXIT, ^registry_pid, :shutdown} ->
:ok
end
end
end
end
{^tag, ^child2_pid, :yes} ->
^child2_pid = Registry.whereis_name({Registry.ViaTest, "test"})
_ = send(child2_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child2_pid, :normal} ->
_ = send(child1_pid, {tag, self(), :register})
case receive_once() do
{^tag, ^child1_pid, :yes} ->
^child1_pid = Registry.whereis_name({Registry.ViaTest, "test"})
_ = send(child1_pid, {tag, :stop})
case receive_once() do
{:EXIT, ^child1_pid, :normal} ->
_ = :erlang.exit(registry_pid, :shutdown)
case receive_once() do
{:EXIT, ^registry_pid, :shutdown} ->
:ok
end
end
end
end
end
end
@doc false
defp receive_once() do
receive do
msg ->
msg
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment