Skip to content

Instantly share code, notes, and snippets.

@slfritchie
Last active December 17, 2015 14:29
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slfritchie/5624609 to your computer and use it in GitHub Desktop.
Save slfritchie/5624609 to your computer and use it in GitHub Desktop.
Erlang/OTP R15B0x and R16B patch for the +zdss flag.
Purpose: To prevent scheduler collapse by disabling scheduler sleep periods.
This patch is relative to R16B01 pre-release but may be applied (with "fuzz")
on R15B01 and R15B03-1 without difficulty.
The new flag, "+zdss", must be used together with the "+scl false" flag to be effective.
See the erl.xml document for a description. Example use:
erl +scl false +zdss 500:500
Note that this flag used to be called "+zdnfgtse"
See also:
* https://github.com/slfritchie/otp/compare/a70d09b6e...disable-scheduler-sleeps
* http://erlang.org/pipermail/erlang-questions/2013-April/073490.html
* http://erlang.org/pipermail/erlang-bugs/2013-May/003529.html
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml
index a68e62d..528bbf3 100644
--- a/erts/doc/src/erl.xml
+++ b/erts/doc/src/erl.xml
@@ -1159,6 +1159,28 @@
give lower latency and higher throughput at the expense
of higher memory usage.</p>
</item>
+ <tag><marker id="+zdss"><c>+zdss usec1:usec2</c></marker></tag>
+ <item>
+ <p>Enable the 'disable scheduler sleep' flag and set
+ the two microsecond sleep constants used during idle periods.</p>
+ <p>During certain uncommon workloads, it is possible for
+ Erlang schedulers to go to sleep (for both speed and
+ energy efficiencies). Use of this flag will disable the
+ ability of schedulers to go to sleep merely for being idle.</p>
+ <p>When schedulers are added and removed, e.g., during
+ virtual machine startup or by using the
+ <seealso
+ marker="erlang#system_flag_schedulers_online">erlang:system_flag(schedulers_online)</seealso>
+ BIF, scheduler sleep periods are mandatory. This flag
+ will not interfere with those sleep periods.</p>
+ <p>The two constants here specify sleep times (in
+ microseconds) for two places where schedulers would
+ otherwise sleep. Smaller values will lower latency during
+ low-stress time periods but (in trade) will consume more CPU
+ resources. Suggested values should be at least 500-2000
+ microseconds each, though experimentation is required to
+ find an acceptable CPU time vs. latency trade-off.</p>
+ </item>
</taglist>
</item>
</taglist>
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index 7dc59ea..ee3eefe 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -5587,6 +5587,13 @@ ok
connected via TCP/IP (the normal case) is the socket
actually used in communication with the specific node.</p>
</item>
+ <tag><marker id="system_info_do_not_sleep_constants"><c>do_not_sleep_constants</c></marker></tag>
+ <item>
+ <p>Returns the values of the two microsecond sleep
+ constants specified by the <seealso
+ marker="erts:erl#+zdss">+zdss</seealso> command
+ line flag to <c>erl</c>.</p>
+ </item>
<tag><c>driver_version</c></tag>
<item>
<p>Returns a string containing the erlang driver version
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 54eefe8..e95c2c2 100755
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -2567,6 +2567,20 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
hp = hsz ? HAlloc(BIF_P, hsz) : NULL;
res = erts_bld_uint(&hp, NULL, erts_dist_buf_busy_limit);
BIF_RET(res);
+ } else if (ERTS_IS_ATOM_STR("do_not_sleep_constants", BIF_ARG_1)) { /* +zdss constants: {M, N} or false */
+#ifdef ERTS_SMP
+     if (!dss_enabled) {
+         BIF_RET(am_false); /* flag not given on the command line */
+     } else {
+         Eterm m = erts_make_integer(dss_sleep_m, BIF_P); /* may allocate heap itself for large values */
+         Eterm n = erts_make_integer(dss_sleep_n, BIF_P);
+         Eterm *hp = HAlloc(BIF_P, 3); /* allocate the 2-tuple last so it is filled immediately */
+         res = TUPLE2(hp, m, n);
+         BIF_RET(res);
+     }
+#else
+     BIF_RET(am_false); /* the sleep constants only exist in SMP builds */
+#endif
} else if (ERTS_IS_ATOM_STR("print_ethread_info", BIF_ARG_1)) {
#if defined(ETHR_NATIVE_ATOMIC32_IMPL) \
|| defined(ETHR_NATIVE_ATOMIC64_IMPL) \
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index e6a96d4..40cb0e6 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -560,6 +560,9 @@ void erts_usage(void)
erts_fprintf(stderr, " see error_logger documentation for details\n");
erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n");
erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024);
+ erts_fprintf(stderr, "-zdss N:M\n");
+ erts_fprintf(stderr, " disable scheduler sleeps and replace with\n");
+ erts_fprintf(stderr, " static N and M microsecond pauses.\n");
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "Note that if the emulator is started with erlexec (typically\n");
erts_fprintf(stderr, "from the erl script), these flags should be specified with +.\n");
@@ -886,6 +889,9 @@ early_init(int *argc, char **argv) /*
erts_ets_realloc_always_moves = 0;
erts_ets_always_compress = 0;
erts_dist_buf_busy_limit = ERTS_DE_BUSY_LIMIT;
+#ifdef ERTS_SMP
+ dss_enabled = 0;
+#endif
return ncpu;
}
@@ -1598,7 +1604,25 @@ erl_start(int argc, char **argv)
} else {
erts_dist_buf_busy_limit = new_limit*1024;
}
+ } else if (has_prefix("dss", sub_param)) { /* +zdss usec1[:usec2] */
+     unsigned int m, n; /* scanned with %u; useconds_t is not guaranteed to be int-sized */
+     char *arg = get_arg(sub_param+3, argv[i+1], &i);
+     switch (sscanf(arg, "%u:%u", &m, &n)) {
+     case 1: /* only one value given: use it for both sleep sites */
+         n = m;
+         /* fall through */
+     case 2:
+#ifdef ERTS_SMP
+         dss_enabled = 1;
+         dss_sleep_m = (useconds_t) m;
+         dss_sleep_n = (useconds_t) n;
+#endif
+         break;
+     default: /* nothing parsable: report it as a bad -z option */
+         goto bad_z_option;
+     }
} else {
+ bad_z_option:
erts_fprintf(stderr, "bad -z option %s\n", argv[i]);
erts_usage();
}
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 7415a57..68bc4fb 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -270,6 +270,10 @@ Eterm erts_system_monitor_long_gc;
Eterm erts_system_monitor_large_heap;
struct erts_system_monitor_flags_t erts_system_monitor_flags;
+int dss_enabled = 0;
+useconds_t dss_sleep_m = 1000;
+useconds_t dss_sleep_n = 2000;
+
/* system performance monitor */
Eterm erts_system_profile;
struct erts_system_profile_flags_t erts_system_profile_flags;
@@ -2306,6 +2310,10 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (aux_work)
flgs = erts_smp_atomic32_read_acqb(&ssi->flags);
else {
+ if (dss_enabled) {
+ usleep(dss_sleep_m);
+ break;
+ }
if (thr_prgr_active) {
erts_thr_progress_active(esdp, thr_prgr_active = 0);
sched_wall_time_change(esdp, 0);
@@ -2468,6 +2476,11 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
goto sys_poll_aux_work;
}
#ifdef ERTS_SMP
+ if (dss_enabled) { /* +zdss: take a short fixed pause instead of a poll sleep */
+     erts_smp_runq_unlock(rq); /* release first so the run queue lock is not held during the pause */
+     usleep(dss_sleep_n);
+     goto sys_woken;
+ }
flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 5a1f6bb..e062308 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -1981,3 +1981,9 @@ erts_sched_poke(ErtsSchedulerSleepInfo *ssi)
void erl_halt(int code);
extern erts_smp_atomic32_t erts_halt_progress;
extern int erts_halt_code;
+
+#ifdef ERTS_SMP
+extern int dss_enabled;
+extern useconds_t dss_sleep_m;
+extern useconds_t dss_sleep_n;
+#endif
diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c
index 31d9b2e..4ce64ab 100644
--- a/erts/etc/common/erlexec.c
+++ b/erts/etc/common/erlexec.c
@@ -149,6 +149,7 @@ static char *plusr_val_switches[] = {
/* +z arguments with values */
static char *plusz_val_switches[] = {
"dbbl",
+ "dss",
NULL
};
diff --git a/erts/test/erlexec_SUITE.erl b/erts/test/erlexec_SUITE.erl
index 0dfe6c2..8317101 100644
--- a/erts/test/erlexec_SUITE.erl
+++ b/erts/test/erlexec_SUITE.erl
@@ -35,7 +35,7 @@
init_per_group/2,end_per_group/2,
init_per_testcase/2, end_per_testcase/2]).
--export([args_file/1, evil_args_file/1, env/1, args_file_env/1, otp_7461/1, otp_7461_remote/1, otp_8209/1, zdbbl_dist_buf_busy_limit/1]).
+-export([args_file/1, evil_args_file/1, env/1, args_file_env/1, otp_7461/1, otp_7461_remote/1, otp_8209/1, zdbbl_dist_buf_busy_limit/1, zdss_sleep_constants/1]).
-include_lib("test_server/include/test_server.hrl").
@@ -57,7 +57,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
[args_file, evil_args_file, env, args_file_env,
- otp_7461, otp_8209, zdbbl_dist_buf_busy_limit].
+ otp_7461, otp_8209, zdbbl_dist_buf_busy_limit, zdss_sleep_constants].
groups() ->
[].
@@ -368,6 +368,34 @@ zdbbl_dist_buf_busy_limit(Config) when is_list(Config) ->
?line ok = cleanup_node(SNameS, 10),
ok.
+zdss_sleep_constants(doc) ->
+    ["Check +zdss flag"];
+zdss_sleep_constants(suite) ->
+    [];
+zdss_sleep_constants(Config) when is_list(Config) ->
+    %% Start a peer node with "+zdss M:N" and check what it reports through
+    %% erlang:system_info(do_not_sleep_constants).
+    M = 500,
+    N = 600,
+    ?line {ok,[[PName]]} = init:get_argument(progname),
+    ?line SNameS = "erlexec_test_03",
+    ?line SName = list_to_atom(SNameS++"@"++
+                               hd(tl(string:tokens(atom_to_list(node()),"@")))),
+    ?line Cmd = PName ++ " -sname "++SNameS++" -setcookie "++
+        atom_to_list(erlang:get_cookie()) ++
+        " +zdss " ++ integer_to_list(M) ++ ":" ++ integer_to_list(N),
+    ?line open_port({spawn,Cmd},[]),
+    ?line pong = loop_ping(SName,40),
+    %% An emulator built without SMP support accepts the flag but always
+    %% reports 'false', so the expectation depends on the peer's SMP support.
+    ?line Expected = case rpc:call(SName,erlang,system_info,[smp_support]) of
+                         true -> {M, N};
+                         false -> false
+                     end,
+    ?line Expected = rpc:call(SName,erlang,system_info,[do_not_sleep_constants]),
+    ?line ok = cleanup_node(SNameS, 10),
+    ok.
+
%%
%% Utils
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment