Last active
June 25, 2020 19:45
-
-
Save rjarry/a19629ae41668a46d44566b4ea16e0d4 to your computer and use it in GitHub Desktop.
Tooling to reproduce a deadlock with the sysrepo shared mutex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 820aed8c8bc479b0866e6488a4bd980546547dda Mon Sep 17 00:00:00 2001 | |
From: Robin Jarry <robin.jarry@6wind.com> | |
Date: Thu, 25 Jun 2020 10:37:32 +0200 | |
Subject: mutex bug | |
First, build libyang and sysrepo | |
Then: | |
cd sysrepo/mutex-bug | |
make | |
And to reproduce the problem: | |
bash -x test.sh | |
After a few seconds, hit ctrl-c. You should get: | |
++ sysrepoctl -C | |
0 | |
++ sysrepoctl -l | |
[ERR]: Locking a mutex failed (sr_connect: Connection timed out). | |
sysrepoctl error: Failed to connect (Timeout expired) | |
The problem does not appear every time. It seems to depend on when ctrl-c | |
is hit. Also, it is very hard to reproduce with only a few YANG modules | |
installed. This is why we install 300 dummy modules in the Makefile. | |
Only the first two are actually used by the applications. | |
After this error, the whole shared memory is corrupted. The only way to | |
run the test again is to clear it. | |
Signed-off-by: Robin Jarry <robin.jarry@6wind.com> | |
--- | |
mutex-bug/Makefile | 12 +++ | |
mutex-bug/example.yang | 48 ++++++++++ | |
mutex-bug/main.c | 194 +++++++++++++++++++++++++++++++++++++++++ | |
mutex-bug/test.sh | 50 +++++++++++ | |
4 files changed, 304 insertions(+) | |
create mode 100644 mutex-bug/Makefile | |
create mode 100644 mutex-bug/example.yang | |
create mode 100644 mutex-bug/main.c | |
create mode 100755 mutex-bug/test.sh | |
diff --git a/mutex-bug/Makefile b/mutex-bug/Makefile | |
new file mode 100644 | |
index 000000000000..580194909479 | |
--- /dev/null | |
+++ b/mutex-bug/Makefile | |
@@ -0,0 +1,12 @@ | |
+ | |
+# Default goal: build both test programs and make sure the 300 generated | |
+# example modules (example1 .. example300) are present. | |
+all: prog1 prog2 | |
+all: $(shell seq -f '/etc/sysrepo/yang/example%.0f.yang' 300) | |
+ | |
+# Generate one module from the example.yang template: the stem ($*) replaces | |
+# the first NNN placeholder on each line, the result is written to a file | |
+# named after the target's basename in the current directory, handed to | |
+# "sysrepoctl -i", and the local copy is removed afterwards. | |
+# NOTE(review): this rule never writes the target path itself; presumably | |
+# "sysrepoctl -i" leaves a copy under /etc/sysrepo/yang -- confirm. | |
+/etc/sysrepo/yang/example%.yang: example.yang | |
+ sed 's/NNN/$*/' $< > $(@F) | |
+ sysrepoctl -i $(@F) -v2 | |
+ rm $(@F) | |
+ | |
+# Build progN from main.c; the stem is passed as MODULE_NUM, which main.c | |
+# uses to select the module it works with (only 1 and 2 are accepted there). | |
+prog%: main.c | |
+ gcc -Wall -Werror -Wextra -pedantic -std=gnu99 \ | |
+ -DMODULE_NUM=$* $< -o $@ -lsysrepo -lyang -levent | |
diff --git a/mutex-bug/example.yang b/mutex-bug/example.yang | |
new file mode 100644 | |
index 000000000000..b6d3dbc772f6 | |
--- /dev/null | |
+++ b/mutex-bug/example.yang | |
@@ -0,0 +1,48 @@ | |
+// Template module: the Makefile's sed command replaces each NNN placeholder | |
+// with a module number to produce example1, example2, ... | |
+module exampleNNN { | |
+ yang-version 1.1; | |
+ namespace "urn:example:NNN"; | |
+ prefix exNNN; | |
+ | |
+ // Configuration data. main.c (built with MODULE_NUM=1) reacts to changes | |
+ // of enable-app by starting or stopping prog2. | |
+ container conf { | |
+ leaf enable-app { | |
+ type boolean; | |
+ } | |
+ } | |
+ | |
+ // Read-only state data (config false); main.c fills it in from its | |
+ // get_items_cb callback. | |
+ container state { | |
+ config false; | |
+ leaf hostname { | |
+ type string; | |
+ } | |
+ leaf size { | |
+ type uint32; | |
+ } | |
+ container network { | |
+ // Interfaces are keyed by name. | |
+ list interface { | |
+ key name; | |
+ leaf name { | |
+ type string; | |
+ } | |
+ leaf address { | |
+ type string; | |
+ } | |
+ } | |
+ } | |
+ } | |
+ | |
+ // RPC taking an optional "delay" input leaf. | |
+ rpc reboot { | |
+ input { | |
+ leaf delay { | |
+ type uint64; | |
+ } | |
+ } | |
+ } | |
+ | |
+ // Same shape as reboot: an optional "delay" input leaf. | |
+ rpc poweroff { | |
+ input { | |
+ leaf delay { | |
+ type uint64; | |
+ } | |
+ } | |
+ } | |
+} | |
diff --git a/mutex-bug/main.c b/mutex-bug/main.c | |
new file mode 100644 | |
index 000000000000..bbda2906cabf | |
--- /dev/null | |
+++ b/mutex-bug/main.c | |
@@ -0,0 +1,194 @@ | |
+#include <unistd.h> | |
+#include <signal.h> | |
+#include <string.h> | |
+ | |
+#include <event.h> | |
+#include <libyang/libyang.h> | |
+#include <sysrepo.h> | |
+ | |
+/* | |
+ * MODULE_NUM must be provided on the compiler command line (the Makefile | |
+ * passes -DMODULE_NUM=<n>). It selects the YANG module this program works | |
+ * with; only the values 1 and 2 are accepted, anything else is a build error. | |
+ */ | |
+#ifndef MODULE_NUM | |
+#error "MODULE_NUM is not defined" | |
+#endif | |
+#if (MODULE_NUM == 1) | |
+#define MODULE_NAME "example1" | |
+#elif (MODULE_NUM == 2) | |
+#define MODULE_NAME "example2" | |
+#else | |
+#error "Invalid value for MODULE_NUM" | |
+#endif | |
+ | |
+/* | |
+ * Configuration change callback. | |
+ * | |
+ * Only SR_EV_DONE events are handled; any other event returns SR_ERR_OK | |
+ * immediately. The subtree at xpath is fetched and printed to stdout as XML. | |
+ * | |
+ * When built with MODULE_NUM == 1, the individual changes are then iterated: | |
+ * a change whose new value is /example1:conf/enable-app starts ./prog2 in the | |
+ * background when the value is true and sends it SIGINT (via killall) when | |
+ * it is false. | |
+ * | |
+ * Returns SR_ERR_OK or the sysrepo error code of the last failing call. When | |
+ * MODULE_NUM == 1 the result of sr_get_subtree() is overwritten by the | |
+ * change iteration and therefore not reported. | |
+ */ | |
+static int | |
+module_change_cb(sr_session_ctx_t *session, const char *module_name, | |
+    const char *xpath, sr_event_t event, uint32_t request_id, void *private_data) | |
+{ | |
+    struct lyd_node *conf = NULL; | |
+    int rc = SR_ERR_OK; | |
+ | |
+    (void)module_name; | |
+    (void)request_id; | |
+    (void)private_data; | |
+ | |
+    if (event != SR_EV_DONE) | |
+        goto end; | |
+ | |
+    printf("module_change_cb xpath=%s\n", xpath); | |
+    rc = sr_get_subtree(session, xpath, 0, &conf); | |
+    if (conf) | |
+        lyd_print_file(stdout, conf, LYD_XML, LYP_FORMAT); | |
+ | |
+#if (MODULE_NUM == 1) | |
+    sr_change_iter_t *it = NULL; | |
+    sr_change_oper_t oper; | |
+    sr_val_t *old_value = NULL; | |
+    sr_val_t *new_value = NULL; | |
+ | |
+    rc = sr_get_changes_iter(session, "//." , &it); | |
+    if (rc) | |
+        goto end; | |
+    while ((rc = sr_get_change_next(session, it, &oper, &old_value, &new_value)) == SR_ERR_OK) { | |
+        /* | |
+         * Per the sr_get_change_next() API documentation, new_value is | |
+         * NULL for deleted nodes, so it must be checked before it is | |
+         * dereferenced. | |
+         */ | |
+        if (new_value && !strcmp(new_value->xpath, "/example1:conf/enable-app")) { | |
+            if (new_value->data.bool_val) { | |
+                printf("starting prog2\n"); | |
+                system("./prog2 &"); | |
+            } else { | |
+                printf("stopping prog2\n"); | |
+                system("killall -s SIGINT prog2"); | |
+            } | |
+        } | |
+        sr_free_val(old_value); | |
+        sr_free_val(new_value); | |
+    } | |
+    sr_free_change_iter(it); | |
+    /* SR_ERR_NOT_FOUND only marks the end of the change list. */ | |
+    if (rc == SR_ERR_NOT_FOUND) | |
+        rc = SR_ERR_OK; | |
+#endif | |
+ | |
+end: | |
+    lyd_free_withsiblings(conf); | |
+    return rc; | |
+} | |
+ | |
+/* | |
+ * RPC callback shared by the reboot and poweroff subscriptions: prints the | |
+ * operation path followed by the input tree as formatted XML on stdout and | |
+ * always reports success. No output data is produced. | |
+ */ | |
+static int | |
+rpc_cb(sr_session_ctx_t *session, const char *op_path, | |
+    const struct lyd_node *input, sr_event_t event, uint32_t request_id, | |
+    struct lyd_node *output, void *private_data) | |
+{ | |
+    /* Silence unused-parameter warnings (the build uses -Wextra -Werror). */ | |
+    (void)private_data; | |
+    (void)output; | |
+    (void)request_id; | |
+    (void)event; | |
+    (void)input; | |
+    (void)session; | |
+ | |
+    printf("rpc_cb %s\n", op_path); | |
+    lyd_print_file(stdout, input, LYD_XML, LYP_FORMAT); | |
+ | |
+    return SR_ERR_OK; | |
+} | |
+ | |
+/* | |
+ * Operational data callback: builds a fixed state tree rooted at path | |
+ * (hostname, size and two network interfaces) and returns it in *parent. | |
+ * | |
+ * Returns SR_ERR_OK on success. If any node cannot be created, whatever was | |
+ * built so far is freed, *parent is reset to NULL and SR_ERR_INTERNAL is | |
+ * returned instead of silently handing back a partial tree. | |
+ */ | |
+static int | |
+get_items_cb(sr_session_ctx_t *session, const char *module_name, | |
+    const char *path, const char *request_xpath, uint32_t request_id, | |
+    struct lyd_node **parent, void *private_data) | |
+{ | |
+    /* Leaves created below the root node, as relative path / value pairs. */ | |
+    static const struct { | |
+        const char *path; | |
+        const char *value; | |
+    } leaves[] = { | |
+        {"hostname", "foobar"}, | |
+        {"size", "12"}, | |
+        {"network/interface[name='eth0']/address", "1.2.3.4/24"}, | |
+        {"network/interface[name='eth1']/address", "4.3.2.1/24"}, | |
+    }; | |
+    const struct ly_ctx *ctx = NULL; | |
+    size_t i; | |
+ | |
+    (void)module_name; | |
+    (void)request_id; | |
+    (void)private_data; | |
+ | |
+    printf("get_items_cb %s\n", request_xpath); | |
+ | |
+    ctx = sr_get_context(sr_session_get_connection(session)); | |
+ | |
+    *parent = lyd_new_path(NULL, ctx, path, NULL, 0, 0); | |
+    if (!*parent) | |
+        return SR_ERR_INTERNAL; | |
+ | |
+    for (i = 0; i < sizeof(leaves) / sizeof(leaves[0]); i++) { | |
+        /* The cast drops const: lyd_new_path() takes the value as void *. */ | |
+        if (!lyd_new_path(*parent, NULL, leaves[i].path, (void *)leaves[i].value, 0, 0)) { | |
+            lyd_free_withsiblings(*parent); | |
+            *parent = NULL; | |
+            return SR_ERR_INTERNAL; | |
+        } | |
+    } | |
+ | |
+    return SR_ERR_OK; | |
+} | |
+ | |
+/* | |
+ * libevent callback for the SIGINT event: breaks out of the event loop of | |
+ * the event base passed in arg so that main() can run its cleanup. | |
+ */ | |
+static void | |
+signal_cb(evutil_socket_t fd, short what, void *arg) | |
+{ | |
+    (void)what; | |
+    (void)fd; | |
+ | |
+    event_base_loopbreak((struct event_base *)arg); | |
+} | |
+ | |
+/* | |
+ * libevent callback for a readable subscription event pipe: lets sysrepo | |
+ * process the pending events of the subscription passed in arg. | |
+ */ | |
+static void | |
+sub_ready_cb(evutil_socket_t fd, short what, void *arg) | |
+{ | |
+    sr_subscription_ctx_t *subscription = arg; | |
+ | |
+    (void)what; | |
+    (void)fd; | |
+ | |
+    sr_process_events(subscription, NULL, NULL); | |
+} | |
+ | |
+/* | |
+ * Connects to sysrepo, subscribes to the operational data, configuration | |
+ * changes and the reboot/poweroff RPCs of MODULE_NAME without sysrepo | |
+ * handler threads (SR_SUBSCR_NO_THREAD), then runs a libevent loop that | |
+ * dispatches the subscription event pipes until SIGINT is received. | |
+ * | |
+ * Returns 0 on success and 1 if any sysrepo or libevent call failed. | |
+ */ | |
+int main(void) | |
+{ | |
+    sr_conn_ctx_t *connection = NULL; | |
+    sr_session_ctx_t *session = NULL; | |
+    sr_subscription_ctx_t *data_sub = NULL, *rpc_sub = NULL; | |
+    struct event_base *evt_base = NULL; | |
+    struct event *events[3] = {NULL, NULL, NULL}; | |
+    size_t i; | |
+    int data_sub_fd, rpc_sub_fd; | |
+    int rc = SR_ERR_OK; | |
+ | |
+    sr_log_stderr(SR_LL_INF); | |
+ | |
+    rc = sr_connect(0, &connection); | |
+    if (rc) | |
+        goto end; | |
+    rc = sr_session_start(connection, SR_DS_RUNNING, &session); | |
+    if (rc) | |
+        goto end; | |
+    /* Both data subscriptions share data_sub (SR_SUBSCR_CTX_REUSE). */ | |
+    rc = sr_oper_get_items_subscribe( | |
+        session, MODULE_NAME, "/"MODULE_NAME":state", get_items_cb, NULL, | |
+        SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD, &data_sub); | |
+    if (rc) | |
+        goto end; | |
+    rc = sr_module_change_subscribe( | |
+        session, MODULE_NAME, "/"MODULE_NAME":conf", module_change_cb, NULL, | |
+        0, SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD | SR_SUBSCR_DONE_ONLY, &data_sub); | |
+    if (rc) | |
+        goto end; | |
+    /* Both RPC subscriptions share rpc_sub. */ | |
+    rc = sr_rpc_subscribe_tree( | |
+        session, "/"MODULE_NAME":reboot", rpc_cb, NULL, | |
+        0, SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD, &rpc_sub); | |
+    if (rc) | |
+        goto end; | |
+    rc = sr_rpc_subscribe_tree( | |
+        session, "/"MODULE_NAME":poweroff", rpc_cb, NULL, | |
+        0, SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD, &rpc_sub); | |
+    if (rc) | |
+        goto end; | |
+ | |
+    rc = sr_get_event_pipe(data_sub, &data_sub_fd); | |
+    if (rc) | |
+        goto end; | |
+    rc = sr_get_event_pipe(rpc_sub, &rpc_sub_fd); | |
+    if (rc) | |
+        goto end; | |
+ | |
+    evt_base = event_base_new(); | |
+    if (!evt_base) { | |
+        rc = SR_ERR_INTERNAL; | |
+        goto end; | |
+    } | |
+    events[0] = event_new( | |
+        evt_base, SIGINT, EV_SIGNAL | EV_PERSIST, signal_cb, evt_base); | |
+    events[1] = event_new( | |
+        evt_base, data_sub_fd, EV_READ | EV_PERSIST, sub_ready_cb, data_sub); | |
+    events[2] = event_new( | |
+        evt_base, rpc_sub_fd, EV_READ | EV_PERSIST, sub_ready_cb, rpc_sub); | |
+    for (i = 0; i < sizeof(events) / sizeof(events[0]); i++) { | |
+        if (!events[i] || event_add(events[i], NULL) != 0) { | |
+            rc = SR_ERR_INTERNAL; | |
+            goto end; | |
+        } | |
+    } | |
+ | |
+    event_base_dispatch(evt_base); | |
+ | |
+end: | |
+    /* | |
+     * Release the events before unsubscribing so that libevent no longer | |
+     * references the event pipe descriptors owned by the subscriptions. | |
+     */ | |
+    for (i = 0; i < sizeof(events) / sizeof(events[0]); i++) { | |
+        if (events[i]) | |
+            event_free(events[i]); | |
+    } | |
+    if (evt_base) | |
+        event_base_free(evt_base); | |
+    if (rpc_sub) | |
+        sr_unsubscribe(rpc_sub); | |
+    if (data_sub) | |
+        sr_unsubscribe(data_sub); | |
+    if (session) | |
+        sr_session_stop(session); | |
+    if (connection) | |
+        sr_disconnect(connection); | |
+    return rc == SR_ERR_OK ? 0 : 1; | |
+} | |
diff --git a/mutex-bug/test.sh b/mutex-bug/test.sh | |
new file mode 100755 | |
index 000000000000..badeba7ec76f | |
--- /dev/null | |
+++ b/mutex-bug/test.sh | |
@@ -0,0 +1,50 @@ | |
+#!/bin/sh | |
+# Reproducer loop: repeatedly starts prog1, drives it (and prog2) through | |
+# sysrepocfg requests, then stops it, until the script is interrupted. | |
+# NOTE(review): the commit message runs this script with "bash -x"; whether | |
+# the EXIT trap also fires on ctrl-c under other /bin/sh implementations is | |
+# not established here -- confirm. | |
+ | |
+tmpfile=$(mktemp) | |
+# On exit: remove the temporary file, then run "sysrepoctl -C" and | |
+# "sysrepoctl -l"; the latter is the command that shows the lock timeout | |
+# quoted in the commit message. The trap string is double-quoted, so | |
+# $tmpfile is expanded now, when the trap is installed. | |
+trap "rm -f -- $tmpfile; sysrepoctl -C; sysrepoctl -l" EXIT | |
+ | |
+# One test iteration. | |
+dotest() { | |
+ ./prog1 & | |
+ | |
+ # Give prog1 time to start; presumably long enough for it to subscribe. | |
+ sleep 1 | |
+ | |
+ # Read prog1's operational state. | |
+ sysrepocfg -d operational -x /example1:state -f xml -X | |
+ # Set enable-app to true: prog1 reacts by starting ./prog2. | |
+ cat >$tmpfile <<EOF | |
+<conf xmlns="urn:example:1"> | |
+ <enable-app>true</enable-app> | |
+</conf> | |
+EOF | |
+ sysrepocfg -w -f xml -m example1 -I$tmpfile | |
+ | |
+ # Send the reboot RPC of example1. | |
+ cat >$tmpfile <<EOF | |
+<reboot xmlns="urn:example:1"> | |
+ <delay>5</delay> | |
+</reboot> | |
+EOF | |
+ sysrepocfg -f xml -R$tmpfile | |
+ | |
+ # Send the poweroff RPC of example1. | |
+ cat >$tmpfile <<EOF | |
+<poweroff xmlns="urn:example:1"> | |
+ <delay>2</delay> | |
+</poweroff> | |
+EOF | |
+ sysrepocfg -f xml -R$tmpfile | |
+ | |
+ # Read the operational state of example2 (the module prog2 is built for). | |
+ sysrepocfg -d operational -x /example2:state -f xml -X | |
+ | |
+ # Set enable-app to false: prog1 reacts by sending SIGINT to prog2. | |
+ cat >$tmpfile <<EOF | |
+<conf xmlns="urn:example:1"> | |
+ <enable-app>false</enable-app> | |
+</conf> | |
+EOF | |
+ sysrepocfg -w -f xml -m example1 -I$tmpfile | |
+ | |
+ # Stop prog1 (job 1) with SIGINT and wait for background jobs to finish. | |
+ kill -INT %1 | |
+ wait | |
+} | |
+ | |
+# Start from a clean state; presumably these are sysrepo's shared memory | |
+# files, which the commit message says must be cleared after a failure. | |
+rm -rf /dev/shm/sr* | |
+ | |
+# Loop until interrupted (ctrl-c). | |
+while true; do | |
+ dotest | |
+done | |
-- | |
2.25.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment