Skip to content

Instantly share code, notes, and snippets.

@rjarry
Last active June 25, 2020 19:45
Show Gist options
  • Save rjarry/a19629ae41668a46d44566b4ea16e0d4 to your computer and use it in GitHub Desktop.
Save rjarry/a19629ae41668a46d44566b4ea16e0d4 to your computer and use it in GitHub Desktop.
Tooling to reproduce a deadlock with the sysrepo shared mutex
From 820aed8c8bc479b0866e6488a4bd980546547dda Mon Sep 17 00:00:00 2001
From: Robin Jarry <robin.jarry@6wind.com>
Date: Thu, 25 Jun 2020 10:37:32 +0200
Subject: mutex bug
First, build libyang and sysrepo
Then:
cd sysrepo/mutex-bug
make
And to reproduce the problem:
bash -x test.sh
After a few seconds, hit ctrl-c, you should get:
++ sysrepoctl -C
0
++ sysrepoctl -l
[ERR]: Locking a mutex failed (sr_connect: Connection timed out).
sysrepoctl error: Failed to connect (Timeout expired)
The problem does not appear every time. It seems to depend on when ctrl-c
is hit. Also, it is very hard to reproduce with only a few yang modules
installed. This is why we install 300 dummy modules in the Makefile.
Only the first two are actually used by the applications.
After this error, the whole shared memory is corrupted. The only way to
run the test again is to clear it.
Signed-off-by: Robin Jarry <robin.jarry@6wind.com>
---
mutex-bug/Makefile | 12 +++
mutex-bug/example.yang | 48 ++++++++++
mutex-bug/main.c | 194 +++++++++++++++++++++++++++++++++++++++++
mutex-bug/test.sh | 50 +++++++++++
4 files changed, 304 insertions(+)
create mode 100644 mutex-bug/Makefile
create mode 100644 mutex-bug/example.yang
create mode 100644 mutex-bug/main.c
create mode 100755 mutex-bug/test.sh
diff --git a/mutex-bug/Makefile b/mutex-bug/Makefile
new file mode 100644
index 000000000000..580194909479
--- /dev/null
+++ b/mutex-bug/Makefile
@@ -0,0 +1,12 @@
+
+# Default goal: build both test programs from main.c ...
+all: prog1 prog2
+# ... and make sure the 300 generated modules example1..example300 exist
+# under /etc/sysrepo/yang (seq expands to one target path per module).
+all: $(shell seq -f '/etc/sysrepo/yang/example%.0f.yang' 300)
+
+# Generate one module from the example.yang template: the pattern stem ($*)
+# replaces the NNN placeholder on each line, the result is written to a
+# temporary file in the current directory ($(@F)), installed with
+# "sysrepoctl -i" and then removed again.
+# NOTE(review): the target file itself is expected to be created by
+# sysrepoctl during installation -- confirm against the sysrepo version used.
+/etc/sysrepo/yang/example%.yang: example.yang
+ sed 's/NNN/$*/' $< > $(@F)
+ sysrepoctl -i $(@F) -v2
+ rm $(@F)
+
+# Build progN from main.c; the stem N is passed as MODULE_NUM, which main.c
+# uses to select the module name (only 1 and 2 are accepted there).
+prog%: main.c
+ gcc -Wall -Werror -Wextra -pedantic -std=gnu99 \
+ -DMODULE_NUM=$* $< -o $@ -lsysrepo -lyang -levent
diff --git a/mutex-bug/example.yang b/mutex-bug/example.yang
new file mode 100644
index 000000000000..b6d3dbc772f6
--- /dev/null
+++ b/mutex-bug/example.yang
@@ -0,0 +1,48 @@
+// Template module: every "NNN" below is a placeholder that the Makefile
+// replaces with the module number before installation.
+module exampleNNN {
+ yang-version 1.1;
+ namespace "urn:example:NNN";
+ prefix exNNN;
+
+ // Writable configuration subtree.
+ container conf {
+ leaf enable-app {
+ type boolean;
+ }
+ }
+
+ // Read-only (config false) state subtree.
+ container state {
+ config false;
+ leaf hostname {
+ type string;
+ }
+ leaf size {
+ type uint32;
+ }
+ container network {
+ // One entry per interface, keyed by its name.
+ list interface {
+ key name;
+ leaf name {
+ type string;
+ }
+ leaf address {
+ type string;
+ }
+ }
+ }
+ }
+
+ // RPC with a single optional "delay" input and no output.
+ rpc reboot {
+ input {
+ leaf delay {
+ type uint64;
+ }
+ }
+ }
+
+ // Same shape as "reboot": single optional "delay" input, no output.
+ rpc poweroff {
+ input {
+ leaf delay {
+ type uint64;
+ }
+ }
+ }
+}
diff --git a/mutex-bug/main.c b/mutex-bug/main.c
new file mode 100644
index 000000000000..bbda2906cabf
--- /dev/null
+++ b/mutex-bug/main.c
@@ -0,0 +1,194 @@
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <event.h>
+#include <libyang/libyang.h>
+#include <sysrepo.h>
+
+#ifndef MODULE_NUM
+#error "MODULE_NUM is not defined"
+#endif
+#if (MODULE_NUM == 1)
+#define MODULE_NAME "example1"
+#elif (MODULE_NUM == 2)
+#define MODULE_NAME "example2"
+#else
+#error "Invalid value for MODULE_NUM"
+#endif
+
+/**
+ * Configuration change callback.
+ *
+ * Only SR_EV_DONE events are handled; every other event returns SR_ERR_OK
+ * immediately. On SR_EV_DONE the subtree at @p xpath is fetched and printed
+ * to stdout as XML. When built with MODULE_NUM == 1, the callback also walks
+ * all changes and starts ("./prog2 &") or stops ("killall -s SIGINT prog2")
+ * the second program depending on the new value of
+ * /example1:conf/enable-app.
+ *
+ * @param session       Session provided by sysrepo for this callback.
+ * @param module_name   Unused.
+ * @param xpath         XPath printed and passed to sr_get_subtree().
+ * @param event         Event type; anything but SR_EV_DONE is ignored.
+ * @param request_id    Unused.
+ * @param private_data  Unused.
+ * @return SR_ERR_OK on success, otherwise the error code of the last failing
+ *         sysrepo call.
+ */
+static int
+module_change_cb(sr_session_ctx_t *session, const char *module_name,
+	const char *xpath, sr_event_t event, uint32_t request_id, void *private_data)
+{
+	struct lyd_node *conf = NULL;
+	int rc = SR_ERR_OK;
+
+	(void)module_name;
+	(void)request_id;
+	(void)private_data;
+
+	if (event != SR_EV_DONE)
+		goto end;
+
+	printf("module_change_cb xpath=%s\n", xpath);
+	rc = sr_get_subtree(session, xpath, 0, &conf);
+	if (conf)
+		lyd_print_file(stdout, conf, LYD_XML, LYP_FORMAT);
+
+#if (MODULE_NUM == 1)
+	sr_change_iter_t *it = NULL;
+	sr_change_oper_t oper;
+	sr_val_t *old_value = NULL;
+	sr_val_t *new_value = NULL;
+
+	rc = sr_get_changes_iter(session, "//." , &it);
+	if (rc)
+		goto end;
+	while ((rc = sr_get_change_next(session, it, &oper, &old_value, &new_value)) == SR_ERR_OK) {
+		/*
+		 * new_value may be NULL (e.g. when a node was deleted), so it
+		 * must be checked before its xpath is inspected.
+		 */
+		if (new_value && !strcmp(new_value->xpath, "/example1:conf/enable-app")) {
+			if (new_value->data.bool_val) {
+				printf("starting prog2\n");
+				system("./prog2 &");
+			} else {
+				printf("stopping prog2\n");
+				system("killall -s SIGINT prog2");
+			}
+		}
+		sr_free_val(old_value);
+		sr_free_val(new_value);
+	}
+	sr_free_change_iter(it);
+	/* SR_ERR_NOT_FOUND only marks the end of the iteration. */
+	if (rc == SR_ERR_NOT_FOUND)
+		rc = SR_ERR_OK;
+#endif
+
+end:
+	lyd_free_withsiblings(conf);
+	return rc;
+}
+
+/**
+ * RPC callback: logs the operation path and dumps the RPC input tree to
+ * stdout as formatted XML. No output data is produced.
+ *
+ * @return Always SR_ERR_OK.
+ */
+static int
+rpc_cb(sr_session_ctx_t *session, const char *op_path,
+	const struct lyd_node *input, sr_event_t event, uint32_t request_id,
+	struct lyd_node *output, void *private_data)
+{
+	/* Required by the callback signature but not needed here. */
+	(void)private_data;
+	(void)output;
+	(void)request_id;
+	(void)event;
+	(void)session;
+
+	printf("rpc_cb %s\n", op_path);
+	lyd_print_file(stdout, input, LYD_XML, LYP_FORMAT);
+
+	return SR_ERR_OK;
+}
+
+/**
+ * Operational data provider callback.
+ *
+ * Builds a fixed data tree rooted at @p path (hostname, size and two network
+ * interfaces with an address each) and hands it to sysrepo through
+ * @p parent.
+ *
+ * @param session        Session used to look up the libyang context.
+ * @param module_name    Unused.
+ * @param path           Path of the root node to create.
+ * @param request_xpath  XPath of the request, only printed.
+ * @param request_id     Unused.
+ * @param parent         Set to the created tree on success; left untouched
+ *                       on failure.
+ * @param private_data   Unused.
+ * @return SR_ERR_OK on success, SR_ERR_INTERNAL if any node could not be
+ *         created.
+ */
+static int
+get_items_cb(sr_session_ctx_t *session, const char *module_name,
+	const char *path, const char *request_xpath, uint32_t request_id,
+	struct lyd_node **parent, void *private_data)
+{
+	/* Child nodes created below the root, relative to it. */
+	static const struct {
+		const char *path;
+		const char *value;
+	} leaves[] = {
+		{ "hostname", "foobar" },
+		{ "size", "12" },
+		{ "network/interface[name='eth0']/address", "1.2.3.4/24" },
+		{ "network/interface[name='eth1']/address", "4.3.2.1/24" },
+	};
+	const struct ly_ctx *ctx = NULL;
+	struct lyd_node *root = NULL;
+	size_t i;
+
+	(void)module_name;
+	(void)request_id;
+	(void)private_data;
+
+	printf("get_items_cb %s\n", request_xpath);
+
+	ctx = sr_get_context(sr_session_get_connection(session));
+
+	root = lyd_new_path(NULL, ctx, path, NULL, 0, 0);
+	if (!root)
+		return SR_ERR_INTERNAL;
+
+	for (i = 0; i < sizeof(leaves) / sizeof(leaves[0]); i++) {
+		if (!lyd_new_path(root, NULL, leaves[i].path,
+				(void *)leaves[i].value, 0, 0)) {
+			/* Do not hand a half-built tree back to sysrepo. */
+			lyd_free_withsiblings(root);
+			return SR_ERR_INTERNAL;
+		}
+	}
+
+	*parent = root;
+	return SR_ERR_OK;
+}
+
+/**
+ * libevent signal handler: asks the event loop passed in @p arg to stop.
+ */
+static void
+signal_cb(evutil_socket_t fd, short what, void *arg)
+{
+	(void)what;
+	(void)fd;
+
+	/* arg is the event base registered together with this callback. */
+	event_base_loopbreak((struct event_base *)arg);
+}
+
+/**
+ * libevent read handler for a subscription event pipe: lets sysrepo process
+ * the pending events of the subscription passed in @p arg.
+ */
+static void
+sub_ready_cb(evutil_socket_t fd, short what, void *arg)
+{
+	(void)what;
+	(void)fd;
+
+	/* arg is the subscription context registered with this callback. */
+	sr_process_events((sr_subscription_ctx_t *)arg, NULL, NULL);
+}
+
+/**
+ * Entry point of the test program.
+ *
+ * Connects to sysrepo, starts a session on the running datastore and
+ * registers, without sysrepo-managed threads (SR_SUBSCR_NO_THREAD):
+ *  - an operational data provider and a config change subscription, both
+ *    sharing the data_sub context,
+ *  - two RPC subscriptions (reboot, poweroff) sharing the rpc_sub context.
+ * The event pipes of both subscription contexts are then polled from a
+ * libevent loop which runs until SIGINT is received.
+ *
+ * @return 0 on success, 1 if any setup step failed.
+ */
+int main(void)
+{
+	sr_conn_ctx_t *connection = NULL;
+	sr_session_ctx_t *session = NULL;
+	sr_subscription_ctx_t *data_sub = NULL, *rpc_sub = NULL;
+	struct event_base *evt_base = NULL;
+	struct event *sig_evt = NULL, *data_evt = NULL, *rpc_evt = NULL;
+	int data_sub_fd, rpc_sub_fd;
+	int rc = SR_ERR_OK;
+
+	sr_log_stderr(SR_LL_INF);
+
+	rc = sr_connect(0, &connection);
+	if (rc)
+		goto end;
+	rc = sr_session_start(connection, SR_DS_RUNNING, &session);
+	if (rc)
+		goto end;
+	rc = sr_oper_get_items_subscribe(
+		session, MODULE_NAME, "/"MODULE_NAME":state", get_items_cb, NULL,
+		SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD, &data_sub);
+	if (rc)
+		goto end;
+	rc = sr_module_change_subscribe(
+		session, MODULE_NAME, "/"MODULE_NAME":conf", module_change_cb, NULL,
+		0, SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD | SR_SUBSCR_DONE_ONLY, &data_sub);
+	if (rc)
+		goto end;
+	rc = sr_rpc_subscribe_tree(
+		session, "/"MODULE_NAME":reboot", rpc_cb, NULL,
+		0, SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD, &rpc_sub);
+	if (rc)
+		goto end;
+	rc = sr_rpc_subscribe_tree(
+		session, "/"MODULE_NAME":poweroff", rpc_cb, NULL,
+		0, SR_SUBSCR_CTX_REUSE | SR_SUBSCR_NO_THREAD, &rpc_sub);
+	if (rc)
+		goto end;
+
+	rc = sr_get_event_pipe(data_sub, &data_sub_fd);
+	if (rc)
+		goto end;
+	rc = sr_get_event_pipe(rpc_sub, &rpc_sub_fd);
+	if (rc)
+		goto end;
+
+	evt_base = event_base_new();
+	if (!evt_base) {
+		fprintf(stderr, "event_base_new failed\n");
+		rc = SR_ERR_NOMEM;
+		goto end;
+	}
+
+	sig_evt = event_new(
+		evt_base, SIGINT, EV_SIGNAL | EV_PERSIST, signal_cb, evt_base);
+	data_evt = event_new(
+		evt_base, data_sub_fd, EV_READ | EV_PERSIST, sub_ready_cb, data_sub);
+	rpc_evt = event_new(
+		evt_base, rpc_sub_fd, EV_READ | EV_PERSIST, sub_ready_cb, rpc_sub);
+	if (!sig_evt || !data_evt || !rpc_evt) {
+		fprintf(stderr, "event_new failed\n");
+		rc = SR_ERR_NOMEM;
+		goto end;
+	}
+
+	if (event_add(sig_evt, NULL) || event_add(data_evt, NULL) ||
+			event_add(rpc_evt, NULL)) {
+		fprintf(stderr, "event_add failed\n");
+		rc = SR_ERR_INTERNAL;
+		goto end;
+	}
+
+	event_base_dispatch(evt_base);
+
+end:
+	/*
+	 * Tear down the event loop first: the events reference the pipe file
+	 * descriptors owned by the subscriptions released below.
+	 */
+	if (rpc_evt)
+		event_free(rpc_evt);
+	if (data_evt)
+		event_free(data_evt);
+	if (sig_evt)
+		event_free(sig_evt);
+	if (evt_base)
+		event_base_free(evt_base);
+	if (rpc_sub)
+		sr_unsubscribe(rpc_sub);
+	if (data_sub)
+		sr_unsubscribe(data_sub);
+	if (session)
+		sr_session_stop(session);
+	if (connection)
+		sr_disconnect(connection);
+	return rc == SR_ERR_OK ? 0 : 1;
+}
diff --git a/mutex-bug/test.sh b/mutex-bug/test.sh
new file mode 100755
index 000000000000..badeba7ec76f
--- /dev/null
+++ b/mutex-bug/test.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+# Reproducer loop: repeatedly starts prog1, exercises it through sysrepocfg
+# and stops it again, until the script is interrupted.
+
+# Scratch file reused for every XML payload below.
+tmpfile=$(mktemp)
+# On exit, remove the scratch file and run "sysrepoctl -C" and
+# "sysrepoctl -l". $tmpfile is expanded here, when the trap is installed.
+trap "rm -f -- $tmpfile; sysrepoctl -C; sysrepoctl -l" EXIT
+
+# One test iteration.
+dotest() {
+ # Start prog1 in the background and give it a second to subscribe.
+ ./prog1 &
+
+ sleep 1
+
+ # Read the operational state of example1.
+ sysrepocfg -d operational -x /example1:state -f xml -X
+ # Set enable-app to true; prog1's change callback starts prog2 on this.
+ cat >$tmpfile <<EOF
+<conf xmlns="urn:example:1">
+ <enable-app>true</enable-app>
+</conf>
+EOF
+ sysrepocfg -w -f xml -m example1 -I$tmpfile
+
+ # Send the reboot RPC of example1.
+ cat >$tmpfile <<EOF
+<reboot xmlns="urn:example:1">
+ <delay>5</delay>
+</reboot>
+EOF
+ sysrepocfg -f xml -R$tmpfile
+
+ # Send the poweroff RPC of example1.
+ cat >$tmpfile <<EOF
+<poweroff xmlns="urn:example:1">
+ <delay>2</delay>
+</poweroff>
+EOF
+ sysrepocfg -f xml -R$tmpfile
+
+ # Read the operational state of example2 (presumably served by prog2,
+ # which prog1 started above -- confirm).
+ sysrepocfg -d operational -x /example2:state -f xml -X
+
+ # Set enable-app back to false; prog1's change callback stops prog2.
+ cat >$tmpfile <<EOF
+<conf xmlns="urn:example:1">
+ <enable-app>false</enable-app>
+</conf>
+EOF
+ sysrepocfg -w -f xml -m example1 -I$tmpfile
+
+ # Interrupt the first background job (prog1) and wait for it to exit.
+ kill -INT %1
+ wait
+}
+
+# Start from a clean state by removing /dev/shm/sr* before the first run.
+rm -rf /dev/shm/sr*
+
+# Repeat until interrupted (ctrl-c).
+while true; do
+ dotest
+done
--
2.25.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment