commit 41f8450cb340aa2968fb57d9192fd649c9f8b98f
Author: Yichun Zhang (agentzh) <yichun@openresty.com>
Date: Sat Jun 20 00:37:49 2020 -0700
bugfix: relay_v2: there was no writer <=> reader locking protection, which could lead to corrupted data (like "\0") and garbage data in the staprun output.
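At a high level, the fix brackets every flush path with a try-lock/unlock pair on the relay file's inode so the kernel-side writer and the userspace reader (staprun/stapio) never touch a sub-buffer at the same time; when the lock cannot be taken, the record is dropped and counted as a transport failure instead of being written unprotected. The following is a minimal userspace analogue of that pattern, written against pthreads with made-up names (relay_lock, writer_flush, transport_failures); it illustrates the idea and is not the SystemTap runtime code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t relay_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for the relay inode lock */
static atomic_int transport_failures;                          /* stands in for _stp_transport_failures */
static char relay_buf[4096];                                   /* stands in for a relay sub-buffer */
static size_t relay_off;

static void writer_flush(const char *data, size_t len)
{
        /* The writer must not block for long, so it only tries the lock. */
        if (pthread_mutex_trylock(&relay_lock) != 0) {
                atomic_fetch_add(&transport_failures, 1);  /* drop and count, as the patch does */
                return;
        }
        if (relay_off + len <= sizeof(relay_buf)) {        /* reserve + copy */
                memcpy(relay_buf + relay_off, data, len);
                relay_off += len;
        } else {
                atomic_fetch_add(&transport_failures, 1);
        }
        pthread_mutex_unlock(&relay_lock);                 /* release only what was taken */
}

int main(void)
{
        writer_flush("hello\n", 6);
        printf("transport failures: %d\n", atomic_load(&transport_failures));
        return 0;
}

A reader thread in this analogue would hold relay_lock while consuming relay_buf, which is roughly the role the inode lock plays for staprun in the real patch.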
diff --git a/runtime/print_flush.c b/runtime/print_flush.c
index 18b3e8f62..5b28c4522 100644
--- a/runtime/print_flush.c
+++ b/runtime/print_flush.c
@@ -37,7 +37,19 @@ void stp_print_flush(_stp_pbuf *pb)
#ifdef STP_BULKMODE
#ifdef NO_PERCPU_HEADERS
{
+ struct context* __restrict__ c = NULL;
char *bufp = pb->buf;
+ int inode_locked;
+
+ c = _stp_runtime_entryfn_get_context();
+
+ if (!(inode_locked = _stp_transport_trylock_relay_inode())) {
+ atomic_inc (&_stp_transport_failures);
+#ifndef STP_TRANSPORT_RISKY
+ _stp_runtime_entryfn_put_context(c);
+ return;
+#endif
+ }
while (len > 0) {
size_t bytes_reserved;
@@ -55,15 +67,32 @@ void stp_print_flush(_stp_pbuf *pb)
break;
}
}
+
+ if (inode_locked)
+ _stp_transport_unlock_relay_inode();
+
+ _stp_runtime_entryfn_put_context(c);
}
#else /* !NO_PERCPU_HEADERS */
{
+ struct context* __restrict__ c = NULL;
char *bufp = pb->buf;
struct _stp_trace t = { .sequence = _stp_seq_inc(),
.pdu_len = len};
size_t bytes_reserved;
+ int inode_locked;
+
+ c = _stp_runtime_entryfn_get_context();
+
+ if (!(inode_locked = _stp_transport_trylock_relay_inode())) {
+ atomic_inc (&_stp_transport_failures);
+#ifndef STP_TRANSPORT_RISKY
+ _stp_runtime_entryfn_put_context(c);
+ return;
+#endif
+ }
bytes_reserved = _stp_data_write_reserve(sizeof(struct _stp_trace), &entry);
if (likely(entry && bytes_reserved > 0)) {
@@ -73,7 +102,7 @@ void stp_print_flush(_stp_pbuf *pb)
}
else {
atomic_inc(&_stp_transport_failures);
- return;
+ goto done;
}
while (len > 0) {
@@ -90,6 +119,13 @@ void stp_print_flush(_stp_pbuf *pb)
break;
}
}
+
+ done:
+
+ if (inode_locked)
+ _stp_transport_unlock_relay_inode();
+
+ _stp_runtime_entryfn_put_context(c);
}
#endif /* !NO_PERCPU_HEADERS */
@@ -110,6 +146,7 @@ void stp_print_flush(_stp_pbuf *pb)
unsigned long flags;
struct context* __restrict__ c = NULL;
char *bufp = pb->buf;
+ int inode_locked;
/* Prevent probe reentrancy on _stp_print_lock.
*
@@ -129,6 +166,15 @@ void stp_print_flush(_stp_pbuf *pb)
*/
c = _stp_runtime_entryfn_get_context();
+ if (!(inode_locked = _stp_transport_trylock_relay_inode())) {
+ atomic_inc (&_stp_transport_failures);
+#ifndef STP_TRANSPORT_RISKY
+ //errk("discarding %zu bytes of data\n", len);
+ _stp_runtime_entryfn_put_context(c);
+ return;
+#endif
+ }
+
dbug_trans(1, "calling _stp_data_write...\n");
stp_spin_lock_irqsave(&_stp_print_lock, flags);
while (len > 0) {
@@ -148,6 +194,10 @@ void stp_print_flush(_stp_pbuf *pb)
}
}
stp_spin_unlock_irqrestore(&_stp_print_lock, flags);
+
+ if (inode_locked)
+ _stp_transport_unlock_relay_inode();
+
_stp_runtime_entryfn_put_context(c);
}
#endif /* STP_TRANSPORT_VERSION != 1 */
diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c
index 135951a8e..853d32744 100644
--- a/runtime/transport/relay_v2.c
+++ b/runtime/transport/relay_v2.c
@@ -269,12 +269,44 @@ static void _stp_transport_data_fs_start(void)
static void _stp_transport_data_fs_stop(void)
{
+
if (atomic_read (&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) {
atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED);
del_timer_sync(&_stp_relay_data.timer);
dbug_trans(0, "flushing...\n");
- if (_stp_relay_data.rchan)
- relay_flush(_stp_relay_data.rchan);
+ if (_stp_relay_data.rchan) {
+ struct rchan_buf *buf;
+
+ // NB we cannot call relay_flush() directly here since
+ // we need to do inode locking ourselves.
+
+#ifdef STP_BULKMODE
+ unsigned int i;
+ struct rchan *rchan = _stp_relay_data.rchan;
+
+ for_each_possible_cpu(i) {
+ if ((buf = *per_cpu_ptr(rchan->buf, i))) {
+ struct inode *inode = buf->dentry->d_inode;
+ inode_lock(inode);
+ relay_switch_subbuf(buf, 0);
+ inode_unlock(inode);
+ }
+ }
+#else /* !STP_BULKMODE */
+ buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, 0);
+
+ if (buf != NULL) {
+ struct inode *inode = buf->dentry->d_inode;
+
+ /* NB we are in the syscall context which allows
+ * sleeping. The following inode locking might
+ * sleep. See PR26131. */
+ inode_lock(inode);
+ relay_switch_subbuf(buf, 0);
+ inode_unlock(inode);
+ }
+#endif
+ }
}
}
@@ -403,7 +435,12 @@ _stp_data_write_reserve(size_t size_request, void **entry)
return -EINVAL;
buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf,
- smp_processor_id());
+#ifdef STP_BULKMODE
+ smp_processor_id()
+#else
+ 0
+#endif
+ );
if (unlikely(buf->offset + size_request > buf->chan->subbuf_size)) {
size_request = __stp_relay_switch_subbuf(buf, size_request);
if (!size_request)
@@ -426,3 +463,62 @@ static int _stp_data_write_commit(void *entry)
/* Nothing to do here. */
return 0;
}
+
+static noinline int _stp_transport_trylock_relay_inode(void)
+{
+ unsigned i;
+ struct rchan_buf *buf;
+ struct inode *inode;
+#ifdef DEBUG_TRANS
+ cycles_t begin;
+#endif
+
+ buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf,
+#ifdef STP_BULKMODE
+ smp_processor_id()
+#else
+ 0
+#endif
+ );
+ if (buf == NULL)
+ return 0;
+
+ inode = buf->dentry->d_inode;
+
+#ifdef DEBUG_TRANS
+ begin = get_cycles();
+#endif
+
+ // NB this bounded spin on the inode trylock is needed for stream mode. It is
+ // observed that almost all attempts succeed within 50K iterations,
+ // or about 300K cycles.
+ for (i = 0; i < 50 * 1000; i++) {
+ if (inode_trylock(inode)) {
+ dbug_trans(3, "got inode lock: i=%u: cycles: %llu", i,
+ get_cycles() - begin);
+ return 1;
+ }
+ }
+
+ dbug_trans(0, "failed to get inode lock: i=%u: cycles: %llu", i,
+ get_cycles() - begin);
+ return 0;
+}
+
+static void _stp_transport_unlock_relay_inode(void)
+{
+ struct rchan_buf *buf;
+ struct inode *inode;
+
+ buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf,
+#ifdef STP_BULKMODE
+ smp_processor_id()
+#else
+ 0
+#endif
+ );
+ if (buf == NULL)
+ return;
+
+ inode_unlock(buf->dentry->d_inode);
+}
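The comment in _stp_transport_trylock_relay_inode above explains the design choice: the probe-side writer cannot sleep on the inode lock, so it retries inode_trylock() a bounded number of times and otherwise gives up so the caller can drop the record; in practice almost all attempts succeed within 50K iterations (about 300K cycles). A userspace sketch of the same bounded-trylock technique, using hypothetical names (bounded_trylock, MAX_TRIES) and a pthread mutex in place of the inode lock, might look like this:

#include <pthread.h>
#include <stdio.h>

#define MAX_TRIES (50 * 1000)  /* mirrors the 50K bound chosen in the patch */

/* Retry a non-blocking lock attempt a fixed number of times, then give up so
 * the caller can drop the record instead of sleeping. Returns 1 when the lock
 * was obtained (the caller must unlock it), 0 otherwise. */
static int bounded_trylock(pthread_mutex_t *lock)
{
        unsigned i;

        for (i = 0; i < MAX_TRIES; i++) {
                if (pthread_mutex_trylock(lock) == 0)
                        return 1;
        }
        return 0;
}

int main(void)
{
        static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

        if (bounded_trylock(&m)) {
                puts("lock obtained");
                pthread_mutex_unlock(&m);
        } else {
                puts("gave up after bounded retries");
        }
        return 0;
}

The real helper additionally times the loop with get_cycles() under DEBUG_TRANS, and in non-bulk mode always targets the single sub-buffer at index 0, matching the smp_processor_id()/0 switch added to _stp_data_write_reserve().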
diff --git a/runtime/transport/relayfs.c b/runtime/transport/relayfs.c
index d8fa4e049..97d2dff8b 100644
--- a/runtime/transport/relayfs.c
+++ b/runtime/transport/relayfs.c
@@ -186,3 +186,14 @@ static int _stp_data_write_commit(void *entry)
/* Nothing to do here. */
return 0;
}
+
+static int _stp_transport_trylock_relay_inode(void)
+{
+ /* Nothing to do here. */
+ return 1;
+}
+
+static void _stp_transport_unlock_relay_inode(void)
+{
+ /* Nothing to do here. */
+}
diff --git a/runtime/transport/ring_buffer.c b/runtime/transport/ring_buffer.c
index 43e014bae..21e356973 100644
--- a/runtime/transport/ring_buffer.c
+++ b/runtime/transport/ring_buffer.c
@@ -827,3 +827,14 @@ static void _stp_transport_data_fs_overwrite(int overwrite)
dbug_trans(0, "setting ovewrite to %d\n", overwrite);
_stp_relay_data.overwrite_flag = overwrite;
}
+
+static int _stp_transport_trylock_relay_inode(void)
+{
+ /* Nothing to do here. */
+ return 1;
+}
+
+static void _stp_transport_unlock_relay_inode(void)
+{
+ /* Nothing to do here. */
+}
diff --git a/runtime/transport/transport.h b/runtime/transport/transport.h
index 0cf2a7ca3..9f0f9ddf8 100644
--- a/runtime/transport/transport.h
+++ b/runtime/transport/transport.h
@@ -67,6 +67,24 @@ enum _stp_transport_state {
*/
static enum _stp_transport_state _stp_transport_get_state(void);
+/*
+ * _stp_transport_trylock_relay_inode
+ *
+ * This function locks the relay file inode to protect against relay readers
+ * (i.e., staprun/stapio).
+ * Returns whether the lock is successfully obtained.
+ */
+static noinline int _stp_transport_trylock_relay_inode(void);
+
+/*
+ * _stp_transport_unlock_relay_inode
+ *
+ * This function releases the lock obtained by
+ * _stp_transport_trylock_relay_inode.
+ * It should only be called when the lock was indeed obtained.
+ */
+static void _stp_transport_unlock_relay_inode(void);
+
/*
* _stp_transport_data_fs_init
*
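Taken together, these declarations spell out the contract already visible in the print_flush.c hunks above: remember whether the trylock succeeded, only unlock in that case, and (unless STP_TRANSPORT_RISKY is defined) skip the write entirely when the lock could not be taken. Condensed from those hunks, with the buffer reservation and copy elided and with c being the context obtained from _stp_runtime_entryfn_get_context(), each writer follows this shape:

        int inode_locked;

        if (!(inode_locked = _stp_transport_trylock_relay_inode())) {
                atomic_inc(&_stp_transport_failures);
#ifndef STP_TRANSPORT_RISKY
                _stp_runtime_entryfn_put_context(c);
                return;
#endif
        }

        /* ... reserve space and copy the pending output ... */

        if (inode_locked)
                _stp_transport_unlock_relay_inode();

        _stp_runtime_entryfn_put_context(c);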