-
-
Save agentzh/932e7a32bb508da1d6cbe7ef65994000 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 41f8450cb340aa2968fb57d9192fd649c9f8b98f | |
Author: Yichun Zhang (agentzh) <yichun@openresty.com> | |
Date: Sat Jun 20 00:37:49 2020 -0700 | |
bugfix: relay_v2: there were no writer <=> reader locking protections which might lead to corrupted data (like "\0") and garbage data in the staprun output. | |
diff --git a/runtime/print_flush.c b/runtime/print_flush.c | |
index 18b3e8f62..5b28c4522 100644 | |
--- a/runtime/print_flush.c | |
+++ b/runtime/print_flush.c | |
@@ -37,7 +37,19 @@ void stp_print_flush(_stp_pbuf *pb) | |
#ifdef STP_BULKMODE | |
#ifdef NO_PERCPU_HEADERS | |
{ | |
+ struct context* __restrict__ c = NULL; | |
char *bufp = pb->buf; | |
+ int inode_locked; | |
+ | |
+ c = _stp_runtime_entryfn_get_context(); | |
+ | |
+ if (!(inode_locked = _stp_transport_trylock_relay_inode())) { | |
+ atomic_inc (&_stp_transport_failures); | |
+#ifndef STP_TRANSPORT_RISKY | |
+ _stp_runtime_entryfn_put_context(c); | |
+ return; | |
+#endif | |
+ } | |
while (len > 0) { | |
size_t bytes_reserved; | |
@@ -55,15 +67,32 @@ void stp_print_flush(_stp_pbuf *pb) | |
break; | |
} | |
} | |
+ | |
+ if (inode_locked) | |
+ _stp_transport_unlock_relay_inode(); | |
+ | |
+ _stp_runtime_entryfn_put_context(c); | |
} | |
#else /* !NO_PERCPU_HEADERS */ | |
{ | |
+ struct context* __restrict__ c = NULL; | |
char *bufp = pb->buf; | |
struct _stp_trace t = { .sequence = _stp_seq_inc(), | |
.pdu_len = len}; | |
size_t bytes_reserved; | |
+ int inode_locked; | |
+ | |
+ c = _stp_runtime_entryfn_get_context(); | |
+ | |
+ if (!(inode_locked = _stp_transport_trylock_relay_inode())) { | |
+ atomic_inc (&_stp_transport_failures); | |
+#ifndef STP_TRANSPORT_RISKY | |
+ _stp_runtime_entryfn_put_context(c); | |
+ return; | |
+#endif | |
+ } | |
bytes_reserved = _stp_data_write_reserve(sizeof(struct _stp_trace), &entry); | |
if (likely(entry && bytes_reserved > 0)) { | |
@@ -73,7 +102,7 @@ void stp_print_flush(_stp_pbuf *pb) | |
} | |
else { | |
atomic_inc(&_stp_transport_failures); | |
- return; | |
+ goto done; | |
} | |
while (len > 0) { | |
@@ -90,6 +119,13 @@ void stp_print_flush(_stp_pbuf *pb) | |
break; | |
} | |
} | |
+ | |
+ done: | |
+ | |
+ if (inode_locked) | |
+ _stp_transport_unlock_relay_inode(); | |
+ | |
+ _stp_runtime_entryfn_put_context(c); | |
} | |
#endif /* !NO_PERCPU_HEADERS */ | |
@@ -110,6 +146,7 @@ void stp_print_flush(_stp_pbuf *pb) | |
unsigned long flags; | |
struct context* __restrict__ c = NULL; | |
char *bufp = pb->buf; | |
+ int inode_locked; | |
/* Prevent probe reentrancy on _stp_print_lock. | |
* | |
@@ -129,6 +166,15 @@ void stp_print_flush(_stp_pbuf *pb) | |
*/ | |
c = _stp_runtime_entryfn_get_context(); | |
+ if (!(inode_locked = _stp_transport_trylock_relay_inode())) { | |
+ atomic_inc (&_stp_transport_failures); | |
+#ifndef STP_TRANSPORT_RISKY | |
+ //errk("discarding %zu bytes of data\n", len); | |
+ _stp_runtime_entryfn_put_context(c); | |
+ return; | |
+#endif | |
+ } | |
+ | |
dbug_trans(1, "calling _stp_data_write...\n"); | |
stp_spin_lock_irqsave(&_stp_print_lock, flags); | |
while (len > 0) { | |
@@ -148,6 +194,10 @@ void stp_print_flush(_stp_pbuf *pb) | |
} | |
} | |
stp_spin_unlock_irqrestore(&_stp_print_lock, flags); | |
+ | |
+ if (inode_locked) | |
+ _stp_transport_unlock_relay_inode(); | |
+ | |
_stp_runtime_entryfn_put_context(c); | |
} | |
#endif /* STP_TRANSPORT_VERSION != 1 */ | |
diff --git a/runtime/transport/relay_v2.c b/runtime/transport/relay_v2.c | |
index 135951a8e..853d32744 100644 | |
--- a/runtime/transport/relay_v2.c | |
+++ b/runtime/transport/relay_v2.c | |
@@ -269,12 +269,44 @@ static void _stp_transport_data_fs_start(void) | |
static void _stp_transport_data_fs_stop(void) | |
{ | |
+ | |
if (atomic_read (&_stp_relay_data.transport_state) == STP_TRANSPORT_RUNNING) { | |
atomic_set (&_stp_relay_data.transport_state, STP_TRANSPORT_STOPPED); | |
del_timer_sync(&_stp_relay_data.timer); | |
dbug_trans(0, "flushing...\n"); | |
- if (_stp_relay_data.rchan) | |
- relay_flush(_stp_relay_data.rchan); | |
+ if (_stp_relay_data.rchan) { | |
+ struct rchan_buf *buf; | |
+ | |
+ // NB we cannot call relay_flush() directly here since | |
+ // we need to do inode locking ourselves. | |
+ | |
+#ifdef STP_BULKMODE | |
+ unsigned int i; | |
+ struct rchan *rchan = _stp_relay_data.rchan; | |
+ | |
+ for_each_possible_cpu(i) { | |
+ if ((buf = *per_cpu_ptr(rchan->buf, i))) { | |
+ struct inode *inode = buf->dentry->d_inode; | |
+ inode_lock(inode); | |
+ relay_switch_subbuf(buf, 0); | |
+ inode_unlock(inode); | |
+ } | |
+ } | |
+#else /* !STP_BULKMODE */ | |
+ buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, 0); | |
+ | |
+ if (buf != NULL) { | |
+ struct inode *inode = buf->dentry->d_inode; | |
+ | |
+ /* NB we are in the syscall context which allows | |
+ * sleeping. The following inode locking might | |
+ * sleep. See PR26131. */ | |
+ inode_lock(inode); | |
+ relay_switch_subbuf(buf, 0); | |
+ inode_unlock(inode); | |
+ } | |
+#endif | |
+ } | |
} | |
} | |
@@ -403,7 +435,12 @@ _stp_data_write_reserve(size_t size_request, void **entry) | |
return -EINVAL; | |
buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, | |
- smp_processor_id()); | |
+#ifdef STP_BULKMODE | |
+ smp_processor_id() | |
+#else | |
+ 0 | |
+#endif | |
+ ); | |
if (unlikely(buf->offset + size_request > buf->chan->subbuf_size)) { | |
size_request = __stp_relay_switch_subbuf(buf, size_request); | |
if (!size_request) | |
@@ -426,3 +463,62 @@ static int _stp_data_write_commit(void *entry) | |
/* Nothing to do here. */ | |
return 0; | |
} | |
+ | |
+static noinline int _stp_transport_trylock_relay_inode(void) | |
+{ | |
+ unsigned i; | |
+ struct rchan_buf *buf; | |
+ struct inode *inode; | |
+#ifdef DEBUG_TRANS | |
+ cycles_t begin; | |
+#endif | |
+ | |
+ buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, | |
+#ifdef STP_BULKMODE | |
+ smp_processor_id() | |
+#else | |
+ 0 | |
+#endif | |
+ ); | |
+ if (buf == NULL) | |
+ return 0; | |
+ | |
+ inode = buf->dentry->d_inode; | |
+ | |
+#ifdef DEBUG_TRANS | |
+ begin = get_cycles(); | |
+#endif | |
+ | |
+ // NB this bounded spinlock is needed for stream mode. it is observed | |
+ // that almost all of the iterations needed are less than 50K iterations | |
+ // or about 300K cycles. | |
+ for (i = 0; i < 50 * 1000; i++) { | |
+ if (inode_trylock(inode)) { | |
+ dbug_trans(3, "got inode lock: i=%u: cycles: %llu", i, | |
+ get_cycles() - begin); | |
+ return 1; | |
+ } | |
+ } | |
+ | |
+ dbug_trans(0, "failed to get inode lock: i=%u: cycles: %llu", i, | |
+ get_cycles() - begin); | |
+ return 0; | |
+} | |
+ | |
+static void _stp_transport_unlock_relay_inode(void) | |
+{ | |
+ struct rchan_buf *buf; | |
+ struct inode *inode; | |
+ | |
+ buf = _stp_get_rchan_subbuf(_stp_relay_data.rchan->buf, | |
+#ifdef STP_BULKMODE | |
+ smp_processor_id() | |
+#else | |
+ 0 | |
+#endif | |
+ ); | |
+ if (buf == NULL) | |
+ return; | |
+ | |
+ inode_unlock(buf->dentry->d_inode); | |
+} | |
diff --git a/runtime/transport/relayfs.c b/runtime/transport/relayfs.c | |
index d8fa4e049..97d2dff8b 100644 | |
--- a/runtime/transport/relayfs.c | |
+++ b/runtime/transport/relayfs.c | |
@@ -186,3 +186,14 @@ static int _stp_data_write_commit(void *entry) | |
/* Nothing to do here. */ | |
return 0; | |
} | |
+ | |
+static int _stp_transport_trylock_relay_inode(void) | |
+{ | |
+ /* Nothing to do here. */ | |
+ return 1; | |
+} | |
+ | |
+static void _stp_transport_unlock_relay_inode(void) | |
+{ | |
+ /* Nothing to do here. */ | |
+} | |
diff --git a/runtime/transport/ring_buffer.c b/runtime/transport/ring_buffer.c | |
index 43e014bae..21e356973 100644 | |
--- a/runtime/transport/ring_buffer.c | |
+++ b/runtime/transport/ring_buffer.c | |
@@ -827,3 +827,14 @@ static void _stp_transport_data_fs_overwrite(int overwrite) | |
dbug_trans(0, "setting ovewrite to %d\n", overwrite); | |
_stp_relay_data.overwrite_flag = overwrite; | |
} | |
+ | |
+static int _stp_transport_trylock_relay_inode(void) | |
+{ | |
+ /* Nothing to do here. */ | |
+ return 1; | |
+} | |
+ | |
+static void _stp_transport_unlock_relay_inode(void) | |
+{ | |
+ /* Nothing to do here. */ | |
+} | |
diff --git a/runtime/transport/transport.h b/runtime/transport/transport.h | |
index 0cf2a7ca3..9f0f9ddf8 100644 | |
--- a/runtime/transport/transport.h | |
+++ b/runtime/transport/transport.h | |
@@ -67,6 +67,24 @@ enum _stp_transport_state { | |
*/ | |
static enum _stp_transport_state _stp_transport_get_state(void); | |
+/* | |
+ * _stp_transport_trylock_relay_inode | |
+ * | |
+ * This function locks the relay file inode to protect against relay readers | |
+ * (i.e., staprun/stapio). | |
+ * Returns whether the lock is successfully obtained. | |
+ */ | |
+static noinline int _stp_transport_trylock_relay_inode(void); | |
+ | |
+/* | |
+ * _stp_transport_unlock_relay_inode | |
+ * | |
+ * This function releases the lock obtained by | |
+ * _stp_transport_trylock_relay_inode. | |
+ * should only call this when the lock is indeed obtained. | |
+ */ | |
+static void _stp_transport_unlock_relay_inode(void); | |
+ | |
/* | |
* _stp_transport_data_fs_init | |
* |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment