Created
December 2, 2020 03:33
-
-
Save kerneltoast/3536057c98a7900da6801b9d1e46eaaa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From bdf8d32c87c224da91a54793b1ac46e9a1c054a0 Mon Sep 17 00:00:00 2001 | |
From: Sultan Alsawaf <sultan@openresty.com> | |
Date: Tue, 1 Dec 2020 19:30:03 -0800 | |
Subject: [PATCH] runtime: fix print races in IRQ context and during print | |
cleanup | |
Prints can race when there's a print called from IRQ context or a print | |
called while print cleanup takes place, which can lead to garbled print | |
messages, out-of-bounds memory accesses, and memory use-after-free. This | |
is one example of racy modification of the print buffer len in IRQ | |
context which caused a panic due to an out-of-bounds memory access: | |
BUG: unable to handle kernel paging request at ffffe8ffff621000 | |
IP: [<ffffffffc05da0f3>] _stp_vsprint_memory+0x83/0x950 [stap_2c44636dfda18135ca3012a752599da6_13_533] | |
PGD 174b90067 PUD 174b8f067 PMD 174b93067 PTE 0 | |
Oops: 0002 [#1] SMP | |
CPU: 12 PID: 3468 Comm: cat Kdump: loaded Tainted: G OE ------------ 3.10.0-1127.19.1.el7.x86_64.debug #1 | |
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014 | |
task: ffff88001f4f0000 ti: ffff88004ea5c000 task.ti: ffff88004ea5c000 | |
RIP: 0010:[<ffffffffc05da0f3>] [<ffffffffc05da0f3>] _stp_vsprint_memory+0x83/0x950 [stap_2c44636dfda18135ca3012a752599da6_13_533] | |
RSP: 0018:ffff88004ea5f9a8 EFLAGS: 00010082 | |
RAX: ffffe8ffff621001 RBX: ffffe8ffff620ff2 RCX: fffffffffffffffe | |
RDX: 000000000000006e RSI: ffffffffffffffff RDI: ffffc90002c23730 | |
RBP: ffff88004ea5fa28 R08: 00000000ffffffff R09: 0000000000000073 | |
R10: ffffc90002c243d7 R11: 0000000000000001 R12: ffffc90002c2373f | |
R13: ffffe8ffff621004 R14: 0000000000000012 R15: 00000000fffffffe | |
FS: 00007f8a9b1d4740(0000) GS:ffff880179e00000(0000) knlGS:0000000000000000 | |
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 | |
CR2: ffffe8ffff621000 CR3: 00000000b3e3c000 CR4: 0000000000360fe0 | |
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 | |
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 | |
Call Trace: | |
[<ffffffff8103eb89>] ? sched_clock+0x9/0x10 | |
[<ffffffff8114036f>] ? lock_release_holdtime.part.30+0xf/0x1a0 | |
[<ffffffffc05dcb80>] function___global_trace__overload_0+0x5b0/0x1220 [stap_2c44636dfda18135ca3012a752599da6_13_533] | |
[<ffffffffc05d8993>] ? stp_lock_probe+0x53/0xe0 [stap_2c44636dfda18135ca3012a752599da6_13_533] | |
[<ffffffff8188d879>] ? kretprobe_trampoline_holder+0x9/0x9 | |
[<ffffffffc05e0662>] probe_7118+0x82/0xe0 [stap_2c44636dfda18135ca3012a752599da6_13_533] | |
[<ffffffffc05de866>] enter_kretprobe_common+0x256/0x490 [stap_2c44636dfda18135ca3012a752599da6_13_533] | |
[<ffffffff813489f1>] ? proc_sys_open+0x51/0x60 | |
[<ffffffffc05dead0>] enter_kretprobe_probe+0x10/0x20 [stap_2c44636dfda18135ca3012a752599da6_13_533] | |
[<ffffffff8188e1d8>] trampoline_handler+0x148/0x220 | |
[<ffffffff813489f1>] ? proc_sys_open+0x51/0x60 | |
[<ffffffff8188d89e>] kretprobe_trampoline+0x25/0x57 | |
[<ffffffff813489f1>] ? proc_sys_open+0x51/0x60 | |
[<ffffffff8188d879>] kretprobe_trampoline_holder+0x9/0x9 | |
[<ffffffff81384702>] ? security_inode_permission+0x22/0x30 | |
[<ffffffff813489a0>] ? sysctl_head_finish+0x50/0x50 | |
[<ffffffff812ac11d>] vfs_open+0x5d/0xb0 | |
[<ffffffff812bb74a>] ? may_open+0x5a/0x120 | |
[<ffffffff812c0af5>] do_last+0x285/0x15b0 | |
[<ffffffff812bf18e>] ? link_path_walk+0x27e/0x8c0 | |
[<ffffffff812c1ef0>] path_openat+0xd0/0x5d0 | |
[<ffffffff8107a7f3>] ? kvm_clock_read+0x33/0x40 | |
[<ffffffff812c38ad>] do_filp_open+0x4d/0xb0 | |
[<ffffffff81889497>] ? _raw_spin_unlock+0x27/0x40 | |
[<ffffffff812d5a9b>] ? __alloc_fd+0xfb/0x270 | |
[<ffffffff812ad784>] do_sys_open+0x124/0x220 | |
[<ffffffff812ad89e>] SyS_open+0x1e/0x20 | |
[<ffffffff8188d879>] kretprobe_trampoline_holder+0x9/0x9 | |
This patch resolves the IRQ print races by disabling IRQs on the local | |
CPU when accessing said CPU's print buffer, and resolves the cleanup | |
races with a lock. We also protect against data corruption and panics | |
from prints inside NMIs now by checking if the current CPU was accessing | |
the log buffer when an NMI fired; in this case, the NMI's prints will be | |
dropped, as there is no way to safely service them without creating a | |
dedicated log buffer for them. This is achieved by forbidding reentrancy | |
with respect to _stp_print_trylock_irqsave() when the runtime context | |
isn't held. Reentrancy is otherwise allowed when the runtime context is | |
held because the runtime context provides reentrancy protection. | |
--- | |
runtime/linux/io.c | 44 ++++--- | |
runtime/linux/print.c | 238 ++++++++++++++++++++++++---------- | |
runtime/print.h | 3 + | |
runtime/print_flush.c | 56 +------- | |
runtime/stack.c | 34 +++-- | |
runtime/transport/transport.c | 2 +- | |
runtime/vsprintf.c | 18 ++- | |
translate.cxx | 20 ++- | |
8 files changed, 266 insertions(+), 149 deletions(-) | |
diff --git a/runtime/linux/io.c b/runtime/linux/io.c | |
index 74a032c52..48adcecd7 100644 | |
--- a/runtime/linux/io.c | |
+++ b/runtime/linux/io.c | |
@@ -20,9 +20,6 @@ | |
#define WARN_STRING "WARNING: " | |
#define ERR_STRING "ERROR: " | |
-#if (STP_LOG_BUF_LEN < 10) /* sizeof(WARN_STRING) */ | |
-#error "STP_LOG_BUF_LEN is too short" | |
-#endif | |
enum code { INFO=0, WARN, ERROR, DBUG }; | |
@@ -31,25 +28,37 @@ static void _stp_vlog (enum code type, const char *func, int line, const char *f | |
static void _stp_vlog (enum code type, const char *func, int line, const char *fmt, va_list args) | |
{ | |
+ struct _stp_log *log; | |
+ unsigned long flags; | |
+ size_t bytes_avail; | |
int num; | |
- char *buf = per_cpu_ptr(Stp_lbuf, get_cpu()); | |
+ char *buf; | |
int start = 0; | |
+ if (!_stp_print_trylock_irqsave(&flags)) | |
+ return; | |
+ | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ bytes_avail = STP_BUFFER_SIZE - log->len; | |
+ if (unlikely(!bytes_avail)) | |
+ goto err_unlock; | |
+ | |
+ buf = &log->buf[log->len]; | |
if (type == DBUG) { | |
- start = _stp_snprintf(buf, STP_LOG_BUF_LEN, "%s:%d: ", func, line); | |
+ start = _stp_snprintf(buf, bytes_avail, "%s:%d: ", func, line); | |
} else if (type == WARN) { | |
- /* This strcpy() is OK, since we know STP_LOG_BUF_LEN | |
- * is > sizeof(WARN_STRING). */ | |
- strcpy (buf, WARN_STRING); | |
- start = sizeof(WARN_STRING) - 1; | |
+ strncpy(buf, WARN_STRING, bytes_avail); | |
+ start = min(bytes_avail, sizeof(WARN_STRING) - 1); | |
} else if (type == ERROR) { | |
- /* This strcpy() is OK, since we know STP_LOG_BUF_LEN | |
- * is > sizeof(ERR_STRING) (which is < sizeof(WARN_STRING). */ | |
- strcpy (buf, ERR_STRING); | |
- start = sizeof(ERR_STRING) - 1; | |
+ strncpy(buf, ERR_STRING, bytes_avail); | |
+ start = min(bytes_avail, sizeof(ERR_STRING) - 1); | |
} | |
- num = vscnprintf (buf + start, STP_LOG_BUF_LEN - start - 1, fmt, args); | |
+ bytes_avail -= start; | |
+ if (unlikely(!bytes_avail)) | |
+ goto err_unlock; | |
+ | |
+ num = vscnprintf(buf + start, bytes_avail - 1, fmt, args); | |
if (num + start) { | |
if (buf[num + start - 1] != '\n') { | |
buf[num + start] = '\n'; | |
@@ -66,12 +75,13 @@ static void _stp_vlog (enum code type, const char *func, int line, const char *f | |
if (type != DBUG) { | |
_stp_ctl_send(STP_OOB_DATA, buf, start + num + 1); | |
} else { | |
- _stp_print(buf); | |
- _stp_print_flush(); | |
+ log->len += start + num + 1; | |
+ __stp_print_flush(log); | |
} | |
#endif | |
} | |
- put_cpu(); | |
+err_unlock: | |
+ _stp_print_unlock_irqrestore(&flags); | |
} | |
/** Prints warning. | |
diff --git a/runtime/linux/print.c b/runtime/linux/print.c | |
index 777bca8b0..5bbe66949 100644 | |
--- a/runtime/linux/print.c | |
+++ b/runtime/linux/print.c | |
@@ -35,84 +35,181 @@ | |
* @{ | |
*/ | |
-typedef struct __stp_pbuf { | |
- uint32_t len; /* bytes used in the buffer */ | |
+struct _stp_log { | |
+ unsigned int len; /* Bytes used in the buffer */ | |
char buf[STP_BUFFER_SIZE]; | |
-} _stp_pbuf; | |
+ atomic_t reentrancy_lock; | |
+}; | |
+#include "print_flush.c" | |
-static void *Stp_pbuf = NULL; | |
+static struct _stp_log *_stp_log_pcpu; | |
+static bool _stp_log_stop; | |
-/** private buffer for _stp_vlog() */ | |
-#ifndef STP_LOG_BUF_LEN | |
-#define STP_LOG_BUF_LEN 256 | |
-#endif | |
+/* | |
+ * An atomic counter is used to synchronize every possible print buffer usage | |
+ * with the _stp_print_cleanup() function. The cleanup function sets the counter | |
+ * to INT_MAX after waiting for everything using the print buffer to finish. We | |
+ * cannot use a lock primitive to implement this because lock_acquire() contains | |
+ * tracepoints. We instead use _stp_print_ctr as if it were a read-write lock. | |
+ */ | |
+static atomic_t _stp_print_ctr = ATOMIC_INIT(0); | |
+ | |
+/* | |
+ * This disables IRQs to make per-CPU print buffer accesses atomic. There is a | |
+ * reentrancy protection mechanism specifically for NMIs, since they can violate | |
+ * our atomic guarantee. Reentrancy is otherwise allowed within code sections | |
+ * that have the runtime context held (via _stp_runtime_entryfn_get_context()). | |
+ */ | |
+static bool _stp_print_trylock_irqsave(unsigned long *flags) | |
+{ | |
+ bool context_held = false; | |
+ struct _stp_log *log; | |
+ | |
+ if (!atomic_add_unless(&_stp_print_ctr, 1, INT_MAX)) | |
+ return false; | |
+ | |
+ /* | |
+ * Check the per-CPU reentrancy lock for contention, unless the runtime | |
+ * context is already held, in which case we already have reentrancy | |
+ * protection. Otherwise, if the reentrancy lock is contented, that | |
+ * means we're either inside an NMI that fired while the current CPU was | |
+ * accessing the log buffer, or something is trying to nest calls to | |
+ * _stp_print_trylock_irqsave(). Our only choice is to reject the log | |
+ * access attempt in this case because log buffer corruption and panics | |
+ * could ensue if we're inside an NMI. | |
+ */ | |
+ if (atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX)) { | |
+ struct context *c = _stp_runtime_get_context(); | |
+ context_held = c && atomic_read(&c->busy); | |
+ atomic_dec(&_stp_contexts_busy_ctr); | |
+ } | |
+ | |
+ local_irq_save(*flags); | |
+ | |
+ /* Fall back onto the reentrancy lock if the context isn't held */ | |
+ if (!context_held) { | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ if (atomic_cmpxchg(&log->reentrancy_lock, 0, 1)) | |
+ goto irqrestore; | |
+ } | |
+ | |
+ return true; | |
-typedef char _stp_lbuf[STP_LOG_BUF_LEN]; | |
-static void *Stp_lbuf = NULL; | |
+irqrestore: | |
+ local_irq_restore(*flags); | |
+print_unlock: | |
+ atomic_dec(&_stp_print_ctr); | |
+ return false; | |
+} | |
+ | |
+static void _stp_print_unlock_irqrestore(unsigned long *flags) | |
+{ | |
+ bool context_held = false; | |
+ struct _stp_log *log; | |
+ | |
+ if (atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX)) { | |
+ struct context *c = _stp_runtime_get_context(); | |
+ context_held = c && atomic_read(&c->busy); | |
+ atomic_dec(&_stp_contexts_busy_ctr); | |
+ } | |
+ | |
+ if (!context_held) { | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ atomic_set(&log->reentrancy_lock, 0); | |
+ } | |
+ | |
+ local_irq_restore(*flags); | |
+ atomic_dec(&_stp_print_ctr); | |
+} | |
/* create percpu print and io buffers */ | |
static int _stp_print_init (void) | |
{ | |
- Stp_pbuf = _stp_alloc_percpu(sizeof(_stp_pbuf)); | |
- if (unlikely(Stp_pbuf == 0)) | |
- return -1; | |
- | |
- /* now initialize IO buffer used in io.c */ | |
- Stp_lbuf = _stp_alloc_percpu(sizeof(_stp_lbuf)); | |
- if (unlikely(Stp_lbuf == 0)) { | |
- _stp_free_percpu(Stp_pbuf); | |
- return -1; | |
+ unsigned int cpu; | |
+ | |
+ _stp_log_pcpu = _stp_alloc_percpu(sizeof(*_stp_log_pcpu)); | |
+ if (!_stp_log_pcpu) | |
+ return -ENOMEM; | |
+ | |
+ for_each_possible_cpu(cpu) { | |
+ struct _stp_log *log = per_cpu_ptr(_stp_log_pcpu, cpu); | |
+ | |
+ log->reentrancy_lock = (atomic_t)ATOMIC_INIT(0); | |
} | |
return 0; | |
} | |
static void _stp_print_cleanup (void) | |
{ | |
- if (Stp_pbuf) | |
- _stp_free_percpu(Stp_pbuf); | |
- if (Stp_lbuf) | |
- _stp_free_percpu(Stp_lbuf); | |
-} | |
+ unsigned int cpu; | |
-#include "print_flush.c" | |
+ /* Wait for the loggers to finish modifying the print buffers */ | |
+ while (atomic_cmpxchg(&_stp_print_ctr, 0, INT_MAX)) | |
+ cpu_relax(); | |
+ | |
+ for_each_possible_cpu(cpu) { | |
+ struct _stp_log *log = per_cpu_ptr(_stp_log_pcpu, cpu); | |
+ | |
+ /* | |
+ * Flush anything that could be left in the print buffer. It is | |
+ * safe to do this without any kind of synchronization mechanism | |
+ * because nothing is using this print buffer anymore. | |
+ */ | |
+ __stp_print_flush(log); | |
+ } | |
+ | |
+ _stp_free_percpu(_stp_log_pcpu); | |
+} | |
static inline void _stp_print_flush(void) | |
{ | |
- stp_print_flush(per_cpu_ptr(Stp_pbuf, smp_processor_id())); | |
+ struct _stp_log *log; | |
+ unsigned long flags; | |
+ | |
+ if (!_stp_print_trylock_irqsave(&flags)) | |
+ return; | |
+ | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ __stp_print_flush(log); | |
+ _stp_print_unlock_irqrestore(&flags); | |
} | |
+ | |
#ifndef STP_MAXBINARYARGS | |
#define STP_MAXBINARYARGS 127 | |
#endif | |
-/** Reserves space in the output buffer for direct I/O. | |
+/** Reserves space in the output buffer for direct I/O. Must be called with | |
+ * _stp_print_trylock_irqsave() held. | |
*/ | |
static void * _stp_reserve_bytes (int numbytes) | |
{ | |
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); | |
- int size = STP_BUFFER_SIZE - pb->len; | |
- void * ret; | |
+ struct _stp_log *log; | |
+ char *ret; | |
if (unlikely(numbytes == 0 || numbytes > STP_BUFFER_SIZE)) | |
return NULL; | |
- if (unlikely(numbytes > size)) | |
- _stp_print_flush(); | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ if (unlikely(numbytes > (STP_BUFFER_SIZE - log->len))) | |
+ __stp_print_flush(log); | |
- ret = pb->buf + pb->len; | |
- pb->len += numbytes; | |
+ ret = &log->buf[log->len]; | |
+ log->len += numbytes; | |
return ret; | |
} | |
static void _stp_unreserve_bytes (int numbytes) | |
{ | |
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); | |
+ struct _stp_log *log; | |
- if (unlikely(numbytes == 0 || numbytes > pb->len)) | |
+ if (unlikely(numbytes == 0)) | |
return; | |
- pb->len -= numbytes; | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ if (numbytes <= log->len) | |
+ log->len -= numbytes; | |
} | |
/** Write 64-bit args directly into the output stream. | |
@@ -123,22 +220,25 @@ static void _stp_unreserve_bytes (int numbytes) | |
*/ | |
static void _stp_print_binary (int num, ...) | |
{ | |
+ unsigned long flags; | |
va_list vargs; | |
int i; | |
int64_t *args; | |
- | |
+ | |
if (unlikely(num > STP_MAXBINARYARGS)) | |
num = STP_MAXBINARYARGS; | |
- args = _stp_reserve_bytes(num * sizeof(int64_t)); | |
+ if (!_stp_print_trylock_irqsave(&flags)) | |
+ return; | |
- if (likely(args != NULL)) { | |
+ args = _stp_reserve_bytes(num * sizeof(int64_t)); | |
+ if (args) { | |
va_start(vargs, num); | |
- for (i = 0; i < num; i++) { | |
+ for (i = 0; i < num; i++) | |
args[i] = va_arg(vargs, int64_t); | |
- } | |
va_end(vargs); | |
} | |
+ _stp_print_unlock_irqrestore(&flags); | |
} | |
/** Print into the print buffer. | |
@@ -149,6 +249,7 @@ static void _stp_print_binary (int num, ...) | |
static void _stp_printf (const char *fmt, ...) | |
{ | |
va_list args; | |
+ | |
va_start(args, fmt); | |
_stp_vsnprintf(NULL, 0, fmt, args); | |
va_end(args); | |
@@ -160,37 +261,38 @@ static void _stp_printf (const char *fmt, ...) | |
static void _stp_print (const char *str) | |
{ | |
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); | |
- char *end = pb->buf + STP_BUFFER_SIZE; | |
- char *ptr = pb->buf + pb->len; | |
- char *instr = (char *)str; | |
- | |
- while (ptr < end && *instr) | |
- *ptr++ = *instr++; | |
- | |
- /* Did loop terminate due to lack of buffer space? */ | |
- if (unlikely(*instr)) { | |
- /* Don't break strings across subbufs. */ | |
- /* Restart after flushing. */ | |
- _stp_print_flush(); | |
- end = pb->buf + STP_BUFFER_SIZE; | |
- ptr = pb->buf + pb->len; | |
- instr = (char *)str; | |
- while (ptr < end && *instr) | |
- *ptr++ = *instr++; | |
+ struct _stp_log *log; | |
+ unsigned long flags; | |
+ unsigned int i; | |
+ | |
+ if (!_stp_print_trylock_irqsave(&flags)) | |
+ return; | |
+ | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ while (1) { | |
+ for (i = log->len; i < STP_BUFFER_SIZE && *str; i++) | |
+ log->buf[i] = *str++; | |
+ log->len = i; | |
+ if (likely(!*str)) | |
+ break; | |
+ __stp_print_flush(log); | |
} | |
- pb->len = ptr - pb->buf; | |
+ _stp_print_unlock_irqrestore(&flags); | |
} | |
static void _stp_print_char (const char c) | |
{ | |
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); | |
- int size = STP_BUFFER_SIZE - pb->len; | |
- if (unlikely(1 >= size)) | |
- _stp_print_flush(); | |
- | |
- pb->buf[pb->len] = c; | |
- pb->len ++; | |
+ struct _stp_log *log; | |
+ unsigned long flags; | |
+ | |
+ if (!_stp_print_trylock_irqsave(&flags)) | |
+ return; | |
+ | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ if (unlikely(log->len == STP_BUFFER_SIZE)) | |
+ __stp_print_flush(log); | |
+ log->buf[log->len++] = c; | |
+ _stp_print_unlock_irqrestore(&flags); | |
} | |
static void _stp_print_kernel_info(char *sname, char *vstr, int ctx, int num_probes) | |
diff --git a/runtime/print.h b/runtime/print.h | |
index ede71f033..ffdea594d 100644 | |
--- a/runtime/print.h | |
+++ b/runtime/print.h | |
@@ -10,6 +10,9 @@ | |
#ifndef _STP_PRINT_H_ | |
#define _STP_PRINT_H_ | |
+/* The lock must be held with IRQs disabled to do any printing */ | |
+static bool _stp_print_trylock_irqsave(unsigned long *flags); | |
+static void _stp_print_unlock_irqrestore(unsigned long *flags); | |
static int _stp_print_init(void); | |
static void _stp_print_cleanup(void); | |
static void *_stp_reserve_bytes(int numbytes); | |
diff --git a/runtime/print_flush.c b/runtime/print_flush.c | |
index cf40a2645..acd6a32d9 100644 | |
--- a/runtime/print_flush.c | |
+++ b/runtime/print_flush.c | |
@@ -13,40 +13,31 @@ | |
* is filled, or this is called. This MUST be called before returning | |
* from a probe or accumulated output in the print buffer will be lost. | |
* | |
- * @note Preemption must be disabled to use this. | |
+ * @note Interrupts must be disabled to use this. | |
*/ | |
-static STP_DEFINE_SPINLOCK(_stp_print_lock); | |
- | |
-void stp_print_flush(_stp_pbuf *pb) | |
+static void __stp_print_flush(struct _stp_log *log) | |
{ | |
- size_t len = pb->len; | |
+ size_t len = log->len; | |
void *entry = NULL; | |
/* check to see if there is anything in the buffer */ | |
if (likely(len == 0)) | |
return; | |
- pb->len = 0; | |
- | |
- if (unlikely(_stp_transport_get_state() != STP_TRANSPORT_RUNNING)) | |
- return; | |
+ log->len = 0; | |
dbug_trans(1, "len = %zu\n", len); | |
#ifdef STP_BULKMODE | |
#ifdef NO_PERCPU_HEADERS | |
{ | |
- struct context* __restrict__ c = NULL; | |
- char *bufp = pb->buf; | |
+ char *bufp = log->buf; | |
int inode_locked; | |
- c = _stp_runtime_entryfn_get_context(); | |
- | |
if (!(inode_locked = _stp_transport_trylock_relay_inode())) { | |
atomic_inc (&_stp_transport_failures); | |
#ifndef STP_TRANSPORT_RISKY | |
- _stp_runtime_entryfn_put_context(c); | |
return; | |
#endif | |
} | |
@@ -70,26 +61,20 @@ void stp_print_flush(_stp_pbuf *pb) | |
if (inode_locked) | |
_stp_transport_unlock_relay_inode(); | |
- | |
- _stp_runtime_entryfn_put_context(c); | |
} | |
#else /* !NO_PERCPU_HEADERS */ | |
{ | |
- struct context* __restrict__ c = NULL; | |
- char *bufp = pb->buf; | |
+ char *bufp = log->buf; | |
struct _stp_trace t = { .sequence = _stp_seq_inc(), | |
.pdu_len = len}; | |
size_t bytes_reserved; | |
int inode_locked; | |
- c = _stp_runtime_entryfn_get_context(); | |
- | |
if (!(inode_locked = _stp_transport_trylock_relay_inode())) { | |
atomic_inc (&_stp_transport_failures); | |
#ifndef STP_TRANSPORT_RISKY | |
- _stp_runtime_entryfn_put_context(c); | |
return; | |
#endif | |
} | |
@@ -124,48 +109,24 @@ void stp_print_flush(_stp_pbuf *pb) | |
if (inode_locked) | |
_stp_transport_unlock_relay_inode(); | |
- | |
- _stp_runtime_entryfn_put_context(c); | |
} | |
#endif /* !NO_PERCPU_HEADERS */ | |
#else /* !STP_BULKMODE */ | |
{ | |
- unsigned long flags; | |
- struct context* __restrict__ c = NULL; | |
- char *bufp = pb->buf; | |
+ char *bufp = log->buf; | |
int inode_locked; | |
- /* Prevent probe reentrancy on _stp_print_lock. | |
- * | |
- * Since stp_print_flush may be called from probe context, we | |
- * have to make sure that its lock, _stp_print_lock, can't | |
- * possibly be held outside probe context too. We ensure this | |
- * by grabbing the context here, so any probe triggered by this | |
- * region will appear reentrant and be skipped rather than | |
- * deadlock. Failure to get_context just means we're already | |
- * in a probe, which is fine. | |
- * | |
- * (see also _stp_ctl_send for a similar situation) | |
- * | |
- * A better solution would be to replace this | |
- * concurrency-control-laden effort with a lockless | |
- * algorithm. | |
- */ | |
- c = _stp_runtime_entryfn_get_context(); | |
- | |
if (!(inode_locked = _stp_transport_trylock_relay_inode())) { | |
atomic_inc (&_stp_transport_failures); | |
#ifndef STP_TRANSPORT_RISKY | |
dbug_trans(0, "discarding %zu bytes of data\n", len); | |
- _stp_runtime_entryfn_put_context(c); | |
return; | |
#endif | |
} | |
dbug_trans(1, "calling _stp_data_write...\n"); | |
- stp_spin_lock_irqsave(&_stp_print_lock, flags); | |
while (len > 0) { | |
size_t bytes_reserved; | |
@@ -182,12 +143,9 @@ void stp_print_flush(_stp_pbuf *pb) | |
break; | |
} | |
} | |
- stp_spin_unlock_irqrestore(&_stp_print_lock, flags); | |
if (inode_locked) | |
_stp_transport_unlock_relay_inode(); | |
- | |
- _stp_runtime_entryfn_put_context(c); | |
} | |
#endif /* !STP_BULKMODE */ | |
} | |
diff --git a/runtime/stack.c b/runtime/stack.c | |
index 241ccf793..da23d4395 100644 | |
--- a/runtime/stack.c | |
+++ b/runtime/stack.c | |
@@ -690,13 +690,20 @@ static void _stp_stack_kernel_sprint(char *str, int size, struct context* c, | |
* then call _stp_stack_print, | |
* then copy the result into the output string | |
* and clear the print buffer. */ | |
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); | |
- _stp_print_flush(); | |
+ struct _stp_log *log; | |
+ unsigned long flags; | |
- _stp_stack_kernel_print(c, sym_flags); | |
+ if (!_stp_print_trylock_irqsave(&flags)) { | |
+ *str = '\0'; | |
+ return; | |
+ } | |
- strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len); | |
- pb->len = 0; | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ __stp_print_flush(log); | |
+ _stp_stack_kernel_print(c, sym_flags); | |
+ strlcpy(str, log->buf, min_t(int, size, log->len)); | |
+ log->len = 0; | |
+ _stp_print_unlock_irqrestore(&flags); | |
} | |
static void _stp_stack_user_sprint(char *str, int size, struct context* c, | |
@@ -707,13 +714,20 @@ static void _stp_stack_user_sprint(char *str, int size, struct context* c, | |
* then call _stp_stack_print, | |
* then copy the result into the output string | |
* and clear the print buffer. */ | |
- _stp_pbuf *pb = per_cpu_ptr(Stp_pbuf, smp_processor_id()); | |
- _stp_print_flush(); | |
+ struct _stp_log *log; | |
+ unsigned long flags; | |
- _stp_stack_user_print(c, sym_flags); | |
+ if (!_stp_print_trylock_irqsave(&flags)) { | |
+ *str = '\0'; | |
+ return; | |
+ } | |
- strlcpy(str, pb->buf, size < (int)pb->len ? size : (int)pb->len); | |
- pb->len = 0; | |
+ log = per_cpu_ptr(_stp_log_pcpu, raw_smp_processor_id()); | |
+ __stp_print_flush(log); | |
+ _stp_stack_user_print(c, sym_flags); | |
+ strlcpy(str, log->buf, min_t(int, size, log->len)); | |
+ log->len = 0; | |
+ _stp_print_unlock_irqrestore(&flags); | |
} | |
#endif /* _STACK_C_ */ | |
diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c | |
index 57955334b..44e69b68c 100644 | |
--- a/runtime/transport/transport.c | |
+++ b/runtime/transport/transport.c | |
@@ -540,8 +540,8 @@ static void _stp_transport_close(void) | |
current->pid); | |
_stp_cleanup_and_exit(0); | |
_stp_unregister_ctl_channel(); | |
+ _stp_print_cleanup(); /* Requires the transport, so free this first */ | |
_stp_transport_fs_close(); | |
- _stp_print_cleanup(); /* free print buffers */ | |
_stp_mem_debug_done(); | |
dbug_trans(1, "---- CLOSED ----\n"); | |
diff --git a/runtime/vsprintf.c b/runtime/vsprintf.c | |
index 28fd18f16..417d9f7f3 100644 | |
--- a/runtime/vsprintf.c | |
+++ b/runtime/vsprintf.c | |
@@ -542,6 +542,8 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) | |
number of chars for from string */ | |
int qualifier; /* 'h', 'l', or 'L' for integer fields */ | |
int num_bytes = 0; | |
+ unsigned long irqflags = 0; | |
+ bool got_print_lock = false; | |
/* Reject out-of-range values early */ | |
if (unlikely((int) size < 0)) | |
@@ -724,11 +726,14 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) | |
num_bytes = STP_BUFFER_SIZE; | |
} | |
+ if (!_stp_print_trylock_irqsave(&irqflags)) | |
+ return 0; | |
str = (char*)_stp_reserve_bytes(num_bytes); | |
if (str == NULL) { | |
_stp_error("Couldn't reserve any print buffer space\n"); | |
- return 0; | |
+ goto err_unlock; | |
} | |
+ got_print_lock = true; | |
size = num_bytes; | |
end = str + size - 1; | |
@@ -820,8 +825,10 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) | |
field_width, precision, | |
*fmt, flags); | |
if (unlikely(str == NULL)) { | |
- if (num_bytes > 0) | |
+ if (num_bytes > 0) { | |
_stp_unreserve_bytes(num_bytes); | |
+ goto err_unlock; | |
+ } | |
return 0; | |
} | |
continue; | |
@@ -923,7 +930,14 @@ _stp_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) | |
/* don't write out a null byte if the buf size is zero */ | |
*end = '\0'; | |
} | |
+ | |
+ if (got_print_lock) | |
+ _stp_print_unlock_irqrestore(&irqflags); | |
return str-buf; | |
+ | |
+err_unlock: | |
+ _stp_print_unlock_irqrestore(&irqflags); | |
+ return 0; | |
} | |
#endif /* _VSPRINTF_C_ */ | |
diff --git a/translate.cxx b/translate.cxx | |
index 53f1d0725..f0195486c 100644 | |
--- a/translate.cxx | |
+++ b/translate.cxx | |
@@ -1354,6 +1354,9 @@ c_unparser::emit_compiled_printfs () | |
o->newline() << "unsigned long ptr_value;"; | |
o->newline() << "int num_bytes;"; | |
+ if (print_to_stream) | |
+ o->newline() << "unsigned long irqflags;"; | |
+ | |
o->newline() << "(void) width;"; | |
o->newline() << "(void) precision;"; | |
o->newline() << "(void) ptr_value;"; | |
@@ -1452,7 +1455,9 @@ c_unparser::emit_compiled_printfs () | |
} | |
o->newline() << "num_bytes = clamp(num_bytes, 0, STP_BUFFER_SIZE);"; | |
- o->newline() << "str = (char*)_stp_reserve_bytes(num_bytes);"; | |
+ o->newline() << "if (!_stp_print_trylock_irqsave(&irqflags))"; | |
+ o->newline(1) << "return;"; | |
+ o->newline(-1) << "str = (char*)_stp_reserve_bytes(num_bytes);"; | |
o->newline() << "end = str ? str + num_bytes - 1 : 0;"; | |
} | |
else // !print_to_stream | |
@@ -1547,8 +1552,14 @@ c_unparser::emit_compiled_printfs () | |
o->newline() << "if (unlikely(str == NULL)) {"; | |
o->indent(1); | |
if (print_to_stream) | |
+ { | |
o->newline() << "_stp_unreserve_bytes(num_bytes);"; | |
- o->newline() << "return;"; | |
+ o->newline() << "goto err_unlock;"; | |
+ } | |
+ else | |
+ { | |
+ o->newline() << "return;"; | |
+ } | |
o->newline(-1) << "}"; | |
break; | |
@@ -1575,6 +1586,11 @@ c_unparser::emit_compiled_printfs () | |
o->newline(-1) << "}"; | |
+ if (print_to_stream) | |
+ { | |
+ o->newline(-1) << "err_unlock:"; | |
+ o->newline(1) << "_stp_print_unlock_irqrestore(&irqflags);"; | |
+ } | |
o->newline(-1) << "}"; | |
} | |
o->newline() << "#endif // STP_LEGACY_PRINT"; | |
-- | |
2.29.2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment