-
-
Save agentzh/d54da2db9521f26fd6f700d270f48334 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit d0e9867d305f01b51d8c5821a340ee0bdae0f1d2 | |
Author: Yichun Zhang (agentzh) <yichun@openresty.com> | |
Date: Sat Dec 16 17:35:03 2023 -0800 | |
PR31176: fix Spin lock deadlocks in memory pool allocations for mixed NMI and non-NMI contexts | |
The kernel's lockdep finds dead locks in the stap memory pool allocator's | |
spinlocks when mixing NMI and non-NMI contexts. | |
Now we use trylock in the NMI context to avoid waiting forever on a lock | |
held by a non-NMI context (which is interrupted by NMI). | |
This bug can be reproduced by running TEST 4 in | |
testsuite/systemtap.base/kernel-hw-breakpoint-addr.exp on a lockdep | |
kernel that is recent enough (like 5.11.22). | |
diff --git a/runtime/mempool.c b/runtime/mempool.c | |
index 1f206b216..661ea2107 100644 | |
--- a/runtime/mempool.c | |
+++ b/runtime/mempool.c | |
@@ -75,20 +75,27 @@ err: | |
/* allocate a buffer from a memory pool */ | |
static void *_stp_mempool_alloc(_stp_mempool_t *pool) | |
{ | |
- unsigned long flags; | |
+ unsigned long flags = 0; | |
struct _stp_mem_buffer *ptr = NULL; | |
+ void *ret = NULL; | |
+ | |
/* PR14804: tolerate accidental early call, before pool is | |
actually initialized. */ | |
if (pool == NULL) | |
return NULL; | |
- stp_spin_lock_irqsave(&pool->lock, flags); | |
+ | |
+ stp_nmi_spin_lock_irqsave(&pool->lock, flags, no_lock); | |
+ | |
if (likely(!list_empty(&pool->free_list))) { | |
ptr = (struct _stp_mem_buffer *)pool->free_list.next; | |
list_del_init(&ptr->list); | |
- stp_spin_unlock_irqrestore(&pool->lock, flags); | |
- return &ptr->buf; | |
+ ret = &ptr->buf; | |
} | |
- stp_spin_unlock_irqrestore(&pool->lock, flags); | |
+ | |
+ stp_nmi_spin_unlock_irqrestore(&pool->lock, flags); | |
+ return ret; | |
+ | |
+no_lock: | |
return NULL; | |
} | |
diff --git a/runtime/stp_helper_lock.h b/runtime/stp_helper_lock.h | |
index d2961b74c..72ac72c2c 100644 | |
--- a/runtime/stp_helper_lock.h | |
+++ b/runtime/stp_helper_lock.h | |
@@ -17,6 +17,22 @@ | |
#include <linux/spinlock.h> | |
+#define stp_nmi_spin_lock_irqsave(lock, flags, label) \ | |
+ if (in_nmi()) { \ | |
+ if (!stp_spin_trylock(lock)) { \ | |
+ goto label; \ | |
+ } \ | |
+ } else { \ | |
+ stp_spin_lock_irqsave(lock, flags); \ | |
+ } | |
+ | |
+#define stp_nmi_spin_unlock_irqrestore(lock, flags) \ | |
+ if (in_nmi()) { \ | |
+ stp_spin_unlock(lock); \ | |
+ } else { \ | |
+ stp_spin_unlock_irqrestore(lock, flags); \ | |
+ } | |
+ | |
#if defined(CONFIG_PREEMPT_RT_FULL) || defined(CONFIG_PREEMPT_RT) | |
#define stp_spinlock_t raw_spinlock_t | |
@@ -28,6 +44,7 @@ static inline void stp_spin_lock_init(raw_spinlock_t *lock) { raw_spin_lock_init | |
static inline void stp_spin_lock(raw_spinlock_t *lock) { raw_spin_lock(lock); } | |
static inline void stp_spin_unlock(raw_spinlock_t *lock) { raw_spin_unlock(lock); } | |
+#define stp_spin_trylock(lock) raw_spin_trylock(lock) | |
#define stp_spin_lock_irqsave(lock, flags) raw_spin_lock_irqsave(lock, flags) | |
#define stp_spin_unlock_irqrestore(lock, flags) raw_spin_unlock_irqrestore(lock, flags) | |
@@ -61,6 +78,7 @@ static inline void stp_spin_lock_init(spinlock_t *lock) { spin_lock_init(lock); | |
static inline void stp_spin_lock(spinlock_t *lock) { spin_lock(lock); } | |
static inline void stp_spin_unlock(spinlock_t *lock) { spin_unlock(lock); } | |
+#define stp_spin_trylock(lock) spin_trylock(lock) | |
#define stp_spin_lock_irqsave(lock, flags) spin_lock_irqsave(lock, flags) | |
#define stp_spin_unlock_irqrestore(lock, flags) spin_unlock_irqrestore(lock, flags) | |
diff --git a/runtime/transport/control.c b/runtime/transport/control.c | |
index 35608731c..2306b8913 100644 | |
--- a/runtime/transport/control.c | |
+++ b/runtime/transport/control.c | |
@@ -418,7 +418,7 @@ static void _stp_ctl_free_special_buffers(void) | |
static struct _stp_buffer *_stp_ctl_get_buffer(int type, const char *data, | |
unsigned len) | |
{ | |
- unsigned long flags; | |
+ unsigned long flags = 0; | |
struct _stp_buffer *bptr = NULL; | |
/* Is it a dynamically allocated message type? */ | |
@@ -481,12 +481,12 @@ static struct _stp_buffer *_stp_ctl_get_buffer(int type, const char *data, | |
} | |
if (bptr != NULL) { | |
/* OK, it is a special one, but is it free? */ | |
- stp_spin_lock_irqsave(&_stp_ctl_special_msg_lock, flags); | |
+ stp_nmi_spin_lock_irqsave(&_stp_ctl_special_msg_lock, flags, failed); | |
if (bptr->type == _STP_CTL_MSG_UNUSED) | |
bptr->type = type; | |
else | |
bptr = NULL; | |
- stp_spin_unlock_irqrestore(&_stp_ctl_special_msg_lock, flags); | |
+ stp_nmi_spin_unlock_irqrestore(&_stp_ctl_special_msg_lock, flags); | |
} | |
/* Got a special message buffer, with type set, fill it in, | |
@@ -501,6 +501,9 @@ static struct _stp_buffer *_stp_ctl_get_buffer(int type, const char *data, | |
} | |
} | |
return bptr; | |
+ | |
+failed: | |
+ return NULL; | |
} | |
/* Returns the given buffer to the pool when dynamically allocated. | |
@@ -535,7 +538,7 @@ static int _stp_ctl_send(int type, void *data, unsigned len) | |
{ | |
struct context* __restrict__ c = NULL; | |
struct _stp_buffer *bptr; | |
- unsigned long flags; | |
+ unsigned long flags = 0; | |
unsigned hlen; | |
#ifdef DEBUG_TRANS | |
@@ -567,22 +570,23 @@ static int _stp_ctl_send(int type, void *data, unsigned len) | |
skipped rather than deadlock. */ | |
c = _stp_runtime_entryfn_get_context(); | |
+ stp_nmi_spin_lock_irqsave(&_stp_ctl_ready_lock, flags, no_lock); | |
+ | |
/* get a buffer from the free pool */ | |
bptr = _stp_ctl_get_buffer(type, data, len); | |
if (unlikely(bptr == NULL)) { | |
/* Nothing else we can do... but let's not spam the kernel | |
with these reports. */ | |
/* printk(KERN_ERR "ctl_write_msg type=%d len=%d ENOMEM\n", type, len); */ | |
- _stp_runtime_entryfn_put_context(c); | |
- return -ENOMEM; | |
+ goto no_mem; | |
} | |
/* Put it on the pool of ready buffers. It's possible to recursively | |
hit a probe here, like a kprobe in NMI or the lock tracepoints, but | |
they will be squashed since we're holding the context busy. */ | |
- stp_spin_lock_irqsave(&_stp_ctl_ready_lock, flags); | |
list_add_tail(&bptr->list, &_stp_ctl_ready_q); | |
- stp_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags); | |
+ | |
+ stp_nmi_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags); | |
_stp_runtime_entryfn_put_context(c); | |
@@ -590,6 +594,15 @@ static int _stp_ctl_send(int type, void *data, unsigned len) | |
timer at this point, but calling mod_timer() at this | |
point would bring in more locking issues... */ | |
return len + sizeof(bptr->type); | |
+ | |
+no_lock: | |
+ _stp_runtime_entryfn_put_context(c); | |
+ return -EBUSY; | |
+ | |
+no_mem: | |
+ stp_nmi_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags); | |
+ _stp_runtime_entryfn_put_context(c); | |
+ return -ENOMEM; | |
} | |
/* Logs a warning or error through the control channel. This function mimics | |
@@ -603,12 +616,15 @@ static void _stp_ctl_log_werr(const char *logtype, size_t logtype_len, | |
{ | |
struct context *__restrict__ c; | |
struct _stp_buffer *bptr; | |
- unsigned long flags; | |
+ unsigned long flags = 0; | |
c = _stp_runtime_entryfn_get_context(); | |
+ | |
+ stp_nmi_spin_lock_irqsave(&_stp_ctl_ready_lock, flags, put_context); | |
+ | |
bptr = _stp_ctl_get_buffer(STP_OOB_DATA, logtype, logtype_len); | |
if (!bptr) | |
- goto put_context; | |
+ goto unlock; | |
/* | |
* This is a generic failure message for when there's no space left. We | |
@@ -634,9 +650,11 @@ static void _stp_ctl_log_werr(const char *logtype, size_t logtype_len, | |
bptr->buf[bptr->len++] = '\n'; | |
send_msg: | |
- stp_spin_lock_irqsave(&_stp_ctl_ready_lock, flags); | |
list_add_tail(&bptr->list, &_stp_ctl_ready_q); | |
- stp_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags); | |
+ | |
+unlock: | |
+ stp_nmi_spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags); | |
+ | |
put_context: | |
_stp_runtime_entryfn_put_context(c); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment