Created
November 12, 2020 18:43
-
-
Save kerneltoast/ad75180b735bf3e733e64a746168fc3a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 936d8f252fd1ea29f280cf1398e4f71ba2df4001 Mon Sep 17 00:00:00 2001 | |
From: Sultan Alsawaf <sultan@openresty.com> | |
Date: Tue, 10 Nov 2020 10:03:34 -0800 | |
Subject: [PATCH] stp_utrace: disable IRQs when holding the bucket spin lock | |
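The utrace bucket spin lock is currently taken with IRQs enabled, but | |
lockdep reports that the same lock class is also acquired in hard-IRQ | |
context (from an hrtimer probe handler). If that interrupt fires on a | |
CPU that already holds the lock, the CPU deadlocks: | |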
WARNING: inconsistent lock state | |
4.14.35-1902.6.6.el7uek.x86_64.debug #2 Tainted: G OE | |
-------------------------------- | |
inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. | |
sh/15779 [HC1[1]:SC0[0]:HE0:SE1] takes: | |
(&(lock)->rlock#3){?.+.}, at: [<ffffffffc0c080b0>] _stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] | |
{HARDIRQ-ON-W} state was registered at: | |
lock_acquire+0xe0/0x238 | |
_raw_spin_lock+0x3d/0x7a | |
utrace_task_alloc+0xa4/0xe3 [orxray_lj_lua_fgraph_XXXXXXX] | |
utrace_attach_task+0x136/0x194 [orxray_lj_lua_fgraph_XXXXXXX] | |
__stp_utrace_attach+0x57/0x216 [orxray_lj_lua_fgraph_XXXXXXX] | |
stap_start_task_finder+0x12e/0x33f [orxray_lj_lua_fgraph_XXXXXXX] | |
systemtap_module_init+0x114d/0x11f0 [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_handle_start+0xea/0x1c5 [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_ctl_write_cmd+0x28d/0x2d1 [orxray_lj_lua_fgraph_XXXXXXX] | |
full_proxy_write+0x67/0xbb | |
__vfs_write+0x3a/0x170 | |
vfs_write+0xc7/0x1c0 | |
SyS_write+0x58/0xbf | |
do_syscall_64+0x7e/0x22c | |
entry_SYSCALL_64_after_hwframe+0x16e/0x0 | |
irq event stamp: 9454 | |
hardirqs last enabled at (9453): [<ffffffffa696c960>] _raw_write_unlock_irqrestore+0x40/0x67 | |
hardirqs last disabled at (9454): [<ffffffffa6a05417>] apic_timer_interrupt+0x1c7/0x1d1 | |
softirqs last enabled at (9202): [<ffffffffa6c00361>] __do_softirq+0x361/0x4e5 | |
softirqs last disabled at (9195): [<ffffffffa60aeb76>] irq_exit+0xf6/0x102 | |
other info that might help us debug this: | |
Possible unsafe locking scenario: | |
CPU0 | |
---- | |
lock(&(lock)->rlock#3); | |
<Interrupt> | |
lock(&(lock)->rlock#3); | |
*** DEADLOCK *** | |
no locks held by sh/15779. | |
stack backtrace: | |
CPU: 16 PID: 15779 Comm: sh Tainted: G OE 4.14.35-1902.6.6.el7uek.x86_64.debug #2 | |
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 | |
Call Trace: | |
<IRQ> | |
dump_stack+0x81/0xb6 | |
print_usage_bug+0x1fc/0x20d | |
? check_usage_backwards+0x130/0x12b | |
mark_lock+0x1f8/0x27b | |
__lock_acquire+0x6e7/0x165a | |
? sched_clock_local+0x18/0x81 | |
? perf_swevent_hrtimer+0x136/0x151 | |
lock_acquire+0xe0/0x238 | |
? _stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] | |
_raw_spin_lock_irqsave+0x55/0x97 | |
? _stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_mempool_alloc+0x35/0xab [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_ctl_get_buffer+0x69/0x215 [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_ctl_send+0x4e/0x169 [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_vlog+0xac/0x143 [orxray_lj_lua_fgraph_XXXXXXX] | |
? _stp_utrace_probe_cb+0xa4/0xa4 [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_warn+0x6a/0x88 [orxray_lj_lua_fgraph_XXXXXXX] | |
function___global_warn__overload_0+0x60/0xac [orxray_lj_lua_fgraph_XXXXXXX] | |
probe_67+0xce/0x10e [orxray_lj_lua_fgraph_XXXXXXX] | |
_stp_hrtimer_notify_function+0x2db/0x55f [orxray_lj_lua_fgraph_XXXXXXX] | |
__hrtimer_run_queues+0x132/0x5c5 | |
hrtimer_interrupt+0xb7/0x1ca | |
smp_apic_timer_interrupt+0xa5/0x35a | |
apic_timer_interrupt+0x1cc/0x1d1 | |
</IRQ> | |
--- | |
runtime/stp_utrace.c | 15 +++++++++------ | |
1 file changed, 9 insertions(+), 6 deletions(-) | |
diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c | |
index 126db1869..064dbf190 100644 | |
--- a/runtime/stp_utrace.c | |
+++ b/runtime/stp_utrace.c | |
@@ -490,9 +490,9 @@ static int utrace_exit(void) | |
rcu_read_lock(); | |
stap_hlist_for_each_entry_rcu(utrace, node, &bucket->head, hlist) { | |
utrace->freed = true; | |
- stp_spin_lock(&bucket->lock); | |
+ stp_spin_lock_irqsave(&bucket->lock, flags); | |
hlist_del_rcu(&utrace->hlist); | |
- stp_spin_unlock(&bucket->lock); | |
+ stp_spin_unlock_irqrestore(&bucket->lock, flags); | |
utrace_cleanup(utrace); | |
} | |
@@ -724,6 +724,7 @@ static struct utrace *utrace_task_alloc(struct utrace_bucket *bucket, | |
struct task_struct *task) | |
{ | |
struct utrace *utrace; | |
+ unsigned long flags; | |
utrace = kmem_cache_zalloc(utrace_cachep, STP_ALLOC_FLAGS); | |
if (unlikely(!utrace)) | |
@@ -739,9 +740,9 @@ static struct utrace *utrace_task_alloc(struct utrace_bucket *bucket, | |
atomic_set(&utrace->resume_work_added, 0); | |
atomic_set(&utrace->report_work_added, 0); | |
- stp_spin_lock(&bucket->lock); | |
+ stp_spin_lock_irqsave(&bucket->lock, flags); | |
hlist_add_head_rcu(&utrace->hlist, &bucket->head); | |
- stp_spin_unlock(&bucket->lock); | |
+ stp_spin_unlock_irqrestore(&bucket->lock, flags); | |
return utrace; | |
} | |
@@ -768,15 +769,17 @@ static struct utrace *utrace_task_alloc(struct utrace_bucket *bucket, | |
*/ | |
static void utrace_free(struct utrace_bucket *bucket, struct utrace *utrace) | |
{ | |
+ unsigned long flags; | |
+ | |
if (unlikely(!utrace)) | |
return; | |
/* Remove this utrace from the mapping list of tasks to | |
* struct utrace. */ | |
utrace->freed = true; | |
- stp_spin_lock(&bucket->lock); | |
+ stp_spin_lock_irqsave(&bucket->lock, flags); | |
hlist_del_rcu(&utrace->hlist); | |
- stp_spin_unlock(&bucket->lock); | |
+ stp_spin_unlock_irqrestore(&bucket->lock, flags); | |
/* Free the utrace struct. */ | |
#ifdef STP_TF_DEBUG | |
-- | |
2.29.2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment