Last active
October 20, 2020 18:14
-
-
Save kerneltoast/8a9518f7f17b8fc1613be69a5d0719ff to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From f2de1ee1d341bb7ba6288474fff7ad2ca4de9a1d Mon Sep 17 00:00:00 2001 | |
From: Sultan Alsawaf <sultan@openresty.com> | |
Date: Tue, 20 Oct 2020 08:55:24 -0700 | |
Subject: [PATCH] task_finder_vma: rewrite using RCU to fix performance issues | |
The use of a single global rwlock to protect this file's hash table | |
results in significantly degraded performance when many processes that | |
use the vma tracker are in flight at once. A lot of time is spent | |
spinning on the rwlock when this happens. | |
To remedy this, make the hash table RCU safe so we'll never block upon | |
reading a hash list. | |
Another change made to improve performance is reducing the jhash with a | |
modulo rather than masking off the bits we need with a bitwise AND, to | |
improve the distribution of hashes across the hash table. The task pointers | |
themselves are now hashed instead of their PIDs for reliability, since | |
a PID is not a stable anchor point to a task struct. | |
While we're at it, clean up the rest of this file to bring it up to | |
current Linux kernel coding standards as well. | |
--- | |
runtime/linux/runtime.h | 2 + | |
runtime/task_finder_vma.c | 448 +++++++++++++++++++------------------- | |
2 files changed, 223 insertions(+), 227 deletions(-) | |
diff --git a/runtime/linux/runtime.h b/runtime/linux/runtime.h | |
index 07850c345..8e1ae2c42 100644 | |
--- a/runtime/linux/runtime.h | |
+++ b/runtime/linux/runtime.h | |
@@ -89,9 +89,11 @@ static void _stp_exit(void); | |
#ifdef STAPCONF_HLIST_4ARGS | |
#define stap_hlist_for_each_entry(a,b,c,d) hlist_for_each_entry(a,b,c,d) | |
+#define stap_hlist_for_each_entry_rcu(a,b,c,d) hlist_for_each_entry_rcu(a,b,c,d) | |
#define stap_hlist_for_each_entry_safe(a,b,c,d,e) hlist_for_each_entry_safe(a,b,c,d,e) | |
#else | |
#define stap_hlist_for_each_entry(a,b,c,d) (void) b; hlist_for_each_entry(a,c,d) | |
+#define stap_hlist_for_each_entry_rcu(a,b,c,d) (void) b; hlist_for_each_entry_rcu(a,c,d) | |
#define stap_hlist_for_each_entry_safe(a,b,c,d,e) (void) b; hlist_for_each_entry_safe(a,c,d,e) | |
#endif | |
diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c | |
index b485e5b99..4dc2dc07d 100644 | |
--- a/runtime/task_finder_vma.c | |
+++ b/runtime/task_finder_vma.c | |
@@ -10,14 +10,44 @@ | |
#include "stp_helper_lock.h" | |
-// __stp_tf_vma_lock protects the hash table. | |
-// Documentation/spinlocks.txt suggest we can be a bit more clever | |
-// if we guarantee that in interrupt context we only read, not write | |
-// the datastructures. We should never change the hash table or the | |
-// contents in interrupt context (which should only ever call | |
-// stap_find_vma_map_info for getting stored vma info). So we might | |
-// want to look into that if this seems a bottleneck. | |
-static STP_DEFINE_RWLOCK(__stp_tf_vma_lock); | |
+/* Fallbacks for atomic_try_cmpxchg and atomic_fetch_add_unless, taken from newer kernels */ | |
+#ifndef atomic_try_cmpxchg | |
+static __always_inline bool | |
+atomic_try_cmpxchg(atomic_t *v, int *old, int new) | |
+{ | |
+ int r, o = *old; | |
+ r = atomic_cmpxchg(v, o, new); | |
+ if (unlikely(r != o)) | |
+ *old = r; | |
+ return likely(r == o); | |
+} | |
+#define atomic_try_cmpxchg atomic_try_cmpxchg | |
+#endif | |
+ | |
+#ifndef atomic_fetch_add_unless | |
+/** | |
+ * atomic_fetch_add_unless - add unless the number is already a given value | |
+ * @v: pointer of type atomic_t | |
+ * @a: the amount to add to v... | |
+ * @u: ...unless v is equal to u. | |
+ * | |
+ * Atomically adds @a to @v, so long as @v was not already @u. | |
+ * Returns original value of @v | |
+ */ | |
+static __always_inline int | |
+atomic_fetch_add_unless(atomic_t *v, int a, int u) | |
+{ | |
+ int c = atomic_read(v); | |
+ | |
+ do { | |
+ if (unlikely(c == u)) | |
+ break; | |
+ } while (!atomic_try_cmpxchg(v, &c, c + a)); | |
+ | |
+ return c; | |
+} | |
+#define atomic_fetch_add_unless atomic_fetch_add_unless | |
+#endif | |
#define __STP_TF_HASH_BITS 4 | |
#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS) | |
@@ -28,20 +58,26 @@ static STP_DEFINE_RWLOCK(__stp_tf_vma_lock); | |
#error "gimme a little more TASK_FINDER_VMA_ENTRY_PATHLEN" | |
#endif | |
- | |
struct __stp_tf_vma_entry { | |
struct hlist_node hlist; | |
- pid_t pid; | |
+ struct rcu_head rcu; | |
+ atomic_t refcount; | |
+ struct task_struct *tsk; | |
unsigned long vm_start; | |
unsigned long vm_end; | |
- char path[TASK_FINDER_VMA_ENTRY_PATHLEN]; /* mmpath name, if known */ | |
+ char path[TASK_FINDER_VMA_ENTRY_PATHLEN]; /* mmpath name, if known */ | |
// User data (possibly stp_module) | |
void *user; | |
}; | |
-static struct hlist_head *__stp_tf_vma_map; | |
+struct __stp_tf_vma_bucket { | |
+ struct hlist_head head; | |
+ spinlock_t lock; | |
+}; | |
+ | |
+static struct __stp_tf_vma_bucket *__stp_tf_vma_map; | |
// __stp_tf_vma_new_entry(): Returns an newly allocated or NULL. | |
// Must only be called from user context. | |
@@ -51,22 +87,38 @@ static struct __stp_tf_vma_entry * | |
__stp_tf_vma_new_entry(void) | |
{ | |
struct __stp_tf_vma_entry *entry; | |
- size_t size = sizeof (struct __stp_tf_vma_entry); | |
+ // Alloc using kmalloc rather than the stp variant. This way the RCU | |
+ // callback freeing the entries will not depend on using a function | |
+ // within this module to free the allocated memory (_stp_kfree), which | |
+ // lets us omit a costly rcu_barrier operation upon module unload. | |
#ifdef CONFIG_UTRACE | |
- entry = (struct __stp_tf_vma_entry *) _stp_kmalloc_gfp(size, | |
- STP_ALLOC_SLEEP_FLAGS); | |
+ entry = kmalloc(sizeof(*entry), STP_ALLOC_SLEEP_FLAGS); | |
#else | |
- entry = (struct __stp_tf_vma_entry *) _stp_kmalloc_gfp(size, | |
- STP_ALLOC_FLAGS); | |
+ entry = kmalloc(sizeof(*entry), STP_ALLOC_FLAGS); | |
#endif | |
return entry; | |
} | |
-// __stp_tf_vma_release_entry(): Frees an entry. | |
+// __stp_tf_vma_put_entry(): Put a specified number of references on the entry. | |
static void | |
-__stp_tf_vma_release_entry(struct __stp_tf_vma_entry *entry) | |
+__stp_tf_vma_put_entry(struct __stp_tf_vma_bucket *bucket, | |
+ struct __stp_tf_vma_entry *entry, int count) | |
{ | |
- _stp_kfree (entry); | |
+ unsigned long flags; | |
+ int old; | |
+ | |
+ // We must atomically subtract only if the refcount is non-zero, as well | |
+ // as check to see if the new refcount is zero, in which case we should | |
+ // free the entry. | |
+ old = atomic_fetch_add_unless(&entry->refcount, -count, 0); | |
+ if (old - count) | |
+ return; | |
+ | |
+ spin_lock_irqsave(&bucket->lock, flags); | |
+ hlist_del_rcu(&entry->hlist); | |
+ spin_unlock_irqrestore(&bucket->lock, flags); | |
+ | |
+ kfree_rcu(entry, rcu); | |
} | |
// stap_initialize_vma_map(): Initialize the free list. Grabs the | |
@@ -77,145 +129,127 @@ __stp_tf_vma_release_entry(struct __stp_tf_vma_entry *entry) | |
static int | |
stap_initialize_vma_map(void) | |
{ | |
- size_t size = sizeof(struct hlist_head) * __STP_TF_TABLE_SIZE; | |
- struct hlist_head *map = (struct hlist_head *) _stp_kzalloc_gfp(size, | |
- STP_ALLOC_SLEEP_FLAGS); | |
- if (map == NULL) | |
+ struct __stp_tf_vma_bucket *buckets; | |
+ int i; | |
+ | |
+ buckets = _stp_kmalloc_gfp(sizeof(*buckets) * __STP_TF_TABLE_SIZE, | |
+ STP_ALLOC_SLEEP_FLAGS); | |
+ if (!buckets) | |
return -ENOMEM; | |
- __stp_tf_vma_map = map; | |
+ for (i = 0; i < __STP_TF_TABLE_SIZE; i++) { | |
+ struct __stp_tf_vma_bucket *bucket = &buckets[i]; | |
+ | |
+ INIT_HLIST_HEAD(&bucket->head); | |
+ spin_lock_init(&bucket->lock); | |
+ } | |
+ | |
+ __stp_tf_vma_map = buckets; | |
return 0; | |
} | |
// stap_destroy_vma_map(): Unconditionally destroys vma entries. | |
-// Nothing should be using it anymore. Doesn't lock anything and just | |
-// frees all items. | |
+// Nothing should be using it anymore. | |
static void | |
stap_destroy_vma_map(void) | |
{ | |
- if (__stp_tf_vma_map != NULL) { | |
- int i; | |
- for (i = 0; i < __STP_TF_TABLE_SIZE; i++) { | |
- struct hlist_head *head = &__stp_tf_vma_map[i]; | |
- struct hlist_node *node; | |
- struct hlist_node *n; | |
- struct __stp_tf_vma_entry *entry = NULL; | |
- | |
- if (hlist_empty(head)) | |
- continue; | |
- | |
- stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) { | |
- hlist_del(&entry->hlist); | |
- __stp_tf_vma_release_entry(entry); | |
- } | |
- } | |
- _stp_kfree(__stp_tf_vma_map); | |
+ int i; | |
+ | |
+ if (!__stp_tf_vma_map) | |
+ return; | |
+ | |
+ for (i = 0; i < __STP_TF_TABLE_SIZE; i++) { | |
+ struct __stp_tf_vma_bucket *bucket = &__stp_tf_vma_map[i]; | |
+ struct __stp_tf_vma_entry *entry; | |
+ struct hlist_node *node; | |
+ | |
+ rcu_read_lock(); | |
+ stap_hlist_for_each_entry_rcu(entry, node, &bucket->head, hlist) | |
+ __stp_tf_vma_put_entry(bucket, entry, 1); | |
+ rcu_read_unlock(); | |
} | |
-} | |
+ _stp_kfree(__stp_tf_vma_map); | |
+} | |
// __stp_tf_vma_map_hash(): Compute the vma map hash. | |
static inline u32 | |
__stp_tf_vma_map_hash(struct task_struct *tsk) | |
{ | |
- return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1)); | |
-} | |
- | |
-// Get vma_entry if the vma is present in the vma map hash table. | |
-// Returns NULL if not present. The __stp_tf_vma_lock must be read locked | |
-// before calling this function. | |
-static struct __stp_tf_vma_entry * | |
-__stp_tf_get_vma_map_entry_internal(struct task_struct *tsk, | |
- unsigned long vm_start) | |
-{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
- struct __stp_tf_vma_entry *entry; | |
+ u32 hash = jhash2((u32 *)&tsk, sizeof(tsk) / sizeof(u32), 0); | |
- head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; | |
- stap_hlist_for_each_entry(entry, node, head, hlist) { | |
- if (tsk->pid == entry->pid | |
- && vm_start == entry->vm_start) { | |
- return entry; | |
- } | |
- } | |
- return NULL; | |
+ return hash % (__STP_TF_TABLE_SIZE - 1); | |
} | |
-// Get vma_entry if the vma with the given vm_end is present in the vma map | |
-// hash table for the tsk. Returns NULL if not present. | |
-// The __stp_tf_vma_lock must be read locked before calling this function. | |
-static struct __stp_tf_vma_entry * | |
-__stp_tf_get_vma_map_entry_end_internal(struct task_struct *tsk, | |
- unsigned long vm_end) | |
+// __stp_tf_get_vma_bucket(): Get the bucket that should contain the task. | |
+static inline struct __stp_tf_vma_bucket * | |
+__stp_tf_get_vma_bucket(struct task_struct *tsk) | |
{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
- struct __stp_tf_vma_entry *entry; | |
- | |
- head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; | |
- stap_hlist_for_each_entry(entry, node, head, hlist) { | |
- if (tsk->pid == entry->pid | |
- && vm_end == entry->vm_end) { | |
- return entry; | |
- } | |
- } | |
- return NULL; | |
+ return &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; | |
} | |
+// __stp_tf_get_vma_map(): Look up a vma entry in a hash bucket. | |
+// Walks @bucket under rcu_read_lock() and yields the first entry whose tsk | |
+// equals @task, for which @condition is true, and whose refcount is non-zero; | |
+// yields NULL if there is none. @condition is an expression that may refer to | |
+// the loop cursor, which is named `entry` inside this macro. @acquire is added | |
+// to the refcount of the entry that is found; pass 0 to only test for presence. | |
+// The RCU read section ends before the result is yielded, so the caller owns | |
+// nothing but the @acquire references it asked for. | |
+// The task parameter is deliberately not named `tsk`, so that it cannot be | |
+// substituted into the `entry->tsk` member access below. | |
+#define __stp_tf_get_vma_map(bucket, task, acquire, condition) \ | |
+({ \ | |
+ struct __stp_tf_vma_entry *entry, *found = NULL; \ | |
+ struct hlist_node *node; \ | |
+ \ | |
+ rcu_read_lock(); \ | |
+ stap_hlist_for_each_entry_rcu(entry, node, &(bucket)->head, hlist) { \ | |
+ if (entry->tsk == (task) && (condition) && \ | |
+ atomic_add_unless(&entry->refcount, (acquire), 0)) { \ | |
+ found = entry; \ | |
+ break; \ | |
+ } \ | |
+ } \ | |
+ rcu_read_unlock(); \ | |
+ \ | |
+ found; \ | |
+}) | |
// Add the vma info to the vma map hash table. | |
// Caller is responsible for name lifetime. | |
// Can allocate memory, so needs to be called | |
// only from user context. | |
static int | |
-stap_add_vma_map_info(struct task_struct *tsk, | |
- unsigned long vm_start, unsigned long vm_end, | |
- const char *path, void *user) | |
+stap_add_vma_map_info(struct task_struct *tsk, unsigned long vm_start, | |
+ unsigned long vm_end, const char *path, void *user) | |
{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
+ struct __stp_tf_vma_bucket *bucket = __stp_tf_get_vma_bucket(tsk); | |
struct __stp_tf_vma_entry *entry; | |
- struct __stp_tf_vma_entry *new_entry; | |
+ struct hlist_node *node; | |
unsigned long flags; | |
+ size_t path_len; | |
- // Take a write lock, since we are most likely going to write | |
- // after reading. But reserve a new entry first outside the lock. | |
- new_entry = __stp_tf_vma_new_entry(); | |
- stp_write_lock_irqsave(&__stp_tf_vma_lock, flags); | |
- entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start); | |
- if (entry != NULL) { | |
- stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
- if (new_entry) | |
- __stp_tf_vma_release_entry(new_entry); | |
- return -EBUSY; /* Already there */ | |
- } | |
+ // Check if the entry already exists | |
+ if (__stp_tf_get_vma_map(bucket, tsk, 0, entry->vm_start == vm_start)) | |
+ return -EEXIST; | |
- if (!new_entry) { | |
- stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
+ entry = __stp_tf_vma_new_entry(); | |
+ if (!entry) | |
return -ENOMEM; | |
- } | |
- // Fill in the info | |
- entry = new_entry; | |
- entry->pid = tsk->pid; | |
+ // Fill in the new entry | |
+ entry->refcount = (atomic_t)ATOMIC_INIT(1); | |
+ entry->tsk = tsk; | |
entry->vm_start = vm_start; | |
entry->vm_end = vm_end; | |
- if (strlen(path) >= TASK_FINDER_VMA_ENTRY_PATHLEN-3) | |
- { | |
- strlcpy (entry->path, "...", TASK_FINDER_VMA_ENTRY_PATHLEN); | |
- strlcpy (entry->path+3, &path[strlen(path)-TASK_FINDER_VMA_ENTRY_PATHLEN+4], | |
- TASK_FINDER_VMA_ENTRY_PATHLEN-3); | |
- } | |
- else | |
- { | |
- strlcpy (entry->path, path, TASK_FINDER_VMA_ENTRY_PATHLEN); | |
- } | |
entry->user = user; | |
- head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; | |
- hlist_add_head(&entry->hlist, head); | |
- stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
+ path_len = strlen(path); | |
+ if (path_len >= TASK_FINDER_VMA_ENTRY_PATHLEN - 3) { | |
+ strlcpy(entry->path, "...", TASK_FINDER_VMA_ENTRY_PATHLEN); | |
+ strlcpy(entry->path + 3, | |
+ &path[path_len - TASK_FINDER_VMA_ENTRY_PATHLEN + 4], | |
+ TASK_FINDER_VMA_ENTRY_PATHLEN - 3); | |
+ } else { | |
+ strlcpy(entry->path, path, TASK_FINDER_VMA_ENTRY_PATHLEN); | |
+ } | |
+ | |
+ spin_lock_irqsave(&bucket->lock, flags); | |
+ hlist_add_head_rcu(&entry->hlist, &bucket->head); | |
+ spin_unlock_irqrestore(&bucket->lock, flags); | |
return 0; | |
} | |
@@ -224,26 +258,19 @@ stap_add_vma_map_info(struct task_struct *tsk, | |
// task. Returns zero on success, -ESRCH if no existing matching entry could | |
// be found. | |
static int | |
-stap_extend_vma_map_info(struct task_struct *tsk, | |
- unsigned long vm_start, unsigned long vm_end) | |
+stap_extend_vma_map_info(struct task_struct *tsk, unsigned long vm_start, | |
+ unsigned long vm_end) | |
{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
+ struct __stp_tf_vma_bucket *bucket = __stp_tf_get_vma_bucket(tsk); | |
struct __stp_tf_vma_entry *entry; | |
- unsigned long flags; | |
- int res = -ESRCH; // Entry not there or doesn't match. | |
- | |
- // Take a write lock, since we are most likely going to write | |
- // to the entry after reading, if its vm_end matches our vm_start. | |
- stp_write_lock_irqsave(&__stp_tf_vma_lock, flags); | |
- entry = __stp_tf_get_vma_map_entry_end_internal(tsk, vm_start); | |
- if (entry != NULL) { | |
- entry->vm_end = vm_end; | |
- res = 0; | |
- } | |
- stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
- return res; | |
+ entry = __stp_tf_get_vma_map(bucket, tsk, 1, entry->vm_end == vm_start); | |
+ if (!entry) | |
+ return -ESRCH; | |
+ | |
+ entry->vm_end = vm_end; | |
+ __stp_tf_vma_put_entry(bucket, entry, 1); | |
+ return 0; | |
} | |
@@ -252,128 +279,95 @@ stap_extend_vma_map_info(struct task_struct *tsk, | |
static int | |
stap_remove_vma_map_info(struct task_struct *tsk, unsigned long vm_start) | |
{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
+ struct __stp_tf_vma_bucket *bucket = __stp_tf_get_vma_bucket(tsk); | |
struct __stp_tf_vma_entry *entry; | |
- int rc = -ESRCH; | |
- // Take a write lock since we are most likely going to delete | |
- // after reading. | |
- unsigned long flags; | |
- stp_write_lock_irqsave(&__stp_tf_vma_lock, flags); | |
- entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start); | |
- if (entry != NULL) { | |
- hlist_del(&entry->hlist); | |
- __stp_tf_vma_release_entry(entry); | |
- rc = 0; | |
- } | |
- stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
- return rc; | |
+ entry = __stp_tf_get_vma_map(bucket, tsk, 1, entry->vm_start == vm_start); | |
+ if (!entry) | |
+ return -ESRCH; | |
+ | |
+ // Put two references: one for the reference we just got, | |
+ // and another to free the entry. | |
+ __stp_tf_vma_put_entry(bucket, entry, 2); | |
+ return 0; | |
} | |
// Finds vma info if the vma is present in the vma map hash table for | |
// a given task and address (between vm_start and vm_end). | |
-// Returns -ESRCH if not present. The __stp_tf_vma_lock must *not* be | |
-// locked before calling this function. | |
+// Returns -ESRCH if not present. | |
static int | |
stap_find_vma_map_info(struct task_struct *tsk, unsigned long addr, | |
unsigned long *vm_start, unsigned long *vm_end, | |
const char **path, void **user) | |
{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
+ struct __stp_tf_vma_bucket *bucket; | |
struct __stp_tf_vma_entry *entry; | |
- struct __stp_tf_vma_entry *found_entry = NULL; | |
- int rc = -ESRCH; | |
- unsigned long flags; | |
- | |
- if (__stp_tf_vma_map == NULL) | |
- return rc; | |
- stp_read_lock_irqsave(&__stp_tf_vma_lock, flags); | |
- head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; | |
- stap_hlist_for_each_entry(entry, node, head, hlist) { | |
- if (tsk->pid == entry->pid | |
- && addr >= entry->vm_start | |
- && addr < entry->vm_end) { | |
- found_entry = entry; | |
- break; | |
- } | |
- } | |
- if (found_entry != NULL) { | |
- if (vm_start != NULL) | |
- *vm_start = found_entry->vm_start; | |
- if (vm_end != NULL) | |
- *vm_end = found_entry->vm_end; | |
- if (path != NULL) | |
- *path = found_entry->path; | |
- if (user != NULL) | |
- *user = found_entry->user; | |
- rc = 0; | |
- } | |
- stp_read_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
- return rc; | |
+ if (!__stp_tf_vma_map) | |
+ return -ESRCH; | |
+ | |
+ bucket = __stp_tf_get_vma_bucket(tsk); | |
+ entry = __stp_tf_get_vma_map(bucket, tsk, 1, addr >= entry->vm_start && | |
+ addr < entry->vm_end); | |
+ if (!entry) | |
+ return -ESRCH; | |
+ | |
+ if (vm_start) | |
+ *vm_start = entry->vm_start; | |
+ if (vm_end) | |
+ *vm_end = entry->vm_end; | |
+ if (path) | |
+ *path = entry->path; | |
+ if (user) | |
+ *user = entry->user; | |
+ | |
+ __stp_tf_vma_put_entry(bucket, entry, 1); | |
+ return 0; | |
} | |
// Finds vma info if the vma is present in the vma map hash table for | |
// a given task with the given user handle. | |
-// Returns -ESRCH if not present. The __stp_tf_vma_lock must *not* be | |
-// locked before calling this function. | |
+// Returns -ESRCH if not present. | |
static int | |
stap_find_vma_map_info_user(struct task_struct *tsk, void *user, | |
unsigned long *vm_start, unsigned long *vm_end, | |
const char **path) | |
{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
+ struct __stp_tf_vma_bucket *bucket; | |
struct __stp_tf_vma_entry *entry; | |
- struct __stp_tf_vma_entry *found_entry = NULL; | |
- int rc = -ESRCH; | |
- unsigned long flags; | |
- if (__stp_tf_vma_map == NULL) | |
- return rc; | |
+ if (!__stp_tf_vma_map) | |
+ return -ESRCH; | |
- stp_read_lock_irqsave(&__stp_tf_vma_lock, flags); | |
- head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; | |
- stap_hlist_for_each_entry(entry, node, head, hlist) { | |
- if (tsk->pid == entry->pid | |
- && user == entry->user) { | |
- found_entry = entry; | |
- break; | |
- } | |
- } | |
- if (found_entry != NULL) { | |
- if (vm_start != NULL) | |
- *vm_start = found_entry->vm_start; | |
- if (vm_end != NULL) | |
- *vm_end = found_entry->vm_end; | |
- if (path != NULL) | |
- *path = found_entry->path; | |
- rc = 0; | |
- } | |
- stp_read_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
- return rc; | |
+ bucket = __stp_tf_get_vma_bucket(tsk); | |
+ entry = __stp_tf_get_vma_map(bucket, tsk, 1, entry->user == user); | |
+ if (!entry) | |
+ return -ESRCH; | |
+ | |
+ if (vm_start) | |
+ *vm_start = entry->vm_start; | |
+ if (vm_end) | |
+ *vm_end = entry->vm_end; | |
+ if (path) | |
+ *path = entry->path; | |
+ | |
+ __stp_tf_vma_put_entry(bucket, entry, 1); | |
+ return 0; | |
} | |
static int | |
stap_drop_vma_maps(struct task_struct *tsk) | |
{ | |
- struct hlist_head *head; | |
- struct hlist_node *node; | |
- struct hlist_node *n; | |
+ struct __stp_tf_vma_bucket *bucket = __stp_tf_get_vma_bucket(tsk); | |
struct __stp_tf_vma_entry *entry; | |
+ struct hlist_node *node; | |
- unsigned long flags; | |
- stp_write_lock_irqsave(&__stp_tf_vma_lock, flags); | |
- head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)]; | |
- stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) { | |
- if (tsk->pid == entry->pid) { | |
- hlist_del(&entry->hlist); | |
- __stp_tf_vma_release_entry(entry); | |
- } | |
- } | |
- stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags); | |
+ rcu_read_lock(); | |
+ stap_hlist_for_each_entry_rcu(entry, node, &bucket->head, hlist) { | |
+ if (entry->tsk == tsk) | |
+ __stp_tf_vma_put_entry(bucket, entry, 1); | |
+ } | |
+ rcu_read_unlock(); | |
return 0; | |
} | |
-- | |
2.28.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- systemtap-normal-logs/systemtap.sum 2020-10-20 00:21:11.000000000 -0700 | |
+++ systemtap-rcu-logs/systemtap.sum 2020-10-20 10:00:51.000000000 -0700 | |
@@ -1,4 +1,4 @@ | |
-Test run by root on Mon Oct 19 23:57:42 2020 | |
+Test run by root on Tue Oct 20 09:38:08 2020 | |
Native configuration is x86_64-pc-linux-gnu | |
=== systemtap tests === | |
@@ -171,7 +171,7 @@ | |
PASS: at_kderef shutdown and output | |
PASS: at_uderef startup | |
PASS: at_uderef load generation | |
-FAIL: at_uderef unexpected output | |
+PASS: at_uderef shutdown and output | |
Running /home/sultan/systemtap/testsuite/systemtap.base/atomic.exp ... | |
PASS: atomic1 expected error | |
PASS: atomic2 expected error | |
@@ -281,7 +281,7 @@ | |
Running /home/sultan/systemtap/testsuite/systemtap.base/bz1074541.exp ... | |
PASS: ./bz1074541 | |
Running /home/sultan/systemtap/testsuite/systemtap.base/bz1126645.exp ... | |
-FAIL: bz1126645 -p5 (40) | |
+FAIL: bz1126645 -p5 (2) | |
Running /home/sultan/systemtap/testsuite/systemtap.base/bz1214176.exp ... | |
PASS: stap -p4 -e { probe nfs.proc.read_done { println(server_ip) } } | |
PASS: stap -p4 -e { probe nfs.proc.read_setup { println(count) } } | |
@@ -657,7 +657,7 @@ | |
PASS: dw_entry_value shutdown and output | |
Running /home/sultan/systemtap/testsuite/systemtap.base/environment_sanity.exp ... | |
Host: Linux localhost.localdomain 5.8.15-201.fc32.x86_64 #1 SMP Thu Oct 15 15:56:44 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux | |
-Snapshot: version 4.4/0.181, commit release-4.3-85-g2f7e3794ac5b | |
+Snapshot: version 4.4/0.181, commit release-4.3-86-g146f2c2eb284 | |
GCC: 10.2.1 [gcc (GCC) 10.2.1 20200723 (Red Hat 10.2.1-1)] | |
Distro: Fedora release 32 (Thirty Two) | |
SElinux: Enforcing | |
@@ -2736,7 +2736,7 @@ | |
PASS: listing_mode_sanity (using arguments and script exited badly) | |
FAIL: listing_mode_sanity (stap -l ** exited badly) | |
Running /home/sultan/systemtap/testsuite/systemtap.base/lock-pushdown.exp ... | |
-FAIL: lock-pushdown | |
+PASS: lock-pushdown | |
PASS: lock-pushdown -u | |
PASS: lock-pushdown compat-4.3 | |
Running /home/sultan/systemtap/testsuite/systemtap.base/logical_and.exp ... | |
@@ -2836,10 +2836,10 @@ | |
Running /home/sultan/systemtap/testsuite/systemtap.base/optim.exp ... | |
PASS: optim | |
Running /home/sultan/systemtap/testsuite/systemtap.base/optim_stats.exp ... | |
-FAIL: TEST1 (5, -4) | |
-PASS: TEST2 (20, 46) | |
-FAIL: TEST3 (5, 0) | |
-PASS: TEST4 (20, 23) | |
+FAIL: TEST1 (5, -6) | |
+PASS: TEST2 (20, 45) | |
+FAIL: TEST3 (5, 4) | |
+PASS: TEST4 (20, 24) | |
Running /home/sultan/systemtap/testsuite/systemtap.base/optim_voidstmt.exp ... | |
PASS: optim_voidstmt startup | |
PASS: optim_voidstmt load generation | |
@@ -2873,7 +2873,7 @@ | |
FAIL: perf process (0 - 0) | |
PASS: perf counter | |
FAIL: perf global (0 - 0) | |
-PASS: counter order 100000 | |
+PASS: counter order 200000 | |
Running /home/sultan/systemtap/testsuite/systemtap.base/plt.exp ... | |
FAIL: plt | |
FAIL: plt library | |
@@ -3039,7 +3039,6 @@ | |
PASS: PROCFS_BPF value: goodbye | |
PASS: PROCFS_BPF load generation | |
PASS: PROCFS_BPF shutdown and output | |
-FAIL: PROCFS_BPF unexpected output (after passing output) | |
Running /home/sultan/systemtap/testsuite/systemtap.base/procfs.exp ... | |
PASS: PROCFS startup | |
PASS: PROCFS read 100 | |
@@ -3285,7 +3284,7 @@ | |
PASS: register_x86: TEST 3: 8-bit and 16-bit registers for ecx (kernel): stdout: string is "ecx = 0xffffffffbeefdead\nu ecx = 0xbeefdead\n\ncx = 0xffffffffffffdead\nu cx = 0xdead\n\ncl = 0xffffffffffffffad\nu cl = 0xad\n\nch = 0xffffffffffffffde\nu ch = 0xde\n" | |
PASS: register_x86: TEST 3: 8-bit and 16-bit registers for ecx (kernel): exit code: string is "0" | |
PASS: register_x86: TEST 4: 8-bit and 16-bit registers for eax (kernel): stdout: string is "ebx = 0xffffffffbeefdead\nu ebx = 0xbeefdead\n\nbx = 0xffffffffffffdead\nu bx = 0xdead\n\nbl = 0xffffffffffffffad\nu bl = 0xad\n\nbh = 0xffffffffffffffde\nu bh = 0xde\n" | |
-PASS: register_x86: TEST 4: 8-bit and 16-bit registers for eax (kernel): exit code: string is "0" | |
+FAIL: register_x86: TEST 4: 8-bit and 16-bit registers for eax (kernel): exit code: string should be "0", but got "1" | |
Running /home/sultan/systemtap/testsuite/systemtap.base/remote.exp ... | |
PASS: remote build direct: | |
PASS: remote run direct: | |
@@ -3898,7 +3897,7 @@ | |
PASS: tracepoints - kernel.trace("bla:") | |
PASS: tracepoints - kernel.trace("sched:") | |
PASS: systemtap.base/tracepoints.stp -w | |
-PASS: systemtap.base/tracepoints2.stp | |
+FAIL: systemtap.base/tracepoints2.stp | |
Running /home/sultan/systemtap/testsuite/systemtap.base/tracepoints_list.exp ... | |
UNTESTED: tracepoints_list (no perf) | |
Running /home/sultan/systemtap/testsuite/systemtap.base/tracescripts.exp ... | |
@@ -4125,7 +4124,7 @@ | |
PASS: temporary.stp | |
PASS: unreachable.stp | |
Running /home/sultan/systemtap/testsuite/systemtap.bpf/bpf.exp ... | |
-KFAIL: array.stp incorrect result (PRMS: BPF) | |
+PASS: array.stp | |
PASS: array_in.stp | |
PASS: array_preinit.stp | |
PASS: assignment.stp | |
@@ -4154,14 +4153,14 @@ | |
PASS: if.stp | |
PASS: increment1.stp | |
PASS: increment2.stp | |
-FAIL: kprobes.stp incorrect result | |
+PASS: kprobes.stp | |
PASS: ktime_get_ns.stp | |
PASS: logging1.stp | |
PASS: logging2.stp | |
FAIL: next.stp incorrect result | |
PASS: no_begin.stp | |
-KFAIL: no_begin_no_end.stp eof (startup) (PRMS: BPF) | |
-KFAIL: no_end.stp unexpected output (PRMS: BPF) | |
+PASS: no_begin_no_end.stp | |
+PASS: no_end.stp | |
PASS: order.stp | |
PASS: perf1.stp | |
FAIL: perf2.stp eof (startup) | |
@@ -4175,7 +4174,7 @@ | |
PASS: sprintf.stp | |
PASS: stat1.stp | |
PASS: stat2.stp | |
-PASS: stat3.stp | |
+FAIL: stat3.stp incorrect result | |
FAIL: string1.stp incorrect result | |
PASS: string2.stp | |
FAIL: string3.stp incorrect result | |
@@ -4242,7 +4241,7 @@ | |
PASS: stat1.stp | |
PASS: stat2.stp | |
PASS: stat3.stp | |
-FAIL: string1.stp incorrect result | |
+PASS: string1.stp | |
PASS: string2.stp | |
PASS: string3.stp | |
PASS: string4.stp | |
@@ -4282,7 +4281,7 @@ | |
PASS: dtrace_vfork_exec3 - build success | |
PASS: dtrace_vfork_exec4 startup | |
PASS: dtrace_vfork_exec4 load generation | |
-FAIL: dtrace_vfork_exec4 unexpected output | |
+PASS: dtrace_vfork_exec4 shutdown and output | |
Running /home/sultan/systemtap/testsuite/systemtap.clone/main_quiesce.exp ... | |
PASS: main_quiesce - compiled main_quiesce.c | |
PASS: main_quiesce startup | |
@@ -4483,7 +4482,7 @@ | |
PASS: systemtap.examples/io/nfs_func_users run | |
PASS: systemtap.examples/io/slowvfs support | |
PASS: systemtap.examples/io/slowvfs build | |
-PASS: systemtap.examples/io/slowvfs run | |
+FAIL: systemtap.examples/io/slowvfs run | |
PASS: systemtap.examples/io/switchfile build | |
PASS: systemtap.examples/io/switchfile run | |
PASS: systemtap.examples/io/traceio build | |
@@ -5077,7 +5076,7 @@ | |
PASS: hrtimer_onthefly - otf_start_enabled_iter_4 (valid output) | |
PASS: hrtimer_onthefly - otf_start_disabled_iter_5 (valid output) | |
PASS: hrtimer_onthefly - otf_start_enabled_iter_5 (valid output) | |
-PASS: hrtimer_onthefly - otf_timer_10ms (valid output) | |
+FAIL: hrtimer_onthefly - otf_timer_10ms (invalid output) | |
PASS: hrtimer_onthefly - otf_timer_5ms (valid output) | |
PASS: hrtimer_onthefly - otf_stress_2ms_iter_50 (survived) | |
PASS: hrtimer_onthefly - otf_stress_1ms_iter_50 (survived) | |
@@ -5097,8 +5096,8 @@ | |
PASS: kprobes_onthefly - otf_start_enabled_iter_3 (valid output) | |
PASS: kprobes_onthefly - otf_start_disabled_iter_4 (valid output) | |
PASS: kprobes_onthefly - otf_start_enabled_iter_4 (valid output) | |
-PASS: kprobes_onthefly - otf_start_disabled_iter_5 (valid output) | |
-PASS: kprobes_onthefly - otf_start_enabled_iter_5 (valid output) | |
+FAIL: kprobes_onthefly - otf_start_disabled_iter_5 (invalid output) | |
+FAIL: kprobes_onthefly - otf_start_enabled_iter_5 (invalid output) | |
PASS: kprobes_onthefly - otf_timer_100ms (valid output) | |
PASS: kprobes_onthefly - otf_timer_50ms (valid output) | |
PASS: kprobes_onthefly - otf_timer_10ms (valid output) | |
@@ -8350,6 +8349,7 @@ | |
Running /home/sultan/systemtap/testsuite/systemtap.server/client.exp ... | |
PASS: List existing online servers | |
PASS: List existing online servers | |
+PASS: List existing online servers | |
PASS: List existing trusted servers | |
PASS: List existing signing servers | |
PASS: List all existing servers | |
@@ -8640,7 +8640,7 @@ | |
FAIL: 64-bit preadv nd_syscall | |
PASS: 64-bit prlimit nd_syscall | |
PASS: 64-bit process_vm nd_syscall | |
-FAIL: 64-bit ptrace nd_syscall | |
+PASS: 64-bit ptrace nd_syscall | |
FAIL: 64-bit pwrite nd_syscall | |
FAIL: 64-bit pwritev nd_syscall | |
FAIL: 64-bit quotactl nd_syscall | |
@@ -8753,7 +8753,7 @@ | |
PASS: 32-bit fanotify nd_syscall | |
PASS: 32-bit flock nd_syscall | |
PASS: 32-bit forkwait nd_syscall | |
-PASS: 32-bit futex nd_syscall | |
+FAIL: 32-bit futex nd_syscall | |
PASS: 32-bit futimes nd_syscall | |
PASS: 32-bit fxattr nd_syscall | |
PASS: 32-bit getcpu nd_syscall | |
@@ -8859,7 +8859,7 @@ | |
PASS: 32-bit shutdown nd_syscall | |
FAIL: 32-bit sigaltstack nd_syscall | |
PASS: 32-bit sigmask nd_syscall | |
-PASS: 32-bit signal nd_syscall | |
+FAIL: 32-bit signal nd_syscall | |
PASS: 32-bit signalfd nd_syscall | |
PASS: 32-bit socket nd_syscall | |
PASS: 32-bit socketpair nd_syscall | |
@@ -8868,7 +8868,7 @@ | |
FAIL: 32-bit swap nd_syscall | |
PASS: 32-bit sync nd_syscall | |
FAIL: 32-bit sync_file_range nd_syscall | |
-PASS: 32-bit syncfs nd_syscall | |
+FAIL: 32-bit syncfs nd_syscall | |
PASS: 32-bit sysctl nd_syscall | |
FAIL: 32-bit sysfs nd_syscall | |
PASS: 32-bit sysinfo nd_syscall | |
@@ -8988,8 +8988,8 @@ | |
FAIL: 64-bit pread syscall | |
FAIL: 64-bit preadv syscall | |
PASS: 64-bit prlimit syscall | |
-PASS: 64-bit process_vm syscall | |
-PASS: 64-bit ptrace syscall | |
+FAIL: 64-bit process_vm syscall | |
+FAIL: 64-bit ptrace syscall | |
FAIL: 64-bit pwrite syscall | |
FAIL: 64-bit pwritev syscall | |
FAIL: 64-bit quotactl syscall | |
@@ -9034,7 +9034,7 @@ | |
PASS: 64-bit shmat syscall | |
PASS: 64-bit shmget syscall | |
PASS: 64-bit shutdown syscall | |
-PASS: 64-bit sigaltstack syscall | |
+FAIL: 64-bit sigaltstack syscall | |
UNSUPPORTED: 64-bit sigmask syscall not supported on this arch | |
PASS: 64-bit signal syscall | |
PASS: 64-bit signalfd syscall | |
@@ -9064,7 +9064,7 @@ | |
PASS: 64-bit unshare syscall | |
FAIL: 64-bit uselib syscall | |
PASS: 64-bit userfaultfd syscall | |
-FAIL: 64-bit vforkwait syscall | |
+PASS: 64-bit vforkwait syscall | |
PASS: 64-bit vhangup syscall | |
PASS: 64-bit wait syscall | |
PASS: 64-bit wait4 syscall | |
@@ -9102,7 +9102,7 @@ | |
PASS: 32-bit fanotify syscall | |
PASS: 32-bit flock syscall | |
PASS: 32-bit forkwait syscall | |
-PASS: 32-bit futex syscall | |
+FAIL: 32-bit futex syscall | |
PASS: 32-bit futimes syscall | |
PASS: 32-bit fxattr syscall | |
PASS: 32-bit getcpu syscall | |
@@ -9217,7 +9217,7 @@ | |
FAIL: 32-bit swap syscall | |
PASS: 32-bit sync syscall | |
FAIL: 32-bit sync_file_range syscall | |
-PASS: 32-bit syncfs syscall | |
+FAIL: 32-bit syncfs syscall | |
PASS: 32-bit sysctl syscall | |
FAIL: 32-bit sysfs syscall | |
PASS: 32-bit sysinfo syscall | |
@@ -9380,7 +9380,7 @@ | |
PASS: 64-bit shmat tp_syscall | |
PASS: 64-bit shmget tp_syscall | |
PASS: 64-bit shutdown tp_syscall | |
-PASS: 64-bit sigaltstack tp_syscall | |
+FAIL: 64-bit sigaltstack tp_syscall | |
UNSUPPORTED: 64-bit sigmask tp_syscall not supported on this arch | |
PASS: 64-bit signal tp_syscall | |
PASS: 64-bit signalfd tp_syscall | |
@@ -9391,7 +9391,7 @@ | |
FAIL: 64-bit swap tp_syscall | |
PASS: 64-bit sync tp_syscall | |
PASS: 64-bit sync_file_range tp_syscall | |
-FAIL: 64-bit syncfs tp_syscall | |
+PASS: 64-bit syncfs tp_syscall | |
PASS: 64-bit sysctl tp_syscall | |
FAIL: 64-bit sysfs tp_syscall | |
PASS: 64-bit sysinfo tp_syscall | |
@@ -9554,7 +9554,7 @@ | |
PASS: 32-bit shutdown tp_syscall | |
FAIL: 32-bit sigaltstack tp_syscall | |
PASS: 32-bit sigmask tp_syscall | |
-PASS: 32-bit signal tp_syscall | |
+FAIL: 32-bit signal tp_syscall | |
PASS: 32-bit signalfd tp_syscall | |
PASS: 32-bit socket tp_syscall | |
PASS: 32-bit socketpair tp_syscall | |
@@ -9592,7 +9592,7 @@ | |
PASS: pr16806 library compile | |
PASS: pr16806 exe compile | |
PASS: pr16806 ko compile | |
-PASS: pr16806 | |
+FAIL: pr16806 staprun | |
Running /home/sultan/systemtap/testsuite/systemtap.unprivileged/unprivileged_embedded_C.exp ... | |
PASS: unprivileged embedded C: Obtain list tapset functions containing embedded C | |
UNTESTED: unprivileged embedded C: no embedded C: container_of_task_rcu(long) | |
@@ -11057,7 +11057,7 @@ | |
PASS: unprivileged myproc: --unprivileged process(number).statement(string) | |
PASS: unprivileged myproc: --privilege=stapusr process(number).statement(string).nearest | |
PASS: unprivileged myproc: --unprivileged process(string).begin | |
-PASS: unprivileged myproc: --privilege=stapusr process(string).end | |
+FAIL: unprivileged myproc: --privilege=stapusr process(string).end | |
PASS: unprivileged myproc: --unprivileged process(string).function(number) | |
PASS: unprivileged myproc: --privilege=stapusr process(string).function(number).call | |
KFAIL: unprivileged myproc: --unprivileged process(string).function(number).inline (PRMS: GCC) | |
@@ -12247,11 +12247,11 @@ | |
=== systemtap Summary === | |
-# of expected passes 9726 | |
-# of unexpected failures 605 | |
+# of expected passes 9719 | |
+# of unexpected failures 615 | |
# of unexpected successes 8 | |
# of expected failures 344 | |
# of unknown successes 5 | |
-# of known failures 101 | |
+# of known failures 98 | |
# of untested testcases 845 | |
# of unsupported tests 19 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment