From eae667381111910bcff8eb7d74cb14b7d048b1bc Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@openresty.com>
Date: Wed, 20 Jan 2021 12:51:19 -0800
Subject: [PATCH] stp_utrace: remove kmem cache usage

Some kernels appear to have trouble registering the same kmem_cache in
parallel, resulting in the following error as well as other mayhem,
such as staprun hangs and kernel freezes:

    sysfs: cannot create duplicate filename '/kernel/slab/:0000144'

This occurs when stap modules are registered in parallel with one
another.

The justification for using kmem caches in utrace is that the utrace
struct sizes are not powers of 2 and a lot of them can be allocated, so
leaving them to the kernel's default kmem caches can waste quite a bit
of memory. However, this is only a problem for the utrace struct, not
the utrace_engine struct: the utrace_engine struct is 56 bytes on
64-bit, so it can be served by the kernel's internal 64-byte kmem cache
with only 8 bytes wasted per allocation.
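
Summarizing the arithmetic from this paragraph and the next (64-bit
sizes; the exact set of kmalloc size classes depends on the kernel
configuration, so the rounding shown here is illustrative):

    struct utrace_engine:  56 bytes -> 64-byte slab  ->   8 bytes wasted
    struct utrace (old):  144 bytes -> 256-byte slab -> 112 bytes wasted
    struct utrace (new):  128 bytes -> 128-byte slab ->   0 bytes wasted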
The same cannot be said for the utrace struct, since it's 144 bytes. It
would therefore be allocated from the 256-byte kmem cache, resulting in
112 bytes wasted per allocation. We can remedy this by reusing existing
memory in the struct for the 16-byte RCU callback head, bringing the
overall struct size down to 128 bytes and thus eliminating the need for
a kmem cache. This is safe because the reused struct members are no
longer used once the struct is ready to be freed.
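
To make the layout claims checkable, compile-time guards along these
lines could sit next to the struct definition (illustrative only, not
part of this patch; utrace_layout_check() is a hypothetical helper
name, and the 128-byte bound assumes a 64-bit build):

    #include <linux/bug.h>	/* BUILD_BUG_ON() */

    static inline void utrace_layout_check(void)
    {
    	/* struct rcu_head must fit in the two reused list_heads */
    	BUILD_BUG_ON(sizeof(struct rcu_head) >
    		     2 * sizeof(struct list_head));
    	/* the whole struct should now fit the 128-byte kmalloc slab */
    	BUILD_BUG_ON(sizeof(struct utrace) > 128);
    }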

This also eliminates a pesky rcu_barrier() that is no longer required.
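
Roughly, the difference is that call_rcu() pointed the grace-period
callback at module code (utrace_free_rcu()), which touched the
module-owned utrace_cachep, so utrace_exit() had to rcu_barrier()
before kmem_cache_destroy(). With kfree_rcu() the kernel itself frees
the kmalloc'd struct after the grace period, so no module code runs at
callback time. A sketch of the two call sites, mirroring the diff
below:

    /* before: module callback, references utrace_cachep */
    call_rcu(&utrace->rcu, utrace_free_rcu);

    /* after: generic kernel path, nothing of ours runs later */
    kfree_rcu(utrace, rcu);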
---
 runtime/stp_utrace.c | 97 ++++++++++++++++----------------------------
 1 file changed, 34 insertions(+), 63 deletions(-)
diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c
index 7870c67c8..a0d2ae34f 100644
--- a/runtime/stp_utrace.c
+++ b/runtime/stp_utrace.c
@@ -77,7 +77,32 @@
  */
 struct utrace {
 	spinlock_t lock;
-	struct list_head attached, attaching;
+
+	union {
+		/*
+		 * These lists are not used in RCU read side critical sections,
+		 * so we can reuse the memory for the RCU callback once this
+		 * struct is ready to be freed in order to save space. The RCU
+		 * callback (struct callback_head) is the same size as struct
+		 * list_head. We reuse the space of two list_head structs for
+		 * one RCU callback struct in case the RCU struct's forced
+		 * alignment necessitates extra space.
+		 *
+		 * The reason why this is safe is because RCU is not used to
+		 * protect the lifetime of the entire utrace struct; RCU is only
+		 * used to allow non-blocking iteration through the hashlists
+		 * that the utrace structs reside in. Therefore, when the utrace
+		 * struct is ready to be passed onto kfree_rcu(), we are assured
+		 * that the entire utrace struct is unused except for the hlist
+		 * member which needs to be kept intact to allow concurrent RCU
+		 * readers to safely finish iterating through the hashlists.
+		 */
+		struct {
+			struct list_head attached, attaching;
+		};
+
+		struct rcu_head rcu;
+	};
 	struct utrace_engine *reporting;
@@ -104,8 +129,6 @@ struct utrace {
 	struct task_work resume_work;
 	struct task_work report_work;
-
-	struct rcu_head rcu;
 };
 #ifndef TASK_UTRACE_HASH_BITS
@@ -227,8 +250,6 @@ static void __stp_utrace_free_task_work(struct task_work *work)
 	__stp_utrace_free_task_work_from_pool(utrace_work);
 }
-static struct kmem_cache *utrace_cachep;
-static struct kmem_cache *utrace_engine_cachep;
 static const struct utrace_engine_ops utrace_detached_ops; /* forward decl */
 static void utrace_report_clone(void *cb_data __attribute__ ((unused)),
@@ -334,8 +355,6 @@ static int utrace_init(void)
 {
 	int i;
 	int rc = -1;
-	static char kmem_cache1_name[50];
-	static char kmem_cache2_name[50];
 	if (unlikely(stp_task_work_init() != 0))
 		goto error;
@@ -385,25 +404,6 @@ static int utrace_init(void)
 	}
 #endif
-	/* PR14781: avoid kmem_cache naming collisions (detected by CONFIG_DEBUG_VM)
-	   by plopping a non-conflicting token - in this case the address of a
-	   locally relevant variable - into the names. */
-	snprintf(kmem_cache1_name, sizeof(kmem_cache1_name),
-		 "utrace_%lx", (unsigned long) (& utrace_cachep));
-	utrace_cachep = kmem_cache_create(kmem_cache1_name,
-					  sizeof(struct utrace),
-					  0, 0, NULL);
-	if (unlikely(!utrace_cachep))
-		goto error;
-
-	snprintf(kmem_cache2_name, sizeof(kmem_cache2_name),
-		 "utrace_engine_%lx", (unsigned long) (& utrace_engine_cachep));
-	utrace_engine_cachep = kmem_cache_create(kmem_cache2_name,
-						 sizeof(struct utrace_engine),
-						 0, 0, NULL);
-	if (unlikely(!utrace_engine_cachep))
-		goto error;
-
 	rc = STP_TRACE_REGISTER(sched_process_fork, utrace_report_clone);
 	if (unlikely(rc != 0)) {
 		_stp_error("register_trace_sched_process_fork failed: %d", rc);
@@ -444,14 +444,6 @@ error2:
 	STP_TRACE_UNREGISTER(sched_process_fork, utrace_report_clone);
 	tracepoint_synchronize_unregister();
 error:
-	if (utrace_cachep) {
-		kmem_cache_destroy(utrace_cachep);
-		utrace_cachep = NULL;
-	}
-	if (utrace_engine_cachep) {
-		kmem_cache_destroy(utrace_engine_cachep);
-		utrace_engine_cachep = NULL;
-	}
 	return rc;
 }
@@ -508,20 +500,6 @@ static int utrace_exit(void)
 	}
 	stp_spin_unlock_irqrestore(&__stp_utrace_task_work_list_lock, flags);
-	if (utrace_cachep) {
-		/*
-		 * We need an RCU barrier because utrace_cachep is used in the
-		 * utrace RCU callback (utrace_free_rcu()).
-		 */
-		rcu_barrier();
-		kmem_cache_destroy(utrace_cachep);
-		utrace_cachep = NULL;
-	}
-	if (utrace_engine_cachep) {
-		kmem_cache_destroy(utrace_engine_cachep);
-		utrace_engine_cachep = NULL;
-	}
-
 #ifdef STP_TF_DEBUG
 	printk(KERN_ERR "%s:%d - exit\n", __FUNCTION__, __LINE__);
 #endif
@@ -554,13 +532,6 @@ stp_task_notify_resume(struct task_struct *target, struct utrace *utrace)
 	}
 }
-static void utrace_free_rcu(struct rcu_head *rcu)
-{
-	struct utrace *utrace = container_of(rcu, typeof(*utrace), rcu);
-
-	kmem_cache_free(utrace_cachep, utrace);
-}
-
 /*
  * Clean up everything associated with @task.utrace.
  *
@@ -580,11 +551,11 @@ static void utrace_cleanup(struct utrace *utrace)
 #endif
 		list_del_init(&engine->entry);
 		/* FIXME: hmm, should this be utrace_engine_put()? */
-		kmem_cache_free(utrace_engine_cachep, engine);
+		_stp_kfree(engine);
 	}
 	list_for_each_entry_safe(engine, next, &utrace->attaching, entry) {
 		list_del(&engine->entry);
-		kmem_cache_free(utrace_engine_cachep, engine);
+		_stp_kfree(engine);
 	}
 	spin_unlock(&utrace->lock);
@@ -593,7 +564,7 @@ static void utrace_cleanup(struct utrace *utrace)
 	 * since those were cancelled by
 	 * utrace_cancel_all_task_work(). */
-	call_rcu(&utrace->rcu, utrace_free_rcu);
+	kfree_rcu(utrace, rcu);
 #ifdef STP_TF_DEBUG
 	printk(KERN_ERR "%s:%d exit\n", __FUNCTION__, __LINE__);
 #endif
@@ -726,7 +697,7 @@ static struct utrace *utrace_task_alloc(struct utrace_bucket *bucket,
 	struct utrace *utrace;
 	unsigned long flags;
-	utrace = kmem_cache_zalloc(utrace_cachep, STP_ALLOC_FLAGS);
+	utrace = kzalloc(sizeof(*utrace), STP_ALLOC_FLAGS);
 	if (unlikely(!utrace))
 		return NULL;
@@ -797,7 +768,7 @@ static void utrace_free(struct utrace_bucket *bucket, struct utrace *utrace)
 	FREE_IF_WORK(utrace->resume_work_added, utrace_resume);
 	FREE_IF_WORK(utrace->report_work_added, utrace_report_work);
-	call_rcu(&utrace->rcu, utrace_free_rcu);
+	kfree_rcu(utrace, rcu);
 }
 /*
@@ -822,7 +793,7 @@ static void __utrace_engine_release(struct kref *kref)
 	BUG_ON(!list_empty(&engine->entry));
 	if (engine->release)
 		(*engine->release)(engine->data);
-	kmem_cache_free(utrace_engine_cachep, engine);
+	_stp_kfree(engine);
 }
 static bool engine_matches(struct utrace_engine *engine, int flags,
@@ -982,7 +953,7 @@ static struct utrace_engine *utrace_attach_task(
 		return ERR_PTR(-ENOMEM);
 	}
-	engine = kmem_cache_alloc(utrace_engine_cachep, STP_ALLOC_FLAGS);
+	engine = _stp_kmalloc(sizeof(*engine));
 	if (unlikely(!engine))
 		return ERR_PTR(-ENOMEM);
@@ -999,7 +970,7 @@ static struct utrace_engine *utrace_attach_task(
 	ret = utrace_add_engine(target, utrace, engine, flags, ops, data);
 	if (unlikely(ret)) {
-		kmem_cache_free(utrace_engine_cachep, engine);
+		_stp_kfree(engine);
 		engine = ERR_PTR(ret);
 	}
--
2.30.0