kerneltoast/0001-task_finder-try-harder-to-get-the-exe-file-from-an-m.patch Secret

## 0001-task_finder-try-harder-to-get-the-exe-file-from-an-m.patch
From 7571f6dc2fb1e14b47e80e9181e12ce32f559df4 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@openresty.com>
Date: Wed, 28 Oct 2020 13:23:53 -0700
Subject: [PATCH] task_finder: try harder to get the exe file from an mm

Although it would be nice if we could avoid sleeping in
stap_find_exe_file(), attempting to do so can cause the task finder to
not attach to a desired target process. This is especially noticeable
when a target PID is specified, in which case the target PID itself can
get skipped over by the task finder.

Sleeping in stap_find_exe_file() is therefore unavoidable, so we must
grab a reference to a task's mm and release the task lock in order to
sleep while trying to acquire the mmap semaphore.

We should also treat failures to get the exe file for a specific target
PID as fatal, since that means the target PID will never get attached.
---
 runtime/linux/task_finder.c  | 31 +++++++++++++++----------------
 runtime/linux/task_finder2.c | 32 +++++++++++++++-----------------
 runtime/task_finder_vma.c    | 16 +++++++---------
 3 files changed, 37 insertions(+), 42 deletions(-)

diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c
index 4c2336c01..dff65312f 100644
--- a/runtime/linux/task_finder.c
+++ b/runtime/linux/task_finder.c
@@ -1663,26 +1663,25 @@ stap_start_task_finder(void)
 			goto stf_err;
 		}

-		// Grab the path associated with this task.
-		//
-		// Note we aren't calling get_task_mm()/mmput() here.
-		// Instead we're calling task_lock()/task_unlock().
-		// We really only need to lock the mm, but mmput() can
-		// sleep so we can't call it.  Also note that
-		// __stp_get_mm_path() grabs the mmap semaphore, which
-		// should also keep us safe.
+		// Grab the path associated with this task. We grab a reference
+		// to the mm which needs to be released later.
 		task_lock(tsk);
-		if (! tsk->mm) {
-			/* If the thread doesn't have a mm_struct, it is
-			 * a kernel thread which we need to skip. */
-			task_unlock(tsk);
-			continue;
-		}
-		mmpath = __stp_get_mm_path(tsk->mm, mmpath_buf, PATH_MAX);
+		mm = tsk->mm;
+		if (mm && !atomic_inc_not_zero(&mm->mm_users))
+			mm = NULL;
 		task_unlock(tsk);
+
+		// If the thread doesn't have a mm_struct, it is a kernel
+		// thread which we need to skip.
+		if (!mm)
+			continue;
+
+		mmpath = __stp_get_mm_path(mm, mmpath_buf, PATH_MAX);
+		mmput(mm);
 		if (mmpath == NULL || IS_ERR(mmpath)) {
 			rc = -PTR_ERR(mmpath);
-			if (rc == ENOENT) {
+			/* If this was our target then it's a fatal error */
+			if (!_stp_target && rc == ENOENT) {
 				rc = 0;	/* ignore ENOENT */
 				continue;
 			}
diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
index a1f182695..b8b1bbfdc 100644
--- a/runtime/linux/task_finder2.c
+++ b/runtime/linux/task_finder2.c
@@ -1701,27 +1701,25 @@ stap_start_task_finder(void)
 			goto stf_err;
 		}

-		// Grab the path associated with this task.
-		//
-		// Note we aren't calling get_task_mm()/mmput() here.
-		// Instead we're calling task_lock()/task_unlock().
-		// We really only need to lock the mm, but mmput() can
-		// sleep so we can't call it.  Also note that
-		// __stp_get_mm_path() grabs the mmap semaphore, which
-		// should also keep us safe.
+		// Grab the path associated with this task. We grab a reference
+		// to the mm which needs to be released later.
 		task_lock(tsk);
-		if (! tsk->mm) {
-			/* If the thread doesn't have a mm_struct, it
-			 * is a kernel thread which we need to
-			 * skip. */
-			task_unlock(tsk);
-			continue;
-		}
-		mmpath = __stp_get_mm_path(tsk->mm, mmpath_buf, PATH_MAX);
+		mm = tsk->mm;
+		if (mm && !atomic_inc_not_zero(&mm->mm_users))
+			mm = NULL;
 		task_unlock(tsk);
+
+		// If the thread doesn't have a mm_struct, it is a kernel
+		// thread which we need to skip.
+		if (!mm)
+			continue;
+
+		mmpath = __stp_get_mm_path(mm, mmpath_buf, PATH_MAX);
+		mmput(mm);
 		if (mmpath == NULL || IS_ERR(mmpath)) {
 			rc = -PTR_ERR(mmpath);
-			if (rc == ENOENT) {
+			/* If this was our target then it's a fatal error */
+			if (!_stp_target && rc == ENOENT) {
 				rc = 0;	/* ignore ENOENT */
 				continue;
 			}
diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c
index 51134fe79..2fa7a1119 100644
--- a/runtime/task_finder_vma.c
+++ b/runtime/task_finder_vma.c
@@ -374,19 +374,17 @@ stap_find_exe_file(struct mm_struct* mm)
 	//
 	// So, for kernels >= 4.1, we'll use get_mm_exe_file(). For
 	// kernels < 4.1 but with get_mm_exe_file() exported, we'll
-	// still use our own code. The original get_mm_exe_file() can
-	// sleep (since it calls down_read()), so we'll have to roll
-	// our own.
+	// still use our own code.
 #if defined(STAPCONF_DPATH_PATH) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0))
 	return get_mm_exe_file(mm);
 #else
 	struct file *exe_file = NULL;

-	// The down_read() function can sleep, so we'll call
-	// down_read_trylock() instead, which can fail.  If it
-	// fails, we'll just pretend this task didn't have a
-	// exe file.
-	if (mm && down_read_trylock(&mm->mmap_sem)) {
+	if (!mm)
+		return NULL;
+
+	down_read(&mm->mmap_sem);
+	{

 		// VM_EXECUTABLE was killed in kernel commit e9714acf,
 		// but in kernels that new we can just use
@@ -404,8 +402,8 @@ stap_find_exe_file(struct mm_struct* mm)
 #endif
 		if (exe_file)
 			get_file(exe_file);
-		up_read(&mm->mmap_sem);
 	}
+	up_read(&mm->mmap_sem);
 	return exe_file;
 #endif
 }
--
2.29.1
	From 7571f6dc2fb1e14b47e80e9181e12ce32f559df4 Mon Sep 17 00:00:00 2001
	From: Sultan Alsawaf <sultan@openresty.com>
	Date: Wed, 28 Oct 2020 13:23:53 -0700
	Subject: [PATCH] task_finder: try harder to get the exe file from an mm

	Although it would be nice if we could avoid sleeping in
	stap_find_exe_file(), attempting to do so can cause the task finder to
	not attach to a desired target process. This is especially noticeable
	when a target PID is specified, in which case the target PID itself can
	get skipped over by the task finder.

	Sleeping in stap_find_exe_file() is therefore unavoidable, so we must
	grab a reference to a task's mm and release the task lock in order to
	sleep while trying to acquire the mmap semaphore.

	We should also treat failures to get the exe file for a specific target
	PID as fatal, since that means the target PID will never get attached.
	---
	runtime/linux/task_finder.c \| 31 +++++++++++++++----------------
	runtime/linux/task_finder2.c \| 32 +++++++++++++++-----------------
	runtime/task_finder_vma.c \| 16 +++++++---------
	3 files changed, 37 insertions(+), 42 deletions(-)

	diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c
	index 4c2336c01..dff65312f 100644
	--- a/runtime/linux/task_finder.c
	+++ b/runtime/linux/task_finder.c
	@@ -1663,26 +1663,25 @@ stap_start_task_finder(void)
	goto stf_err;
	}

	- // Grab the path associated with this task.
	- //
	- // Note we aren't calling get_task_mm()/mmput() here.
	- // Instead we're calling task_lock()/task_unlock().
	- // We really only need to lock the mm, but mmput() can
	- // sleep so we can't call it. Also note that
	- // __stp_get_mm_path() grabs the mmap semaphore, which
	- // should also keep us safe.
	+ // Grab the path associated with this task. We grab a reference
	+ // to the mm which needs to be released later.
	task_lock(tsk);
	- if (! tsk->mm) {
	- /* If the thread doesn't have a mm_struct, it is
	- * a kernel thread which we need to skip. */
	- task_unlock(tsk);
	- continue;
	- }
	- mmpath = __stp_get_mm_path(tsk->mm, mmpath_buf, PATH_MAX);
	+ mm = tsk->mm;
	+ if (mm && !atomic_inc_not_zero(&mm->mm_users))
	+ mm = NULL;
	task_unlock(tsk);
	+
	+ // If the thread doesn't have a mm_struct, it is a kernel
	+ // thread which we need to skip.
	+ if (!mm)
	+ continue;
	+
	+ mmpath = __stp_get_mm_path(mm, mmpath_buf, PATH_MAX);
	+ mmput(mm);
	if (mmpath == NULL \|\| IS_ERR(mmpath)) {
	rc = -PTR_ERR(mmpath);
	- if (rc == ENOENT) {
	+ /* If this was our target then it's a fatal error */
	+ if (!_stp_target && rc == ENOENT) {
	rc = 0; /* ignore ENOENT */
	continue;
	}
	diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
	index a1f182695..b8b1bbfdc 100644
	--- a/runtime/linux/task_finder2.c
	+++ b/runtime/linux/task_finder2.c
	@@ -1701,27 +1701,25 @@ stap_start_task_finder(void)
	goto stf_err;
	}

	- // Grab the path associated with this task.
	- //
	- // Note we aren't calling get_task_mm()/mmput() here.
	- // Instead we're calling task_lock()/task_unlock().
	- // We really only need to lock the mm, but mmput() can
	- // sleep so we can't call it. Also note that
	- // __stp_get_mm_path() grabs the mmap semaphore, which
	- // should also keep us safe.
	+ // Grab the path associated with this task. We grab a reference
	+ // to the mm which needs to be released later.
	task_lock(tsk);
	- if (! tsk->mm) {
	- /* If the thread doesn't have a mm_struct, it
	- * is a kernel thread which we need to
	- * skip. */
	- task_unlock(tsk);
	- continue;
	- }
	- mmpath = __stp_get_mm_path(tsk->mm, mmpath_buf, PATH_MAX);
	+ mm = tsk->mm;
	+ if (mm && !atomic_inc_not_zero(&mm->mm_users))
	+ mm = NULL;
	task_unlock(tsk);
	+
	+ // If the thread doesn't have a mm_struct, it is a kernel
	+ // thread which we need to skip.
	+ if (!mm)
	+ continue;
	+
	+ mmpath = __stp_get_mm_path(mm, mmpath_buf, PATH_MAX);
	+ mmput(mm);
	if (mmpath == NULL \|\| IS_ERR(mmpath)) {
	rc = -PTR_ERR(mmpath);
	- if (rc == ENOENT) {
	+ /* If this was our target then it's a fatal error */
	+ if (!_stp_target && rc == ENOENT) {
	rc = 0; /* ignore ENOENT */
	continue;
	}
	diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c
	index 51134fe79..2fa7a1119 100644
	--- a/runtime/task_finder_vma.c
	+++ b/runtime/task_finder_vma.c
	@@ -374,19 +374,17 @@ stap_find_exe_file(struct mm_struct* mm)
	//
	// So, for kernels >= 4.1, we'll use get_mm_exe_file(). For
	// kernels < 4.1 but with get_mm_exe_file() exported, we'll
	- // still use our own code. The original get_mm_exe_file() can
	- // sleep (since it calls down_read()), so we'll have to roll
	- // our own.
	+ // still use our own code.
	#if defined(STAPCONF_DPATH_PATH) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0))
	return get_mm_exe_file(mm);
	#else
	struct file *exe_file = NULL;

	- // The down_read() function can sleep, so we'll call
	- // down_read_trylock() instead, which can fail. If it
	- // fails, we'll just pretend this task didn't have a
	- // exe file.
	- if (mm && down_read_trylock(&mm->mmap_sem)) {
	+ if (!mm)
	+ return NULL;
	+
	+ down_read(&mm->mmap_sem);
	+ {

	// VM_EXECUTABLE was killed in kernel commit e9714acf,
	// but in kernels that new we can just use
	@@ -404,8 +402,8 @@ stap_find_exe_file(struct mm_struct* mm)
	#endif
	if (exe_file)
	get_file(exe_file);
	- up_read(&mm->mmap_sem);
	}
	+ up_read(&mm->mmap_sem);
	return exe_file;
	#endif
	}
	--
	2.29.1