Created
June 29, 2021 00:06
-
-
Save pasko/fb929922d2447bc31e267262941570cc to your computer and use it in GitHub Desktop.
Demonstrate on-demand ptrace() of a single thread.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Demonstrate on-demand ptrace() of a single thread, not affecting any other
// thread in the process. The Linux/x86-64 version.
//
// This differs from ptrace() examples I could find on the Web by:
// * Tracing the parent process from the child (same as Crashpad)
// * Allowing other threads to keep running in the background
//
// This is just a *demo*. A lot of error situations are not checked for, and
// neither are process states (with respect to signals, for example). It uses a
// mix of fork(2)+threads and malloc() in the forked process. It uses pipe() for
// message passing and synchronization - there exist faster alternatives.
//
// --
// Egor Pasko (pasko@chromium.org)
//
// Usage:
// gcc -g -O2 -o run_me strace_parent.c -lpthread && ./run_me
#define _POSIX_C_SOURCE 200112L
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>
// syscall(2) is declared by hand here; NOTE(review): presumably because the
// feature-test macro selected at the top of the file keeps <unistd.h> from
// declaring it - confirm against the target libc.
extern long syscall(long number, ...);

// Returns the kernel thread ID of the calling thread by invoking the gettid
// system call directly.
pid_t gettid(void) {
  // The raw syscall wrapper returns long; narrow it explicitly to pid_t.
  return (pid_t)syscall(__NR_gettid);
}
// Syscall number used purely as a marker: main() compares the traced thread's
// syscall number against this value and detaches when it matches.
const int kFakeDetachSyscallNumber = 9999;

// Asks the tracer to detach by issuing the marker syscall. The syscall's
// result carries no information for the caller, so it is deliberately ignored.
void RequestDetachFromPtrace(void) {
  (void)syscall(kFakeDetachSyscallNumber);
}
// Runs a small workload made of a few file operations (so that the tracer has
// some syscalls to show), then asks the tracer to detach.
//
// Returns 0 on success and 1 if any of the file operations fails. On failure
// the detach request is NOT issued.
int TracedWorkload(void) {
  printf("parent: in TracedWorkload()..\n");

  // Read.
  FILE *urandom = fopen("/dev/urandom", "r");
  if (!urandom) {
    perror("open urandom");
    return 1;
  }
  uint32_t x = 0;
  if (fread(&x, sizeof(x), 1, urandom) != 1) {
    fprintf(stderr, "read urandom: short read\n");
    fclose(urandom);
    return 1;
  }
  // Cast so that the argument type matches %x exactly.
  printf("parent: fread(\"/dev/urandom\")[1] = 0x%08x\n", (unsigned)x);
  fclose(urandom);

  // Write.
  FILE *tmp = fopen("/tmp/strace-example-out.txt", "w");
  if (!tmp) {
    perror("write tmp");
    return 1;
  }
  size_t items_written = fwrite("1", 1, 1, tmp);
  // fclose() flushes the stream, so a failed write may only be reported here.
  int close_result = fclose(tmp);
  if (items_written != 1 || close_result != 0) {
    perror("write tmp");
    return 1;
  }

  // Detach.
  RequestDetachFromPtrace();

  // Show that detach succeeded.
  printf("parent: after detaching\n");
  return 0;
}
// Thread entry point for the background worker: prints "working" every three
// seconds, forever. `arg` is unused. The function never returns in practice;
// the trailing return only satisfies the pthread start-routine signature.
void *ThreadRoutine(void *arg) {
  (void)arg;
  printf("in worker thread\n");
  for (;;) {
    sleep(3);
    printf("working\n");
  }
  return NULL;
}
int main(int argc, char **argv) { | |
// Create a thread on the side to print "working" every 3 seconds without an | |
// end. It would show that that only the thread being ptrace()-d is stopped. | |
// Stopping all threads from the tracee is possible with SIGSTOP/PTRACE_SEIZE, | |
// but it would require a different ptrace() command sequence in the child | |
// process. | |
pthread_t thread; | |
pthread_attr_t attr; | |
pthread_attr_init(&attr); | |
if (pthread_create(&thread, &attr, ThreadRoutine, NULL)) { | |
fprintf(stderr, "pthread_create failed\n"); | |
return 1; | |
} | |
// Create a pipe. | |
int fds[2]; | |
if (pipe(fds) == -1) { | |
perror("pipe"); | |
return 1; | |
} | |
int receiver_fd = fds[0]; | |
int sender_fd = fds[1]; | |
printf("sender_fd: %d, receiver_fd: %d\n", sender_fd, receiver_fd); | |
// Another pipe to ping the parent. | |
if (pipe(fds) == -1) { | |
perror("pipe"); | |
return 1; | |
} | |
int wait_fd = fds[0]; | |
int wake_fd = fds[1]; | |
printf("wait_fd: %d, wake_fd: %d\n", wait_fd, wake_fd); | |
// Fork. | |
// | |
// Using fork() when running arbitrary code in other threads can deadlock: the | |
// child process may end up with a few locks held that will never be released. | |
// For long running strace collection, need to make sure the child process | |
// does not allocate. | |
// | |
// Alternative considered: tried to avoid the restrictions of | |
// kernel.yama.ptrace_scope=1 using PTRACE_ATTACH from another _thread_ | |
// (avoids fork(2)). Unfortunately, the kernel no longer tries to track the | |
// process group when rejecting PTRACE_ATTACH (since 2005). Linus recommends a | |
// workaround with CLONE_VFORK|CLONE_VM: https://lkml.org/lkml/2006/9/1/217. | |
// This allows to PTRACE_ATTACH from a clone indeed, but it is unclear what to | |
// do after that: the parent waits for _exit(2) or exec(2) and does not | |
// respond to any attempts to wake it up using SIGCONT or PTRACE_SYSCALL | |
// followed by waitpid(2). | |
pid_t pid = fork(); | |
if (pid == -1) { | |
perror("fork"); | |
return 1; | |
} | |
if (pid != 0) { | |
// In parent process. | |
// Allow the child process to trace this process. Silences Yama. On Android | |
// Yama is not enforced, and this adjustment probably needs to be removed. | |
if (prctl(PR_SET_PTRACER, pid, 0, 0, 0) != 0) { | |
perror("set the child as a ptracer"); | |
return 1; | |
} | |
// Send PID/TID to the child. | |
pid_t self_tid = gettid(); | |
pid_t self_pid = getpid(); | |
// TODO: HANDLE_EINTR and short reads. | |
if (write(sender_fd, &self_tid, sizeof(self_tid)) != sizeof(self_tid)) { | |
fprintf(stderr, "not enough bytes written to the pipe\n"); | |
return 1; | |
} | |
if (write(sender_fd, &self_pid, sizeof(self_pid)) != sizeof(self_pid)) { | |
fprintf(stderr, "not enough bytes written to the pipe\n"); | |
return 1; | |
} | |
printf("write succeeded, closing the sender FD\n"); | |
// Closing is important to wake up the read in another process. | |
close(sender_fd); | |
printf("parent: stopping, self_tid=%d, self_pid=%d\n", | |
self_tid, self_pid); | |
// Block on a syscall. | |
int result = 0; | |
if (read(wait_fd, &result, 1) != 1) { | |
perror("read wait_fd"); | |
return 1; | |
} | |
printf("parent: after stopping\n"); | |
return TracedWorkload(); | |
} | |
// In child process. Receive PID and TID of the parent. | |
pid_t parent_tid; | |
pid_t parent_pid; | |
printf("child: before reading parent_tid\n"); | |
if (read(receiver_fd, &parent_tid, sizeof(parent_tid)) == -1) { | |
perror("read ptid"); | |
return 1; | |
} | |
if (read(receiver_fd, &parent_pid, sizeof(parent_pid)) == -1) { | |
perror("read ppid"); | |
return 1; | |
} | |
if (!parent_tid) { | |
fprintf(stderr, "zero parent_tid looks suspicious"); | |
return 1; | |
} | |
printf("child: parent_tid=%d, parent_pid=%d\n", parent_tid, parent_pid); | |
// Attach to the parent. PTRACE_SEIZE is required for PTRACE_INTERRUPT. The | |
// PTRACE_INTERRUPT is required to ensure the parent has been stopped at the | |
// rignt point, not at some other random syscall. | |
pid = parent_tid; | |
if (ptrace(PTRACE_SEIZE, pid, 0, 0) != 0) { | |
perror("ptrace seize"); | |
return 1; | |
} | |
printf("child: seized\n"); | |
if (write(wake_fd, "1", 1) != 1) { | |
fprintf(stderr, "write wake_fd failed\n"); | |
return 1; | |
} | |
if (ptrace(PTRACE_INTERRUPT, pid, 0, 0) != 0) { | |
perror("PTRACE_INTERRUPT"); | |
return 1; | |
} | |
int status; | |
if (waitpid(pid, &status, __WALL) == -1) { | |
perror("waitpid after PTRACE_INTERRUPT and PTRACE_SEIZE"); | |
return 1; | |
} | |
printf("child: waited, sleeping\n"); | |
sleep(10); | |
printf("child: has slept\n"); | |
// Record every syscall until a request to detach is received or the tracee | |
// exits. | |
for (;;) { | |
// Continue until the next syscall is reached (but not started). | |
if (ptrace(PTRACE_SYSCALL, pid, 0, 0) == -1) { | |
perror("PTRACE_SYSCALL"); | |
return 1; | |
} | |
if (waitpid(pid, &status, __WALL) == -1) { | |
perror("waitpid after PTRACE_SYSCALL"); | |
return 1; | |
} | |
// Stop the loop when the process is gone. | |
if (WIFEXITED(status)) { | |
printf("child: saw parent exiting\n"); | |
return 0; | |
} | |
if (WIFSIGNALED(status)) { | |
printf("child: the parent was killed by signal %d\n", | |
WTERMSIG(status)); | |
return 0; | |
} | |
// Get registers. | |
struct user_regs_struct regs; | |
if (ptrace(PTRACE_GETREGS, pid, 0, ®s) == -1) { | |
perror("get regs"); | |
return 1; | |
} | |
// Detach if the tracee asks to do so. | |
long syscall_no = regs.orig_rax; | |
if (syscall_no == kFakeDetachSyscallNumber) { | |
sleep(15); | |
printf("child: parent requested to detach\n"); | |
struct user_regs_struct regs; | |
if (ptrace(PTRACE_GETREGS, pid, 0, ®s) == -1) { | |
perror("get regs on detach"); | |
return 1; | |
} | |
// Replace the fake syscall with nanosleep. It will return an error, | |
// which will be ignored. | |
int nanosleep_nr = 35; // On x86-64. | |
regs.rax = nanosleep_nr; | |
regs.orig_rax = nanosleep_nr; | |
// For arm (and perhaps aarch64) replacing the register is not sufficient, | |
// the PTRACE_SET_SYSCALL can be used. | |
if (ptrace(PTRACE_SETREGS, pid, 0, ®s) == -1) { | |
perror("set regs on detach"); | |
return 1; | |
} | |
// Detach. | |
if (ptrace(PTRACE_DETACH, pid, 0, 0) == -1) { | |
perror("detach"); | |
return 1; | |
} | |
// Continue all threads in parent. | |
kill(parent_pid, SIGCONT); | |
printf("child: detached\n"); | |
return 0; | |
} | |
// Print a representation of the system call. | |
// | |
// Below are a couple of shell functions for translating between syscall | |
// names and numbers. | |
// | |
// function get_syscall_number { | |
// printf __NR_"$1" | gcc -include sys/syscall.h -E - | |
// # On Linux x86-64 the low level file is at /usr/include/asm/unistd_64.h | |
// } | |
// function get_syscall_name { | |
// grep -P "#define __NR_.* $1\$" /usr/include/asm/unistd_64.h; | |
// } | |
// | |
char buf[128]; | |
int bytes_formatted = snprintf(buf, 128, | |
" strace: SYSCALL_%ld(%ld, %ld, %ld, %ld, %ld, %ld)", | |
syscall_no, | |
(long)regs.rdi, (long)regs.rsi, (long)regs.rdx, | |
(long)regs.r10, (long)regs.r8, (long)regs.r9); | |
if (bytes_formatted >= 128) { | |
fprintf(stderr, "syscall printing exceeded limits\n"); | |
return 1; | |
} | |
// Run the syscall and stop after it returns. | |
if (ptrace(PTRACE_SYSCALL, pid, 0, 0) == -1) { | |
perror("run syscall"); | |
return 1; | |
} | |
if (waitpid(pid, 0, 0) == -1) { | |
perror("waitpid after syscall runs"); | |
return 1; | |
} | |
// Print the syscall result. | |
if (ptrace(PTRACE_GETREGS, pid, 0, ®s) == -1) { | |
perror("get regs"); | |
return 1; | |
} | |
fprintf(stderr, "%s = %ld\n", buf, (long)regs.rax); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment