Skip to content

Instantly share code, notes, and snippets.

@pasko
Created June 29, 2021 00:06
Show Gist options
  • Save pasko/fb929922d2447bc31e267262941570cc to your computer and use it in GitHub Desktop.
Save pasko/fb929922d2447bc31e267262941570cc to your computer and use it in GitHub Desktop.
Demonstrate on-demand ptrace() of a single thread.
// Demonstrate on-demand ptrace() of a single thread, not affecting any other
// thread in the process. The Linux/x86-64 version.
//
// This differs from ptrace() examples I could find on the Web by:
// * Tracing the parent process from the child (same as Crashpad)
// * Allowing the other threads to keep running in the background
//
// This is just a *demo*. Many error situations are not checked for, and neither
// are process states (with respect to signals, for example). It mixes fork(2)
// with threads and uses malloc() in the forked process. It uses pipe() for
// message passing and synchronization - faster alternatives exist.
//
// --
// Egor Pasko (pasko@chromium.org)
//
// Usage:
// gcc -g -O2 -o run_me strace_parent.c -lpthread && ./run_me
#define _POSIX_C_SOURCE 200112L
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>
extern long syscall(long number, ...);
// Returns the kernel thread ID of the calling thread by issuing the raw
// gettid system call.
pid_t gettid(void) {
  // syscall() returns long; narrow it explicitly to pid_t.
  return (pid_t)syscall(__NR_gettid);
}
// Syscall number that the tracee issues to ask the tracer to detach. The
// tracer compares every intercepted syscall number against this value.
const int kFakeDetachSyscallNumber = 9999;

// Asks the tracer to detach by issuing the fake syscall. The result of the
// call carries no information for the caller and is deliberately ignored.
void RequestDetachFromPtrace(void) {
  (void)syscall(kFakeDetachSyscallNumber);
}
// Workload run by the thread being traced: performs a few file operations so
// that there are syscalls to report, then asks the tracer to detach.
//
// Returns 0 on success and 1 if any of the file operations fails. On failure
// the detach request is not issued.
int TracedWorkload(void) {
  printf("parent: in TracedWorkload()..\n");

  // Read.
  FILE *urandom = fopen("/dev/urandom", "r");
  if (!urandom) {
    perror("open urandom");
    return 1;
  }
  uint32_t x = 0;
  if (fread(&x, sizeof(x), 1, urandom) != 1) {
    // Do not print a value that was never read.
    fprintf(stderr, "short read from /dev/urandom\n");
    fclose(urandom);
    return 1;
  }
  printf("parent: fread(\"/dev/urandom\")[1] = 0x%08x\n", (unsigned)x);
  fclose(urandom);

  // Write.
  FILE *tmp = fopen("/tmp/strace-example-out.txt", "w");
  if (!tmp) {
    perror("write tmp");
    return 1;
  }
  const size_t items_written = fwrite("1", 1, 1, tmp);
  // fclose() flushes the stream, so its result is part of the write outcome.
  // Always close, even when fwrite() has already failed.
  const int close_result = fclose(tmp);
  if (items_written != 1 || close_result != 0) {
    fprintf(stderr, "writing /tmp/strace-example-out.txt failed\n");
    return 1;
  }

  // Detach.
  RequestDetachFromPtrace();

  // Show that detach succeeded.
  printf("parent: after detaching\n");
  return 0;
}
// Entry point of the background worker thread. Announces itself once, then
// prints "working" after each 3-second sleep, without end. |arg| is not used.
void* ThreadRoutine(void* arg) {
  fputs("in worker thread\n", stdout);
  while (1) {
    sleep(3);
    fputs("working\n", stdout);
  }
  // Not reached; present only to satisfy the pthread start-routine signature.
  return NULL;
}
// Reads exactly |size| bytes from |fd| into |buf|, retrying on EINTR and on
// short reads. |what| labels the error message. Returns 0 on success, -1 on
// a read error or if end-of-file is reached before |size| bytes arrive.
static int ReadFully(int fd, void *buf, size_t size, const char *what) {
  char *cursor = buf;
  size_t remaining = size;
  while (remaining > 0) {
    ssize_t n = read(fd, cursor, remaining);
    if (n == -1) {
      if (errno == EINTR) {
        continue;
      }
      perror(what);
      return -1;
    }
    if (n == 0) {
      fprintf(stderr, "%s: unexpected end of file\n", what);
      return -1;
    }
    cursor += n;
    remaining -= (size_t)n;
  }
  return 0;
}

// Inspects a wait status filled in by waitpid(). If it says that the tracee
// exited or was killed by a signal, prints the reason and returns 1.
// Otherwise returns 0.
static int TraceeIsGone(int status) {
  if (WIFEXITED(status)) {
    printf("child: saw parent exiting\n");
    return 1;
  }
  if (WIFSIGNALED(status)) {
    printf("child: the parent was killed by signal %d\n", WTERMSIG(status));
    return 1;
  }
  return 0;
}

// Starts a background worker thread, forks, and lets the forked child ptrace
// the main thread of the parent:
//  * the parent sends its TID/PID to the child over a pipe, blocks until the
//    child pings it back, then runs TracedWorkload();
//  * the child seizes the parent's main thread and prints every syscall it
//    makes until the parent issues the fake "detach" syscall or goes away.
//
// Returns 0 on success and 1 on any error. The command line is not used.
int main(int argc, char **argv) {
  (void)argc;
  (void)argv;

  // Create a thread on the side to print "working" every 3 seconds without an
  // end. It would show that only the thread being ptrace()-d is stopped.
  // Stopping all threads from the tracee is possible with SIGSTOP/PTRACE_SEIZE,
  // but it would require a different ptrace() command sequence in the child
  // process.
  pthread_t thread;
  if (pthread_create(&thread, NULL, ThreadRoutine, NULL)) {
    fprintf(stderr, "pthread_create failed\n");
    return 1;
  }

  // Create a pipe.
  int fds[2];
  if (pipe(fds) == -1) {
    perror("pipe");
    return 1;
  }
  int receiver_fd = fds[0];
  int sender_fd = fds[1];
  printf("sender_fd: %d, receiver_fd: %d\n", sender_fd, receiver_fd);

  // Another pipe to ping the parent.
  if (pipe(fds) == -1) {
    perror("pipe");
    return 1;
  }
  int wait_fd = fds[0];
  int wake_fd = fds[1];
  printf("wait_fd: %d, wake_fd: %d\n", wait_fd, wake_fd);

  // Anything still sitting in the stdout buffer would be duplicated into the
  // child by fork() and printed twice (happens when stdout is not a terminal).
  fflush(stdout);

  // Fork.
  //
  // Using fork() when running arbitrary code in other threads can deadlock: the
  // child process may end up with a few locks held that will never be released.
  // For long running strace collection, need to make sure the child process
  // does not allocate.
  //
  // Alternative considered: tried to avoid the restrictions of
  // kernel.yama.ptrace_scope=1 using PTRACE_ATTACH from another _thread_
  // (avoids fork(2)). Unfortunately, the kernel no longer tries to track the
  // process group when rejecting PTRACE_ATTACH (since 2005). Linus recommends a
  // workaround with CLONE_VFORK|CLONE_VM: https://lkml.org/lkml/2006/9/1/217.
  // This allows to PTRACE_ATTACH from a clone indeed, but it is unclear what to
  // do after that: the parent waits for _exit(2) or exec(2) and does not
  // respond to any attempts to wake it up using SIGCONT or PTRACE_SYSCALL
  // followed by waitpid(2).
  pid_t pid = fork();
  if (pid == -1) {
    perror("fork");
    return 1;
  }

  if (pid != 0) {
    // In parent process. Drop the pipe ends that only the child uses, so that
    // a dying child shows up as end-of-file on wait_fd instead of a hang.
    close(receiver_fd);
    close(wake_fd);

    // Allow the child process to trace this process. Silences Yama. On Android
    // Yama is not enforced, and this adjustment probably needs to be removed.
    if (prctl(PR_SET_PTRACER, pid, 0, 0, 0) != 0) {
      perror("set the child as a ptracer");
      return 1;
    }

    // Send PID/TID to the child.
    pid_t self_tid = gettid();
    pid_t self_pid = getpid();
    // TODO: HANDLE_EINTR and short writes.
    if (write(sender_fd, &self_tid, sizeof(self_tid)) != sizeof(self_tid)) {
      fprintf(stderr, "not enough bytes written to the pipe\n");
      return 1;
    }
    if (write(sender_fd, &self_pid, sizeof(self_pid)) != sizeof(self_pid)) {
      fprintf(stderr, "not enough bytes written to the pipe\n");
      return 1;
    }
    printf("write succeeded, closing the sender FD\n");
    close(sender_fd);
    printf("parent: stopping, self_tid=%d, self_pid=%d\n",
           self_tid, self_pid);

    // Block on a syscall until the child reports that it has seized us.
    char wake_byte = 0;
    if (read(wait_fd, &wake_byte, 1) != 1) {
      perror("read wait_fd");
      return 1;
    }
    printf("parent: after stopping\n");
    return TracedWorkload();
  }

  // In child process. Drop the pipe ends that only the parent uses, so that a
  // dying parent shows up as end-of-file on receiver_fd instead of a hang.
  close(sender_fd);
  close(wait_fd);

  // Receive PID and TID of the parent.
  pid_t parent_tid = 0;
  pid_t parent_pid = 0;
  printf("child: before reading parent_tid\n");
  if (ReadFully(receiver_fd, &parent_tid, sizeof(parent_tid), "read ptid")) {
    return 1;
  }
  if (ReadFully(receiver_fd, &parent_pid, sizeof(parent_pid), "read ppid")) {
    return 1;
  }
  if (!parent_tid) {
    fprintf(stderr, "zero parent_tid looks suspicious\n");
    return 1;
  }
  printf("child: parent_tid=%d, parent_pid=%d\n", parent_tid, parent_pid);

  // Attach to the parent. PTRACE_SEIZE is required for PTRACE_INTERRUPT. The
  // PTRACE_INTERRUPT is required to ensure the parent has been stopped at the
  // right point, not at some other random syscall.
  const pid_t tracee = parent_tid;
  if (ptrace(PTRACE_SEIZE, tracee, 0, 0) != 0) {
    perror("ptrace seize");
    return 1;
  }
  printf("child: seized\n");
  if (write(wake_fd, "1", 1) != 1) {
    fprintf(stderr, "write wake_fd failed\n");
    return 1;
  }
  if (ptrace(PTRACE_INTERRUPT, tracee, 0, 0) != 0) {
    perror("PTRACE_INTERRUPT");
    return 1;
  }
  int status;
  if (waitpid(tracee, &status, __WALL) == -1) {
    perror("waitpid after PTRACE_INTERRUPT and PTRACE_SEIZE");
    return 1;
  }
  printf("child: waited, sleeping\n");
  sleep(10);
  printf("child: has slept\n");

  // Record every syscall until a request to detach is received or the tracee
  // exits.
  for (;;) {
    // Continue until the next syscall is reached (but not started).
    if (ptrace(PTRACE_SYSCALL, tracee, 0, 0) == -1) {
      perror("PTRACE_SYSCALL");
      return 1;
    }
    if (waitpid(tracee, &status, __WALL) == -1) {
      perror("waitpid after PTRACE_SYSCALL");
      return 1;
    }
    // Stop the loop when the process is gone.
    if (TraceeIsGone(status)) {
      return 0;
    }

    // Get registers.
    struct user_regs_struct regs;
    if (ptrace(PTRACE_GETREGS, tracee, 0, &regs) == -1) {
      perror("get regs");
      return 1;
    }

    // Detach if the tracee asks to do so.
    long syscall_no = (long)regs.orig_rax;
    if (syscall_no == kFakeDetachSyscallNumber) {
      sleep(15);
      printf("child: parent requested to detach\n");
      // Replace the fake syscall with nanosleep. It will return an error,
      // which will be ignored. The tracee is still stopped, so the registers
      // fetched above are reused.
      regs.rax = __NR_nanosleep;
      regs.orig_rax = __NR_nanosleep;
      // For arm (and perhaps aarch64) replacing the register is not sufficient,
      // the PTRACE_SET_SYSCALL can be used.
      if (ptrace(PTRACE_SETREGS, tracee, 0, &regs) == -1) {
        perror("set regs on detach");
        return 1;
      }
      // Detach.
      if (ptrace(PTRACE_DETACH, tracee, 0, 0) == -1) {
        perror("detach");
        return 1;
      }
      // Continue all threads in parent.
      kill(parent_pid, SIGCONT);
      printf("child: detached\n");
      return 0;
    }

    // Print a representation of the system call.
    //
    // Below are a couple of shell functions for translating between syscall
    // names and numbers.
    //
    // function get_syscall_number {
    //   printf __NR_"$1" | gcc -include sys/syscall.h -E -
    //   # On Linux x86-64 the low level file is at /usr/include/asm/unistd_64.h
    // }
    // function get_syscall_name {
    //   grep -P "#define __NR_.* $1\$" /usr/include/asm/unistd_64.h;
    // }
    //
    char buf[128];
    int bytes_formatted = snprintf(buf, sizeof(buf),
        " strace: SYSCALL_%ld(%ld, %ld, %ld, %ld, %ld, %ld)",
        syscall_no,
        (long)regs.rdi, (long)regs.rsi, (long)regs.rdx,
        (long)regs.r10, (long)regs.r8, (long)regs.r9);
    if (bytes_formatted < 0 || (size_t)bytes_formatted >= sizeof(buf)) {
      fprintf(stderr, "syscall printing exceeded limits\n");
      return 1;
    }

    // Run the syscall and stop after it returns.
    if (ptrace(PTRACE_SYSCALL, tracee, 0, 0) == -1) {
      perror("run syscall");
      return 1;
    }
    if (waitpid(tracee, &status, __WALL) == -1) {
      perror("waitpid after syscall runs");
      return 1;
    }
    // The tracee may terminate inside the syscall (exit_group, for example).
    // There is no return value to read in that case.
    if (TraceeIsGone(status)) {
      fprintf(stderr, "%s = ?\n", buf);
      return 0;
    }

    // Print the syscall result.
    if (ptrace(PTRACE_GETREGS, tracee, 0, &regs) == -1) {
      perror("get regs");
      return 1;
    }
    fprintf(stderr, "%s = %ld\n", buf, (long)regs.rax);
  }
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment