Skip to content

Instantly share code, notes, and snippets.

@bonzini
Created August 10, 2017 12:26
Show Gist options
  • Save bonzini/523c792d9365112e50a44fa2c81df7fa to your computer and use it in GitHub Desktop.
Save bonzini/523c792d9365112e50a44fa2c81df7fa to your computer and use it in GitHub Desktop.
An LD_PRELOAD library that waits for a given binary to be exec-ed, and forces it under a gdbserver
/* Copyright (C) 2012-2017 by László Nagy
Copyright (C) 2017 Paolo Bonzini
This file is based on Bear.
It is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Sample use:
* $ gcc -g exec-hook.c -ldl -shared -o exec-hook.so -fPIC -O2
* $ stat /bin/ls | grep Inode
* Device: fd01h/64769d Inode: 136420948 Links: 1
* $ INTERCEPT_DEV_INODE=64769:136420948 \
* INTERCEPT_SOCKET=localhost:12345 LD_PRELOAD=./exec-hook.so \
* sh -c 'ls -l'
* Process /proc/self/fd/5 created; pid = 32055
* Listening on port 12345
*
* Now in another terminal:
* $ gdb
* (gdb) target remote localhost:12345
* (gdb) c
* (gdb) quit
*
* Add ":NN" at the end of INTERCEPT_DEV_INODE's value to trap the
* (N+1)-th invocation of the program (that is, skip the first N).
*
* Linux only, I'm sorry!
*/
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/eventfd.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <unistd.h>
#include <syscall.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <errno.h>
extern char **environ;
#define GDBSERVER_DEFAULT "/usr/bin/gdbserver"
#define ENV_INTERCEPT "INTERCEPT_DEV_INODE"
#define ENV_SOCKET "INTERCEPT_SOCKET"
#define ENV_GDBSERVER "INTERCEPT_GDBSERVER"
#define ENV_EVENTFD "INTERCEPT_EVENTFD"
/* This mutex only synchronizes threads inside the current process: it is
 * held by on_load and by my_execveat (around munge_exec and the exec calls).
 * Cross-process synchronization uses an eventfd.
 */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/* Names of the environment variables whose load-time values are saved by
 * capture_env and written back into the environment of every exec by
 * restore_environment. */
static char *env_names[] = {
ENV_INTERCEPT,
ENV_SOCKET,
ENV_GDBSERVER,
ENV_EVENTFD,
};
/* Number of entries in env_names. */
#define ENV_SIZE (sizeof(env_names) / sizeof(env_names[0]))
/* Declared as a constructor so that it runs when the library is loaded. */
static void on_load(void) __attribute__((constructor));
/* Utility methods to deal with string arrays. environment and process arguments
* are both represented as string arrays. */
/* Count the entries of a NULL-terminated string array.
 * A NULL array pointer is treated as an empty array. */
static size_t strings_length(char **in)
{
    if (in == NULL) {
        return 0;
    }
    size_t count = 0;
    while (in[count] != NULL) {
        count++;
    }
    return count;
}
/* Free every string of a NULL-terminated array and then the array itself.
 * Accepts NULL.  errno is left exactly as it was on entry, so the function
 * can be used on error paths without losing the failure code. */
static void strings_release(char **in)
{
    const int saved_errno = errno;
    if (in != NULL) {
        for (size_t i = 0; in[i] != NULL; i++) {
            free(in[i]);
        }
    }
    free(in);
    errno = saved_errno;
}
/* Build a NULL-terminated array holding copies of `arg` followed by the
 * remaining `const char *` variadic arguments, stopping at (and consuming)
 * the first NULL argument.
 *
 * Returns the new array (owned by the caller, free with strings_release),
 * or NULL when `arg` is NULL or when an allocation fails.  On allocation
 * failure everything built so far is released and `*args` is left at an
 * unspecified position inside the argument list. */
static char **strings_build(const char *arg, va_list *args)
{
    char **result = NULL;
    size_t size = 0;
    for (const char *it = arg; it; it = va_arg(*args, const char *)) {
        /* Grow through a temporary so the old array is not lost on failure. */
        char **grown = realloc(result, (size + 2) * sizeof(*grown));
        if (!grown) {
            goto undo;
        }
        result = grown;
        /* Keep the array terminated at all times so that the undo path
         * never walks uninitialized slots. */
        result[size] = NULL;
        char *copy = strdup(it);
        if (!copy) {
            goto undo;
        }
        result[size++] = copy;
        result[size] = NULL;
    }
    return result;
undo:
    strings_release(result);
    return NULL;
}
/* Append copies of all strings of `in` (NULL-terminated, may be NULL) to the
 * array `*p_out` (NULL-terminated, may be NULL).
 *
 * Returns 0 on success; `*p_out` is then always a valid NULL-terminated
 * array, even when both inputs were empty.  Returns -1 on allocation
 * failure: if `*p_out` was non-NULL it keeps exactly its original entries
 * (the array itself may have moved), and if it was NULL it stays NULL. */
static int strings_append_all(char ***p_out, char ** in)
{
    char **out = *p_out;
    size_t size = strings_length(out);
    size_t in_size = strings_length(in);
    char **result = realloc(out, (size + in_size + 1) * sizeof(*result));
    if (!result) {
        return -1;
    }
    *p_out = result;
    /* Terminate right away: a fresh allocation holds garbage, and the loop
     * below does not run at all when `in` is empty. */
    result[size] = NULL;
    for (size_t added = 0; added < in_size; ++added) {
        char *copy = strdup(in[added]);
        if (!copy) {
            int save_errno = errno;
            /* Drop the copies made so far and restore the original length. */
            while (added > 0) {
                free(result[size + --added]);
            }
            result[size] = NULL;
            if (!out) {
                /* There was no array before the call; hand back none. */
                free(result);
                *p_out = NULL;
            }
            errno = save_errno;
            return -1;
        }
        result[size + added] = copy;
        result[size + added + 1] = NULL;
    }
    return 0;
}
/* Duplicate a NULL-terminated string array by appending it to an empty one.
 * Returns NULL when strings_append_all reports failure. */
static char **strings_copy(char **in)
{
    char **duplicate = NULL;
    int rc = strings_append_all(&duplicate, in);
    return (rc < 0) ? NULL : duplicate;
}
/* Append the string `e` itself (no copy is made) to the NULL-terminated
 * array `*p_out`, which may be NULL.
 * Returns 0 on success and -1 if the array could not be grown; in the
 * latter case `*p_out` is not modified and `e` is not stored anywhere. */
static int strings_append(char ***p_out, char *e)
{
    const size_t count = strings_length(*p_out);
    char **grown = realloc(*p_out, (count + 2) * sizeof(char *));
    if (grown == NULL) {
        return -1;
    }
    grown[count] = e;
    grown[count + 1] = NULL;
    *p_out = grown;
    return 0;
}
/* Append a freshly allocated copy of `e` to the NULL-terminated array
 * `*p_out` (which may be NULL).
 * Returns 0 on success, -1 on allocation failure.  On failure `*p_out` is
 * unchanged and the temporary copy is freed rather than leaked. */
static int strings_append_copy(char ***p_out, const char *e)
{
    char *s = strdup(e);
    if (!s) {
        return -1;
    }
    if (strings_append(p_out, s) < 0) {
        /* strings_append did not take ownership of the copy. */
        int save_errno = errno;
        free(s);
        errno = save_errno;
        return -1;
    }
    return 0;
}
/* The hook's configuration variables are written back into the environment
 * passed to every exec, so that child processes keep the same behaviour
 * even if the program changed or dropped those variables.
 *
 * initial_env[i] holds a private copy of the value env_names[i] had when
 * capture_env ran, or NULL if the variable was unset (or the copy failed).
 */
static char *initial_env[ENV_SIZE];
static void capture_env(void)
{
    size_t idx = 0;
    while (idx < ENV_SIZE) {
        const char *current = getenv(env_names[idx]);
        if (current) {
            initial_env[idx] = strdup(current);
        } else {
            initial_env[idx] = NULL;
        }
        ++idx;
    }
}
/* Set `key` to `value` in the heap-allocated environment array `envs`
 * (NULL-terminated, may be NULL), replacing an existing "key=..." entry or
 * appending a new one.
 *
 * Takes ownership of `envs`.  Returns the (possibly reallocated) array on
 * success.  On allocation failure the whole array is released and NULL is
 * returned. */
static char **restore_environ_var(char *envs[], char *key, char *value)
{
    /* find the key if it's there */
    size_t key_length = strlen(key);
    char **it = envs;
    for (; it && *it; ++it) {
        /* strncmp stops at the entry's terminator, so entries shorter than
         * the key are never read past their end. */
        if (!strncmp(*it, key, key_length) && (*it)[key_length] == '=')
            break;
    }
    /* allocate an environment entry: key + '=' + value + NUL */
    size_t value_length = strlen(value);
    size_t env_length = key_length + value_length + 2;
    char *env = malloc(env_length);
    if (!env) {
        goto out;
    }
    snprintf(env, env_length, "%s=%s", key, value);
    /* replace or append the environment entry */
    if (it && *it) {
        free(*it);
        *it = env;
        return envs;
    }
    if (strings_append(&envs, env) < 0) {
        /* The entry was not stored; free it instead of leaking it. */
        int save_errno = errno;
        free(env);
        errno = save_errno;
        goto out;
    }
    return envs;
out:
    strings_release(envs);
    return NULL;
}
/* Build the environment to hand to exec: a copy of `envp` in which every
 * variable captured by capture_env is set to its captured value.
 * The result is heap-allocated (free with strings_release); it is NULL if
 * restore_environ_var fails. */
static char **restore_environment(char *const envp[])
{
    char **env = strings_copy((char **)envp);
    for (size_t i = 0; i < ENV_SIZE; ++i) {
        if (!initial_env[i]) {
            continue;
        }
        env = restore_environ_var(env, env_names[i], initial_env[i]);
        if (!env) {
            return NULL;
        }
    }
    return env;
}
/* Invoke the Linux system calls. */
/* Issue the raw execve system call with the hook's variables restored in
 * the environment.  Only returns if the exec failed; the result is then
 * always -1 with errno as left by the system call (strings_release keeps
 * errno intact). */
static int call_execve(const char *path, char *const argv[],
                       char *const envp[])
{
    char **patched_env = restore_environment(envp);
    (void) syscall(SYS_execve, path, argv, patched_env);
    /* Still here: the exec did not happen. */
    strings_release(patched_env);
    return -1;
}
/* Issue the raw execveat system call with the hook's variables restored in
 * the environment.  When the request is equivalent to a plain execve
 * (dirfd is AT_FDCWD and no flags), SYS_execve is used instead.
 * Only returns if the exec failed; the result is then always -1. */
static int call_execveat(int dirfd, const char *path, char *const argv[],
                         char *const envp[], int flags)
{
    char **patched_env = restore_environment(envp);
    const int plain_execve = (dirfd == AT_FDCWD) && (flags == 0);
    if (plain_execve) {
        (void) syscall(SYS_execve, path, argv, patched_env);
    } else {
        (void) syscall(SYS_execveat, dirfd, path, argv, patched_env, flags);
    }
    /* Still here: the exec did not happen. */
    strings_release(patched_env);
    return -1;
}
/* This is the main part of the hook. */
/* Hook state, filled in by on_load. */
/* Argument passed to gdbserver; NULL while the hook is inactive. */
static const char *socket;
/* Path of the gdbserver binary to exec. */
static const char *gdbserver;
/* Eventfd shared by all hooked processes, see get_token. */
static int evfd;
/* Device and inode number of the executable to trap.  These are wide
 * unsigned integers because inode numbers routinely exceed INT_MAX. */
static unsigned long long dev;
static unsigned long long ino;
/* Number of matching execs to let through before trapping one. */
static int skip = 0;
/* Parse "<dev>:<inode>[:<skip>]" (decimal) into dev, ino and skip.
 * `skip` defaults to 0 when omitted and must not be negative.
 * Returns 0 on success.  Returns -1 on malformed input, in which case none
 * of the three variables is modified. */
static int intercept_parse_envvar(const char *devino)
{
    unsigned long long parsed_dev = 0;
    unsigned long long parsed_ino = 0;
    int parsed_skip = 0;
    if (!devino) {
        return -1;
    }
    int fields = sscanf(devino, "%llu:%llu:%d",
                        &parsed_dev, &parsed_ino, &parsed_skip);
    if (fields < 2 || parsed_skip < 0) {
        return -1;
    }
    dev = parsed_dev;
    ino = parsed_ino;
    skip = parsed_skip;
    return 0;
}
/* Decide whether this exec of the target is the one to trap.
 * Returns 1 for exactly one caller across all hooked processes (the one
 * that reads the value 2 from the eventfd) and 0 for every other caller or
 * if the eventfd cannot be read. */
static int get_token(void)
{
    /* The eventfd counter is the number of execs left until the trapping
     * one, plus one.  The "plus one" exists because a counter of zero makes
     * reads block.  So 1 means the trapped exec already happened, 2 means
     * this is the trapping exec, and so on.
     */
    uint64_t counter;
    if (read(evfd, &counter, 8) < 8) {
        return 0;
    }
    /* The read reset the counter to zero, so nobody else can read it until
     * the write below: this is a cross-process critical section, unlike the
     * mutex.
     */
    const int is_trapping_exec = (counter == 2);
    /* Count down, but never below 1 so that later readers do not block. */
    uint64_t next = (counter > 1) ? counter - 1 : 1;
    write(evfd, &next, 8);
    return is_trapping_exec;
}
/* Create the eventfd shared with child processes, load it with the initial
 * countdown (skip + 2, see get_token) and publish its descriptor number in
 * the ENV_EVENTFD environment variable.
 *
 * Returns 0 on success.  On any failure returns -1 and leaves evfd == -1,
 * so that a half-initialized eventfd (whose zero counter would block every
 * reader) is never used. */
static int intercept_init_eventfd(void)
{
    /* Deliberately not close-on-exec: children must inherit it. */
    evfd = eventfd(0, 0);
    if (evfd == -1) {
        return -1;
    }
    uint64_t value = (uint64_t)skip + 2;
    if (write(evfd, &value, sizeof(value)) != (ssize_t)sizeof(value)) {
        goto fail;
    }
    /* Pass the eventfd file descriptor to children processes */
    char c[12];
    snprintf(c, sizeof(c), "%d", evfd);
    if (setenv(ENV_EVENTFD, c, 0) == -1) {
        goto fail;
    }
    return 0;
fail:;
    int save_errno = errno;
    close(evfd);
    evfd = -1;
    errno = save_errno;
    return -1;
}
/* Turn an O_PATH file descriptor into a regular read-only one by opening
 * its /proc/self/fd entry.  (openat(fd, "", O_RDONLY) does not seem to be
 * usable for an O_PATH descriptor.)
 * Returns the new descriptor, or -1 with errno set by open. */
static int reopen_path(int fd)
{
    char proc_link[30];
    snprintf(proc_link, sizeof proc_link, "/proc/self/fd/%d", fd);
    int reopened = open(proc_link, O_RDONLY);
    return reopened;
}
/* If `fd` refers to the target executable and this is the exec to trap,
 * replace the exec with
 *     gdbserver <socket> /proc/self/fd/<N> <argv[1..]>
 * where N is a read-only descriptor for the same file.
 *
 * Returns 0 when the exec should simply be passed through (hook inactive,
 * different file, or not the trapping invocation).  Returns -1 with errno
 * set when something failed, including a failed exec of gdbserver.  Does
 * not return when gdbserver was started successfully.  All resources
 * acquired here are released before returning. */
static int munge_exec(int fd, char* const* argv, char *const* envp, int flags)
{
    struct stat st;
    /* Check if we're active. */
    if (!socket) {
        return 0;
    }
    /* Check if the file matches the desired executable */
    if (fstatat(fd, "", &st, flags | AT_EMPTY_PATH) == -1) {
        return -1;
    }
    if (st.st_dev != dev || st.st_ino != ino) {
        return 0;
    }
    /* Check if it's the right time to start the gdbserver */
    if (!get_token()) {
        return 0;
    }
    /* Not close-on-exec: gdbserver opens the program through this fd. */
    int new_fd = reopen_path(fd);
    if (new_fd == -1) {
        return -1;
    }
    char name[30];
    snprintf(name, sizeof(name), "/proc/self/fd/%d", new_fd);
    /* The original arguments minus argv[0]; nothing if argv is empty. */
    char **rest = (argv && argv[0]) ? (char **) argv + 1 : NULL;
    /* Build gdbserver command line */
    char **new_argv = NULL;
    if (strings_append_copy(&new_argv, gdbserver) == 0 &&
        strings_append_copy(&new_argv, socket) == 0 &&
        strings_append_copy(&new_argv, name) == 0 &&
        strings_append_all(&new_argv, rest) == 0) {
        call_execve(gdbserver, new_argv, envp);
    }
    /* Either building the command line or the exec itself failed. */
    int save_errno = errno;
    strings_release(new_argv);
    close(new_fd);
    errno = save_errno;
    return -1;
}
/* Common implementation behind all exec wrappers.
 *
 * `fd` refers to the file to execute; `orig_dirfd`, `orig_path` and
 * `orig_flags` describe how the caller originally named it.  The mutex is
 * held for the duration of the call.  Only returns on failure: the result
 * is always -1 and errno is preserved across the mutex unlock. */
static int my_execveat(int fd, char *const argv[], char *const envp[],
                       int orig_dirfd, const char *orig_path,
                       int orig_flags)
{
    pthread_mutex_lock(&mutex);
    if (munge_exec(fd, argv, envp, orig_flags) == 0) {
        /* Not trapped: pass the call through, executing the descriptor. */
        call_execveat(fd, "", argv, envp, orig_flags | AT_EMPTY_PATH);
        if (errno == ENOENT || errno == ENOSYS) {
            /* Retry with the original path.  When coming from execve this
             * actually becomes an execve system call, so that we can run
             * on Linux < 3.19.
             */
            call_execveat(orig_dirfd, orig_path, argv, envp, orig_flags);
        }
    }
    const int saved_errno = errno;
    pthread_mutex_unlock(&mutex);
    errno = saved_errno;
    return -1;
}
/* These are the functions we are try to hijack. */
/* Replacement for a libc execveat wrapper, where one exists.
 *
 * With AT_EMPTY_PATH and an empty path the file to run is `dirfd` itself;
 * openat cannot resolve an empty path, so that descriptor is used directly.
 * Otherwise the file is opened as an O_PATH descriptor, which is closed
 * again if the exec fails.  Only returns on failure (-1, errno set). */
int execveat(int dirfd, const char *path, char *const argv[], char *const envp[],
             int flags)
{
    if ((flags & AT_EMPTY_PATH) && path && path[0] == '\0') {
        /* The descriptor belongs to the caller; do not close it. */
        return my_execveat(dirfd, argv, envp, dirfd, path, flags);
    }
    int fd = openat(dirfd, path, O_PATH | O_CLOEXEC);
    if (fd == -1) {
        return -1;
    }
    my_execveat(fd, argv, envp, dirfd, path, flags);
    /* The exec failed: do not leak the descriptor into the caller. */
    int save_errno = errno;
    close(fd);
    errno = save_errno;
    return -1;
}
/* Execute the file at `path` through the hook.
 * The file is opened as an O_PATH descriptor so that it can be identified
 * by device and inode; the descriptor is closed again if the exec fails.
 * Only returns on failure (-1, errno set). */
static int my_execve(const char *path, char *const argv[], char *const envp[])
{
    int fd = open(path, O_PATH | O_CLOEXEC);
    if (fd == -1) {
        return -1;
    }
    my_execveat(fd, argv, envp, AT_FDCWD, path, 0);
    /* The exec failed: do not leak the descriptor into the caller.  This
     * matters for PATH searches, which try many candidates in a row. */
    int save_errno = errno;
    close(fd);
    errno = save_errno;
    return -1;
}
/* Replacement for fexecve(3): execute the file referred to by `fd`.
 * The /proc/self/fd path serves as the fallback name if executing the
 * descriptor directly is not possible.  Only returns on failure (-1). */
int fexecve(int fd, char *const argv[], char *const envp[])
{
    char proc_link[30];
    snprintf(proc_link, sizeof proc_link, "/proc/self/fd/%d", fd);
    int rc = my_execveat(fd, argv, envp, AT_FDCWD, proc_link, 0);
    return rc;
}
/* Replacement for execve(2); routes the call through the hook.
 * Only returns on failure (-1, errno set). */
int execve(const char *path, char *const argv[], char *const envp[])
{
    int rc = my_execve(path, argv, envp);
    return rc;
}
/* Replacement for execv(3): like execve, with the current environment.
 * Only returns on failure (-1, errno set). */
int execv(const char *path, char *const argv[])
{
    char **current_env = environ;
    return my_execve(path, argv, current_env);
}
/* Replacement for execl(3): the NULL-terminated variadic arguments form
 * argv and the current environment is used.
 * Only returns on failure (-1, errno set).  If the argument vector cannot
 * be allocated, the call fails instead of executing the program without
 * its arguments. */
int execl(const char *path, const char *arg, ...)
{
    va_list args;
    va_start(args, arg);
    char **argv = strings_build(arg, &args);
    va_end(args);
    if (arg && !argv) {
        /* strings_build failed; errno comes from the failed allocation. */
        return -1;
    }
    my_execve(path, argv, environ);
    strings_release(argv);
    return -1;
}
/* Replacement for execle(3).  Real prototype:
 *   int execle(const char *path, const char *arg, ..., char *const envp[]);
 * The NULL-terminated variadic arguments form argv and the argument after
 * the terminating NULL is the environment.
 * Only returns on failure (-1, errno set).  If the argument vector cannot
 * be allocated the call fails right away: the argument list has then not
 * been consumed up to envp, so envp must not be fetched from it. */
int execle(const char *path, const char *arg, ...)
{
    va_list args;
    va_start(args, arg);
    char **argv = strings_build(arg, &args);
    int build_failed = (arg && !argv);
    char *const *envp = NULL;
    if (!build_failed) {
        envp = va_arg(args, char *const *);
    }
    va_end(args);
    if (build_failed) {
        /* errno comes from the failed allocation. */
        return -1;
    }
    my_execve(path, argv, envp);
    strings_release(argv);
    return -1;
}
/* These are the functions we are trying to hijack, for which we resolve
* the PATH ourselves.
*/
/* Fallback for files the kernel rejects with ENOEXEC: run them as a script
 * with
 *     /bin/sh, argv = { argv[0], "--", file, argv[1], ..., NULL }
 * If the caller passed no argv[0], "/bin/sh" takes its place.
 * Only returns on failure (-1, errno set); the temporary argument vector is
 * released on every path. */
static int do_shell(const char *file,
                    char *const argv[], char *const envp[])
{
    const int have_argv0 = (argv && argv[0]);
    const char *argv0 = have_argv0 ? argv[0] : "/bin/sh";
    /* The original arguments minus argv[0]; nothing if argv is empty. */
    char **rest = have_argv0 ? (char **) argv + 1 : NULL;
    char **new_argv = NULL;
    if (strings_append_copy(&new_argv, argv0) == 0 &&
        strings_append_copy(&new_argv, "--") == 0 &&
        strings_append_copy(&new_argv, file) == 0 &&
        strings_append_all(&new_argv, rest) == 0) {
        my_execve("/bin/sh", new_argv, envp);
    }
    /* strings_release keeps errno intact. */
    strings_release(new_argv);
    return -1;
}
/* Execute `file`, searching for it in the colon-separated `search_path`
 * when it contains no slash.  A NULL `search_path` means $PATH, or
 * "/bin:/usr/bin" if PATH is unset.
 *
 * Search rules:
 *  - an empty path element stands for the current directory;
 *  - ENOENT, EACCES and ENOTDIR on one candidate do not stop the search;
 *    if nothing could be executed, the first EACCES/ENOTDIR seen is
 *    reported, otherwise ENOENT;
 *  - ENOEXEC hands the candidate to do_shell;
 *  - any other error stops the search and is reported unchanged.
 *
 * Only returns on failure (-1, errno set). */
static int do_execvpe(const char *file, const char *search_path,
                      char *const argv[], char *const envp[])
{
    if (*file == '\0') {
        errno = ENOENT;
        return -1;
    }
    if (strchr(file, '/') != NULL) {
        my_execve(file, argv, envp);
        if (errno == ENOEXEC) {
            return do_shell(file, argv, envp);
        }
        return -1;
    }
    if (!search_path) {
        search_path = getenv("PATH");
        if (!search_path) {
            search_path = "/bin:/usr/bin";
        }
    }
    size_t file_len = strnlen(file, NAME_MAX + 1);
    if (file_len > NAME_MAX) {
        errno = ENAMETOOLONG;
        return -1;
    }
    /* One heap buffer large enough for the longest possible candidate
     * (whole search path + '/' + file + NUL), instead of a stack array
     * whose size is dictated by the environment. */
    char *path = malloc(strlen(search_path) + file_len + 2);
    if (!path) {
        return -1;
    }
    int ret = ENOENT;
    const char *p = search_path;
    for (;;) {
        const char *q = strchr(p, ':');
        if (!q) {
            q = p + strlen(p);
        }
        size_t n = (size_t)(q - p);
        memcpy(path, p, n);
        if (n > 0) {
            /* No separator for an empty element: "file", not "/file". */
            path[n++] = '/';
        }
        memcpy(path + n, file, file_len + 1);
        my_execve(path, argv, envp);
        if (errno == ENOEXEC) {
            do_shell(path, argv, envp);
            ret = errno;
            break;
        }
        if (errno == EACCES || errno == ENOTDIR) {
            /* Remember the first such error, but keep searching. */
            if (ret == ENOENT) {
                ret = errno;
            }
        } else if (errno != ENOENT) {
            /* Unexpected failure: report it as is. */
            ret = errno;
            break;
        }
        if (*q == '\0') {
            break;
        }
        p = q + 1;
    }
    free(path);
    errno = ret;
    return -1;
}
/* Replacement for execvpe(3): PATH search with an explicit environment.
 * Only returns on failure (-1, errno set). */
int execvpe(const char *file, char *const argv[], char *const envp[])
{
    const char *default_search = NULL; /* NULL selects $PATH */
    return do_execvpe(file, default_search, argv, envp);
}
/* Replacement for execvp(3): PATH search with the current environment.
 * Only returns on failure (-1, errno set). */
int execvp(const char *file, char *const argv[])
{
    const char *default_search = NULL; /* NULL selects $PATH */
    return do_execvpe(file, default_search, argv, environ);
}
/* Replacement for execvP: like execvp, but searching the caller-supplied
 * `search_path` instead of $PATH.
 * Only returns on failure (-1, errno set). */
int execvP(const char *file, const char *search_path, char *const argv[])
{
    char **current_env = environ;
    return do_execvpe(file, search_path, argv, current_env);
}
/* Replacement for execlp(3): the NULL-terminated variadic arguments form
 * argv, the file is searched for in $PATH, and the current environment is
 * used.
 * Only returns on failure (-1, errno set).  If the argument vector cannot
 * be allocated, the call fails instead of executing the program without
 * its arguments. */
int execlp(const char *file, const char *arg, ...)
{
    va_list args;
    va_start(args, arg);
    char **argv = strings_build(arg, &args);
    va_end(args);
    if (arg && !argv) {
        /* strings_build failed; errno comes from the failed allocation. */
        return -1;
    }
    do_execvpe(file, NULL, argv, environ);
    strings_release(argv);
    return -1;
}
#if 0
/* Currently we cannot do anything about these functions: posix_spawn and
 * posix_spawnp are not intercepted.  The stubs below are compiled out; if
 * they were enabled, every call would fail with ENOSYS. */
int posix_spawn(pid_t *restrict pid, const char *restrict path,
const posix_spawn_file_actions_t *file_actions,
const posix_spawnattr_t *restrict attrp,
char *const argv[restrict], char *const envp[restrict])
{
errno = ENOSYS;
return -1;
}
int posix_spawnp(pid_t *restrict pid, const char *restrict file,
const posix_spawn_file_actions_t *file_actions,
const posix_spawnattr_t *restrict attrp,
char *const argv[restrict], char *const envp[restrict])
{
errno = ENOSYS;
return -1;
}
#endif
/* The initialization method. */
/* Library constructor: read the configuration from the environment.
 *
 * The hook becomes active (socket != NULL) only if INTERCEPT_DEV_INODE
 * parses, INTERCEPT_SOCKET is non-empty and does not start with '-', and a
 * usable eventfd is available: either the descriptor number inherited
 * through INTERCEPT_EVENTFD, or a new eventfd created here.  Finally the
 * configuration variables are captured so that they can be restored in the
 * environment of every exec. */
static void on_load(void)
{
    char *devino_var = getenv(ENV_INTERCEPT);
    char *socket_var = getenv(ENV_SOCKET);
    char *gdbserver_var = getenv(ENV_GDBSERVER);
    char *evfd_var = getenv(ENV_EVENTFD);
    pthread_mutex_lock(&mutex);
    if (devino_var && intercept_parse_envvar(devino_var) != -1 &&
        socket_var && socket_var[0] && socket_var[0] != '-') {
        if (evfd_var) {
            /* Strict parse: a malformed value must not silently turn into
             * descriptor 0, as it would with atoi. */
            char *end = NULL;
            errno = 0;
            long parsed = strtol(evfd_var, &end, 10);
            if (errno != 0 || end == evfd_var || *end != '\0' ||
                parsed < 0 || parsed > INT_MAX) {
                evfd = -1;
            } else {
                evfd = (int) parsed;
            }
        } else {
            /* First hooked process: create the eventfd.  On failure evfd
             * is left at -1, which is checked below. */
            intercept_init_eventfd();
        }
        if (evfd != -1) {
            socket = socket_var;
            gdbserver = gdbserver_var ? gdbserver_var : GDBSERVER_DEFAULT;
        }
    }
    capture_env();
    pthread_mutex_unlock(&mutex);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment