Skip to content

Instantly share code, notes, and snippets.

@rain-1
Created May 12, 2017 18:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save rain-1/be98648b0172817ec1c90d46942793bd to your computer and use it in GitHub Desktop.
Save rain-1/be98648b0172817ec1c90d46942793bd to your computer and use it in GitHub Desktop.
fishbowl.c
// This program runs another program in a "fishbowl" set to the
// current working directory. The idea is that the subprocess
// should only be able to edit files in that path or anything
// descended from it. It can read outside the fishbowl but if it
// attempts to create or edit files outside of it that syscall
// is blocked (by switching it to getpid which does nothing).
//
// A malicious program can bypass the fishbowl using threads to
// make a syscall and then swap the path after verification.
// This is not a security tool, it is just to protect against
// user error (e.g. if you run a buggy script that accidentally
// deletes or overwrites a file you wanted).
//
// So it is even less secure than a chroot. This could be fixed
// by implementing it in the kernel itself as a new syscall,
// like OpenBSD pledge.
//
// ~/bowl$ fishbowl `which sh`
// Fishbowl: blocking attempt to write to </dev/tty>.
// ~/bowl$ touch test
// ~/bowl$ touch ~/test
// Fishbowl: blocking attempt to write to </home/goldie/test>.
// touch: cannot touch ‘/home/goldie/test’: Bad file descriptor
// ~/bowl$ exit
// Fishbowl: blocking attempt to write to </home/goldie/.bash_history>.
//
// Related/Different tools:
// * users/groups
// * chroot
// * seccomp
// * linux container
// * ld preload
// * pledge
//
// * <http://dev.exherbo.org/~alip/sydbox/sydbox.html>
// * <https://gitweb.gentoo.org/proj/sandbox.git/tree/README>
// * <https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt>
// * <http://chdir.org/~nico/seccomp-nurse/>
// * <https://lwn.net/Articles/347547/>
// * <http://eigenstate.org/notes/seccomp>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/reg.h>
#include <sys/wait.h>
#include <sys/syscall.h>
// SOURCES:
// * man ptrace, execve, waitpid
// * https://blog.nelhage.com/2010/08/write-yourself-an-strace-in-70-lines-of-code/
// * http://alip.github.io/code/ptrace-linux-deny.c
// * http://theantway.com/2013/01/notes-for-playing-with-ptrace-on-64-bits-ubuntu-12-10/
// * https://stackoverflow.com/questions/4414605/how-can-linux-ptrace-be-unsafe-or-contain-a-race-condition (how you can get past it with race conditions)
// * http://stackoverflow.com/a/11092828 (reason for kill(getpid(), SIGSTOP); between ptrace and execve)
// * http://blog.rchapman.org/post/36801038863/linux-system-call-table-for-x86-64
// * https://github.com/MerlijnWajer/tracy/
// * https://github.com/Pardus-Linux/catbox/blob/4c5af965cb93a0eacb9d2991df2b5838e9fd0b54/src/core.c#L210
// TODO:
// * is argv, envp necessarily NULL terminated or do we need to do that?
// * other syscalls, writev, access, truncate, rename, mkdir, rmdir, creat, link, unlink?
// * is wait == child good enough or do we have to maintain a list of pids? zombies?
// * can it be made to work recursively?
// * handle signals. we currently eat SIGINT.
// * go over the path handling logic carefully.
// * investigate symlink situation. (what if you point outside the fishbowl?)
// NOTES:
// This is for 64-bit architecture.
// Threads can get past it
// Length of cwd in bytes; set once in main() from strlen(cwd).
int cwd_len;
// Root of the fishbowl: the tracer's working directory, filled in by
// getcwd() in main() before forking.
char cwd[PATH_MAX];
// Scratch buffer for the "/proc/<pid>/cwd" link name that
// parent_handle_open_syscall() builds to find a tracee's working directory.
char proc[512];
// Scratch buffer in which parent_handle_open_syscall() assembles the path it
// resolves (tracee cwd + '/' + filename for relative names).
char path[PATH_MAX];
// Output buffer for realpath(); its contents are compared against cwd to
// decide whether an open is inside the fishbowl.
char real[PATH_MAX];
// Runs in the forked child: requests tracing, stops itself with SIGSTOP and
// then execve()s argv[0].
void child(int argc, char **argv, char **envp);
// Runs in the tracer: waits on the traced processes and hands open(2)
// syscall stops to parent_handle_open_syscall().
void parent(pid_t);
// Inspects a tracee stopped at an open(2) syscall and rewrites the syscall
// number when the target lies outside cwd.
void parent_handle_open_syscall(pid_t child);
// Entry point: records the current directory as the fishbowl root, then
// forks. The child execs the requested program under ptrace; the parent
// supervises it.
//
// argv[1] is the program to run (execve() is used, so it must be a path, not
// a bare command name) and argv[2..] are its arguments. envp is passed
// through to the program unchanged.
//
// Returns EXIT_SUCCESS when the usage text was printed or the tracer loop
// finished, EXIT_FAILURE when setup failed.
int main(int argc, char **argv, char **envp) {
    pid_t pid;

    if (argc <= 1) {
        puts("USAGE: ./fishbowl `which <program>` <arg> ...");
        return EXIT_SUCCESS;
    }

    // Without a fishbowl root cwd_len would be 0 and nothing would ever be
    // blocked, so refuse to run rather than silently disable the protection.
    if (getcwd(cwd, PATH_MAX) == NULL) {
        perror("Error: Could not determine the current directory");
        return EXIT_FAILURE;
    }
    cwd_len = strlen(cwd);

    pid = fork();
    if (pid < 0) {
        fputs("Error: Could not fork.\n", stderr);
        return EXIT_FAILURE;
    }
    if (pid == 0) {
        child(argc - 1, argv + 1, envp);
        // child() only comes back when the program could not be executed.
        return EXIT_FAILURE;
    }

    parent(pid);
    return EXIT_SUCCESS;
}
// Runs in the forked child. Asks to be traced by the parent, stops itself so
// the parent can configure tracing before any program code runs, and then
// replaces the process image with argv[0].
//
// argc is unused. argv must be NULL-terminated, as execve() requires
// (assumed to be a suffix of main()'s argv, which the C standard terminates
// with NULL). envp is handed to the new program unchanged.
//
// Does not return: on any failure the process exits with a non-zero status
// instead of falling back into the caller's code path.
void child(int argc, char **argv, char **envp) {
    (void)argc;

    if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) {
        fprintf(stderr, "Error: ptrace(PTRACE_TRACEME) failed in child: %s\n",
                strerror(errno));
        _exit(EXIT_FAILURE);
    }

    // Stop here so the tracer sees a well-defined SIGSTOP and can set its
    // ptrace options before the exec happens.
    raise(SIGSTOP);

    execve(argv[0], argv, envp);

    // execve() only returns on failure. 127 is the conventional
    // "could not execute" status used by shells.
    fprintf(stderr, "Error: execve failed in child: %s\n", strerror(errno));
    _exit(127);
}
// Tracer loop. `child` is the pid of the direct child, which is expected to
// have called PTRACE_TRACEME and stopped itself.
//
// Every tracee is resumed with PTRACE_SYSCALL so that it stops at each
// syscall boundary; stops for open(2) are passed to
// parent_handle_open_syscall(). Returns when `child` has terminated or when
// waiting fails.
//
// NOTE(review): only SYS_open is intercepted. Opens issued through
// openat(2) or creat(2), and other modifying syscalls, are not checked here.
void parent(pid_t child) {
    // TRACESYSGOOD marks syscall stops with SIGTRAP|0x80. The fork/vfork/
    // clone options make the kernel attach to new processes and threads
    // automatically, and TRACEEXEC turns the post-exec SIGTRAP into an
    // event stop.
    const long trace_opts = PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK
                          | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE
                          | PTRACE_O_TRACEEXEC;
    pid_t pid;
    int status;

    pid = waitpid(child, &status, 0);
    if (pid != child) {
        fputs("Error: Waitpid failed killing subprocesses.\n", stderr);
        kill(child, SIGKILL);
        return;
    }
    if (!WIFSTOPPED(status)) {
        // The child terminated before it could stop; nothing to trace.
        return;
    }
    if (ptrace(PTRACE_SETOPTIONS, child, NULL, (void *)trace_opts) == -1) {
        perror("Error: ptrace(PTRACE_SETOPTIONS) failed");
        kill(child, SIGKILL);
        return;
    }
    ptrace(PTRACE_SYSCALL, child, NULL, NULL);

    for (;;) {
        int sig, event;
        int deliver = 0;   // signal to inject when resuming; 0 = none

        // __WALL also reports clone children that do not raise SIGCHLD.
        pid = waitpid(-1, &status, __WALL);
        if (pid == -1) {
            if (errno == EINTR) {
                continue;
            }
            fputs("Error: Waitpid failed killing subprocesses.\n", stderr);
            kill(child, SIGKILL);
            return;
        }

        if (WIFEXITED(status) || WIFSIGNALED(status)) {
            if (pid == child) {
                return;
            }
            continue;
        }
        if (!WIFSTOPPED(status)) {
            continue;
        }

        sig = WSTOPSIG(status);
        event = (status >> 16) & 0xffff;

        if (sig == (SIGTRAP | 0x80)) {
            // Syscall stop. These arrive at both entry and exit; a blocked
            // open no longer carries SYS_open on exit, so it is not
            // reported twice.
            long nr = ptrace(PTRACE_PEEKUSER, pid,
                             (void *)(long)(8 * ORIG_RAX), NULL);
            if (nr == SYS_open) {
                parent_handle_open_syscall(pid);
            }
        }
        else if (sig == SIGTRAP && event != 0) {
            // PTRACE_EVENT_FORK/VFORK/CLONE/EXEC. New tasks are attached by
            // the kernel and report their own initial stop, so only the
            // reporting tracee needs to be resumed.
        }
        else if (sig == SIGSTOP) {
            // Initial stop of an automatically attached task. It cannot be
            // told apart from a SIGSTOP sent by someone else without
            // tracking pids, so it is swallowed either way. Re-applying the
            // options is harmless and covers tasks that did not inherit them.
            ptrace(PTRACE_SETOPTIONS, pid, NULL, (void *)trace_opts);
        }
        else {
            // Ordinary signal-delivery stop: hand the signal on to the
            // tracee instead of discarding it.
            deliver = sig;
        }

        ptrace(PTRACE_SYSCALL, pid, NULL, (void *)(long)deliver);
    }
}
// Copies a NUL-terminated string out of a traced process, one machine word
// at a time, using PTRACE_PEEKDATA (adapted from nelhage's strace example).
//
// child: pid of a tracee that is currently in a ptrace stop.
// addr:  address of the string in the tracee's address space.
//
// Returns a malloc()ed, NUL-terminated buffer that the caller must free().
// If a word cannot be read, the string ends at the last word that could be
// (so an unreadable address yields ""). Returns NULL when memory cannot be
// allocated.
// TODO: use a global buffer instead of malloc
char *read_string(pid_t child, unsigned long addr) {
#ifdef PATH_MAX
    size_t allocated = PATH_MAX;
#else
    size_t allocated = 4096;   // PATH_MAX is optional in POSIX
#endif
    size_t copied = 0;
    long word;
    char *val = malloc(allocated);

    if (val == NULL) {
        return NULL;
    }

    for (;;) {
        if (copied + sizeof word > allocated) {
            char *grown;

            allocated *= 2;
            grown = realloc(val, allocated);
            if (grown == NULL) {
                free(val);
                return NULL;
            }
            val = grown;
        }

        // PEEKDATA returns the word itself, so -1 is a legal value; the
        // only way to detect failure is to clear errno first and check it
        // afterwards. Without the reset a stale errno from an earlier call
        // would cut every string short.
        errno = 0;
        word = ptrace(PTRACE_PEEKDATA, child, (void *)(addr + copied), NULL);
        if (word == -1 && errno != 0) {
            val[copied] = '\0';
            break;
        }

        memcpy(val + copied, &word, sizeof word);
        if (memchr(&word, 0, sizeof word) != NULL) {
            break;
        }
        copied += sizeof word;
    }
    return val;
}
// Returns non-zero when `candidate` is `root` itself or lies beneath it.
// A plain prefix comparison is not enough: "/home/u/bowl-evil" must not
// count as being inside "/home/u/bowl", so the character after the prefix
// has to be a path separator or the end of the string.
static int fishbowl_path_inside(const char *root, size_t root_len,
                                const char *candidate) {
    if (strncmp(root, candidate, root_len) != 0) {
        return 0;
    }
    if (root_len > 0 && root[root_len - 1] == '/') {
        return 1;   // root is "/": everything is inside
    }
    return candidate[root_len] == '\0' || candidate[root_len] == '/';
}

// Writes the absolute form of `filename` into the global `path`, using the
// tracee's working directory (read from /proc/<pid>/cwd) for relative names.
// Returns 0 on success, -1 if the directory cannot be read or the result
// does not fit.
static int fishbowl_absolute_path(pid_t child, const char *filename) {
    ssize_t dir_len;
    int n;

    if (filename[0] == '/') {
        n = snprintf(path, sizeof path, "%s", filename);
        return (n < 0 || (size_t)n >= sizeof path) ? -1 : 0;
    }

    snprintf(proc, sizeof proc, "/proc/%d/cwd", (int)child);
    dir_len = readlink(proc, path, sizeof path - 1);
    if (dir_len < 0) {
        return -1;
    }
    path[dir_len] = '\0';   // readlink() does not terminate its output

    n = snprintf(path + dir_len, sizeof path - (size_t)dir_len,
                 "/%s", filename);
    if (n < 0 || (size_t)n >= sizeof path - (size_t)dir_len) {
        return -1;
    }
    return 0;
}

// Canonicalises the global `path` into the global `real`.
//
// realpath() fails for targets that do not exist yet, which is the normal
// case for an open that creates a file. In that case the deepest existing
// ancestor directory is resolved and the remaining components are appended
// unchanged.
//
// Returns 0 on success and -1 when no trustworthy answer can be produced.
static int fishbowl_resolve(void) {
    char prefix[PATH_MAX];
    size_t cut, real_len;
    int n;

    if (realpath(path, real) != NULL) {
        return 0;
    }
    if (errno != ENOENT && errno != ENOTDIR) {
        return -1;
    }

    // A dangling symlink would be followed when the file is created, and
    // its destination is not verified here, so refuse it.
    if (readlink(path, prefix, sizeof prefix) >= 0) {
        return -1;
    }

    cut = strlen(path);
    for (;;) {
        // Drop trailing separators, then the last component, leaving `cut`
        // just past the separator that ends the parent directory.
        while (cut > 0 && path[cut - 1] == '/') {
            cut--;
        }
        while (cut > 0 && path[cut - 1] != '/') {
            cut--;
        }
        if (cut == 0) {
            return -1;
        }

        memcpy(prefix, path, cut);
        prefix[cut] = '\0';
        if (realpath(prefix, real) != NULL) {
            break;
        }
        if (errno != ENOENT && errno != ENOTDIR) {
            return -1;
        }
    }

    real_len = strlen(real);
    if (real_len == 1) {
        real_len = 0;   // real is "/": avoid producing "//name"
    }
    n = snprintf(real + real_len, sizeof real - real_len, "/%s", path + cut);
    if (n < 0 || (size_t)n >= sizeof real - real_len) {
        return -1;
    }
    return 0;
}

// Called while `child` is stopped at an open(2) syscall. Reads the filename
// and flags from the tracee's registers (RDI and RSI) and, if the open could
// create or modify a file outside the fishbowl directory `cwd`, reports it
// on stderr and replaces the syscall number with getpid so that the open
// never happens.
//
// An open counts as modifying when its access mode is not O_RDONLY or when
// O_CREAT or O_TRUNC is set. Opens whose target cannot be read or resolved
// are blocked as well.
//
// NOTE(review): because the syscall is swapped for getpid, the tracee sees
// the pid as the value returned by open(); confirm that this is acceptable.
void parent_handle_open_syscall(pid_t child) {
    long filename_ptr, raw_flags;
    int flags, blocked;
    char *filename;

    // PEEKUSER returns the register value, so errors are only visible
    // through errno.
    errno = 0;
    filename_ptr = ptrace(PTRACE_PEEKUSER, child,
                          (void *)(long)(8 * RDI), NULL);
    if (filename_ptr == -1 && errno != 0) {
        return;
    }
    errno = 0;
    raw_flags = ptrace(PTRACE_PEEKUSER, child,
                       (void *)(long)(8 * RSI), NULL);
    if (raw_flags == -1 && errno != 0) {
        return;
    }
    flags = (int)raw_flags;   // open() takes an int; ignore the upper half

    if ((flags & O_ACCMODE) == O_RDONLY && !(flags & (O_CREAT | O_TRUNC))) {
        return;
    }

    // Clear errno so a failure left over from an earlier call cannot be
    // mistaken for a failed read of the tracee's memory.
    errno = 0;
    filename = read_string(child, (unsigned long)filename_ptr);

    blocked = filename == NULL
           || fishbowl_absolute_path(child, filename) != 0
           || fishbowl_resolve() != 0
           || !fishbowl_path_inside(cwd, (size_t)cwd_len, real);

    if (blocked) {
        fprintf(stderr, "Fishbowl: blocking attempt to write to <%s>.\n",
                filename != NULL ? filename : "(unreadable path)");
        ptrace(PTRACE_POKEUSER, child, (void *)(long)(8 * ORIG_RAX),
               (void *)(long)SYS_getpid);
    }
    free(filename);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment