Created
October 15, 2021 02:45
-
-
Save tiqwab/de9605f496b8915a3551ba27bc0a31a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE
#include <errno.h>
#include <limits.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
/* Print msg together with the current errno description via perror(),
   then terminate the process with EXIT_FAILURE. */
#define errExit(msg)            \
    do {                        \
        perror(msg);            \
        exit(EXIT_FAILURE);     \
    } while (0)
/*
 * Invoke the pivot_root system call directly through syscall().
 *
 * new_root: path passed as the first argument of SYS_pivot_root.
 * put_old:  path passed as the second argument of SYS_pivot_root.
 *
 * Returns the value reported by syscall(), narrowed to int.
 */
static int
pivot_root(const char *new_root, const char *put_old)
{
    long rc = syscall(SYS_pivot_root, new_root, put_old);

    return (int) rc;
}
/* Size in bytes of the stack region mapped for the cloned child (1 MiB). */
#define STACK_SIZE (1 << 20)
/*
 * Startup function for the cloned child.
 *
 * arg is a NULL-terminated array of strings: args[0] is the directory that
 * becomes the new root filesystem, and args[1..] is the command (and its
 * arguments) executed after the root has been pivoted.
 *
 * The function does not return on success: the process image is replaced
 * by execv(). Every fatal error terminates the child with EXIT_FAILURE.
 * Failures while unmounting or removing the old-root mount point are only
 * reported, because the new root is already usable at that stage.
 */
static int
child(void *arg)
{
    char **args = arg;
    char *new_root = args[0];
    const char *put_old = "/oldrootfs";
    char path[PATH_MAX];
    int len;

    /* Without a new root and a command there is nothing to pivot to or
       to execute; fail early instead of handing NULL to mount()/execv(). */
    if (new_root == NULL || args[1] == NULL) {
        fprintf(stderr, "child: expected <new-root> <command> [args...]\n");
        exit(EXIT_FAILURE);
    }

    /* Ensure that 'new_root' and its parent mount don't have
       shared propagation (which would cause pivot_root() to
       return an error), and prevent propagation of mount
       events to the initial mount namespace. */
    if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == -1)
        errExit("mount-MS_PRIVATE");

    /* Ensure that 'new_root' is a mount point. */
    if (mount(new_root, new_root, NULL, MS_BIND, NULL) == -1)
        errExit("mount-MS_BIND");

    /* Mount proc under the new root. A truncated path would name a
       different directory, so treat truncation as a fatal error. */
    len = snprintf(path, sizeof(path), "%s/proc", new_root);
    if (len < 0 || (size_t) len >= sizeof(path)) {
        fprintf(stderr, "path too long: %s/proc\n", new_root);
        exit(EXIT_FAILURE);
    }
    if (mount(path, path, "proc", 0, NULL) == -1) {
        errExit("mount-proc");
    }

    /* Create directory to which old root will be pivoted. */
    len = snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
    if (len < 0 || (size_t) len >= sizeof(path)) {
        fprintf(stderr, "path too long: %s/%s\n", new_root, put_old);
        exit(EXIT_FAILURE);
    }
    /* A directory left behind by an earlier run is acceptable; any other
       mkdir() failure is fatal. */
    if (mkdir(path, 0777) == -1 && errno != EEXIST)
        errExit("mkdir");

    /* And pivot the root filesystem. */
    if (pivot_root(new_root, path) == -1)
        errExit("pivot_root");

    /* Switch the current working directory to "/". */
    if (chdir("/") == -1)
        errExit("chdir");

    /* Unmount old root and remove mount point (best effort). */
    if (umount2(put_old, MNT_DETACH) == -1)
        perror("umount2");
    if (rmdir(put_old) == -1)
        perror("rmdir");

    /* Execute the command specified in args[1]; execv() only returns
       when it failed. */
    execv(args[1], &args[1]);
    errExit("execv");
}
int | |
main(int argc, char *argv[]) | |
{ | |
if (strncmp(argv[1], "test", 4) == 0) { | |
printf("test\n"); | |
exit(EXIT_SUCCESS); | |
} else { | |
/* Create a child process in a new mount namespace. */ | |
char *stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, | |
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); | |
if (stack == MAP_FAILED) | |
errExit("mmap"); | |
if (clone(child, stack + STACK_SIZE, | |
CLONE_NEWNS | SIGCHLD, &argv[1]) == -1) | |
errExit("clone"); | |
/* Parent falls through to here; wait for child. */ | |
if (wait(NULL) == -1) | |
errExit("wait"); | |
exit(EXIT_SUCCESS); | |
} | |
} |
Author
tiqwab
commented
Oct 24, 2021
•
例えば `/bin/ls -l /` を実行した場合は、`/` を open する様子が確認できる。
$ sudo strace -p 92011
strace: Process 92011 attached
...
openat(AT_FDCWD, "/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3
...
これが `/proc/self/exe test` の場合は、対応する openat システムコールは呼ばれない。
$ sudo strace -p 92074
strace: Process 92074 attached
restart_syscall(<... resuming interrupted read ...>) = 0
execve("/proc/self/exe", ["/proc/self/exe", "test"], 0x7ffc85c5cea0 /* 15 vars */) = 0
brk(NULL) = 0x5593154e0000
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=7274, ...}) = 0
mmap(NULL, 7274, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f0facf39000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260\34\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2030544, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f0facf37000
mmap(NULL, 4131552, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f0fac923000
mprotect(0x7f0facb0a000, 2097152, PROT_NONE) = 0
mmap(0x7f0facd0a000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f0facd0a000
mmap(0x7f0facd10000, 15072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f0facd10000
close(3) = 0
arch_prctl(ARCH_SET_FS, 0x7f0facf384c0) = 0
mprotect(0x7f0facd0a000, 16384, PROT_READ) = 0
mprotect(0x5593148ba000, 4096, PROT_READ) = 0
mprotect(0x7f0facf3b000, 4096, PROT_READ) = 0
munmap(0x7f0facf39000, 7274) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0xd), ...}) = 0
brk(NULL) = 0x5593154e0000
brk(0x559315501000) = 0x559315501000
write(1, "test output\n", 12) = 12
exit_group(0) = ?
+++ exited with 0 +++
既にそのプロセスで open しているファイルを実行する場合、新たに open する必要はない。
`/proc/self/exe` の場合、`ls -l /proc/self/fd` 等には出てこないが、そのプロセスが実行中である以上そのファイルを利用しているので、open せずとも実行できる。open しないのでパスを辿る必要もなく、rootfs 下のファイルでなくても実行できてしまう... ということか?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment