Skip to content

Instantly share code, notes, and snippets.

@mej
Last active May 21, 2016 01:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mej/87d41b2480ac33ab6747a9cb688af407 to your computer and use it in GitHub Desktop.
Save mej/87d41b2480ac33ab6747a9cb688af407 to your computer and use it in GitHub Desktop.
A small program to wrap namespace creation
#define _GNU_SOURCE
#define _BSD_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
/* Program name; used as the prefix of every diagnostic message. */
#define PROGRAM "nsrun"
/* Prefix of the hostname that child_fn() assigns inside the new UTS namespace. */
#define HOSTNAME_PREFIX PROGRAM
/* Size in bytes of the statically allocated child stack (1 << 20 = 1 MiB). */
#define CHILD_STACK_SIZE (1 << 20)
/*
 * Build-time switch: non-zero makes main() call the clone() library wrapper,
 * zero (the default) makes it issue the raw SYS_clone system call instead.
 */
#ifndef USE_GLIBC_WRAPPER
# define USE_GLIBC_WRAPPER 0
#endif
/*
 * Size of the buffer that receives the namespace symlink target: the longest
 * path length macro that is defined, plus one byte, with 1024 + 1 as fallback.
 */
#if defined(MAXPATHLEN)
# define BUFSIZE_NSID (MAXPATHLEN + 1)
#elif defined(PATH_MAX)
# define BUFSIZE_NSID (PATH_MAX + 1)
#else
# define BUFSIZE_NSID (1024 + 1)
#endif
/*
 * Size of the buffer used to build the new hostname: the hostname length
 * macro that is defined, plus one byte, with 255 + 1 as fallback.
 */
#if defined(MAXHOSTNAMELEN)
# define BUFSIZE_HOSTNAME (MAXHOSTNAMELEN + 1)
#elif defined(HOST_NAME_MAX)
# define BUFSIZE_HOSTNAME (HOST_NAME_MAX + 1)
#else
# define BUFSIZE_HOSTNAME (255 + 1)
#endif
/*
 * Compile-time sanity check that the child stack is larger than a rough
 * estimate of the buffers involved.
 * NOTE(review): _UTSNAME_LENGTH looks like a libc-internal macro; if it is
 * not defined the preprocessor evaluates it as 0 here -- confirm.
 */
#if (_UTSNAME_LENGTH * 6 + BUFSIZE_HOSTNAME + BUFSIZE_NSID + 8) > CHILD_STACK_SIZE
# error "Insufficient stack size for child!"
#endif
/*
 * Diagnostic helpers.  Each one prints "nsrun: <LEVEL>: <formatted msg> -- "
 * followed by strerror(errno) and a newline to stderr, so errno should still
 * hold the value of the failure being reported when they are invoked.
 * msg must be a string literal (it is concatenated with the surrounding
 * literals); ##__VA_ARGS__ is the GNU extension that drops the preceding
 * comma when no extra arguments are supplied.
 */
/* die(): report a fatal error, then terminate with EXIT_FAILURE. */
#define die(msg, ...) do { \
fprintf(stderr, PROGRAM ": FATAL: " msg " -- %s\n" , ##__VA_ARGS__, strerror(errno)); \
exit(EXIT_FAILURE); \
} while (0)
/* err(): report an error and continue. */
#define err(msg, ...) fprintf(stderr, PROGRAM ": ERROR: " msg " -- %s\n" , ##__VA_ARGS__, strerror(errno))
/* warn(): report a warning and continue. */
#define warn(msg, ...) fprintf(stderr, PROGRAM ": WARNING: " msg " -- %s\n" , ##__VA_ARGS__, strerror(errno))
/* Argument vector used when no command is given; main() may replace element 0 with $SHELL. */
char *def_argv[] = { "/bin/sh", NULL };
/* Backing storage for the child's stack; main() passes its upper end to clone(). */
char child_stack[CHILD_STACK_SIZE];
/* Target of the /proc/self/ns/uts symlink as read by main() before cloning. */
char orig_nsid[BUFSIZE_NSID];
/* Inode number of the original UTS namespace; child_fn() compares its own against it. */
ssize_t orig_nsnum;
/* Namespaces to create for the child; main() adds these to the clone flags when the effective UID is 0. */
int ns_flags = (CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWUTS);
/*
 * Identify the UTS namespace of the calling process.
 *
 * buf     - optional destination for the target of the /proc/self/ns/uts
 *           symlink; may be NULL when only the numeric ID is wanted.
 * bufsize - size of buf in bytes; at most bufsize - 1 bytes of the link
 *           target are stored, followed by a terminating NUL.
 *
 * Returns the inode number that stat() reports for /proc/self/ns/uts.
 * Any failure is fatal: the program terminates through die().
 */
ssize_t
get_nsid(char *buf, size_t bufsize)
{
    struct stat st;

    if ((buf != NULL) && (bufsize > 0)) {
        ssize_t len = readlink("/proc/self/ns/uts", buf, bufsize - 1);

        if (len < 0) {
            die("Unable to obtain UTS namespace identifier");
        }
        /*
         * readlink() never appends a terminator, so add one explicitly
         * instead of relying on the caller's buffer being pre-zeroed.
         * len <= bufsize - 1, so this index is always in bounds.
         */
        buf[len] = '\0';
    }
    if (stat("/proc/self/ns/uts", &st) < 0) {
        die("Unable to stat(/proc/self/ns/uts) to obtain UTS namespace identifier");
    }
    return ((ssize_t) st.st_ino);
}
/*
 * Body of the cloned child: set up the new namespaces and exec the command.
 *
 * data - the NULL-terminated argument vector (char **) of the program to
 *        execute; element 0 is looked up via execvp().
 *
 * Steps: verify that the UTS namespace differs from the parent's, set the
 * hostname to "<HOSTNAME_PREFIX>_<nsid in hex>.<original nodename>", adjust
 * mount propagation on "/", mount a fresh procfs on /proc, and finally exec.
 * Mount failures only produce warnings; every other failure is fatal.
 *
 * Does not return on success (the process image is replaced); on failure the
 * process exits through die().
 */
int
child_fn(void *data)
{
    char **child_argv = data;
    struct utsname uts;
    char new_name[BUFSIZE_HOSTNAME];
    ssize_t nsnum;

    if (uname(&uts) < 0) {
        die("Unable to obtain UTS info");
    }

    nsnum = get_nsid(NULL, 0);
    if (nsnum == orig_nsnum) {
        /* Casts make the arguments match the %ld / %08x conversions exactly. */
        die("Unable to change UTS namespace (NSID %ld/%08x matches parent)",
            (long) nsnum, (unsigned int) nsnum);
    }

    /* snprintf() truncates to the buffer size and always NUL-terminates. */
    snprintf(new_name, sizeof(new_name), "%s_%08x.%s",
             HOSTNAME_PREFIX, (unsigned int) nsnum, uts.nodename);
    if (sethostname(new_name, strlen(new_name)) < 0) {
        die("Unable to set new hostname");
    }

    /* Change propagation of every mount under "/" to slave (recursively). */
    if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
        warn("Unable to enslave the root filesystem mount in this namespace");
    }
    /* Mount a new procfs instance over /proc, read-only. */
    if (mount("proc", "/proc", "proc", MS_RDONLY | MS_RELATIME | MS_NOEXEC | MS_NODEV, "") < 0) {
        warn("Unable to mount procfs onto /proc");
    }
    /* Mark the /proc mount private (recursively). */
    if (mount(NULL, "/proc", NULL, MS_PRIVATE | MS_REC, NULL) < 0) {
        warn("Unable to privatize the /proc filesystem mount in this namespace");
    }

    execvp(child_argv[0], child_argv);
    /* Only reached when execvp() failed. */
    die("Unable to exec() subprogram %s", child_argv[0]);
}
/*
 * Entry point: run a command inside newly created namespaces.
 *
 * With no arguments the command is def_argv (/bin/sh), with element 0
 * replaced by $SHELL when that is set to an absolute path; otherwise the
 * command is argv[1..].
 *
 * When not running with effective UID 0, a user namespace is unshared and
 * seteuid(0) is attempted; failures there only produce warnings.  The
 * namespace flags in ns_flags are added to the clone flags only if the
 * effective UID is 0 afterwards.
 *
 * Exit status: EXIT_SUCCESS if the child exited with status 0, the child's
 * own exit status if it was non-zero, or 128 + N if the child was terminated
 * by signal N.
 */
int
main(int argc, char *argv[])
{
    pid_t cpid, wpid;
    int clone_flags = SIGCHLD | CLONE_VFORK;
    int wstatus;

    /* Set default for subprocess argument list (/bin/sh) */
    if (argc <= 1) {
        /* argc == 0 is handled here too so that argv is never advanced past its end. */
        char *p;

        argv = def_argv;
        if (((p = secure_getenv("SHELL")) != NULL) && (*p == '/')) {
            argv[0] = p;
        }
    } else {
        argv++;
    }

    /* Get original namespace ID. */
    orig_nsnum = get_nsid(orig_nsid, BUFSIZE_NSID);

    if (geteuid() != 0) {
        if (unshare(CLONE_NEWUSER) < 0) {
            warn("Unable to unshare user namespace");
        } else if (seteuid(0) != 0) {
            warn("Unable to seteuid(0)");
        }
    }
    if (geteuid() == 0) {
        clone_flags |= ns_flags;
    }

#if USE_GLIBC_WRAPPER
    if ((cpid = clone(child_fn, child_stack + CHILD_STACK_SIZE, clone_flags, argv)) == -1) {
        die("Unable to clone() child process %s", argv[0]);
    }
#else
    {
        char *stack_top;

        if (clone_flags & CLONE_VM) {
            /* clone() requires a stack if we want to share address space. */
            stack_top = child_stack + CHILD_STACK_SIZE;
        } else {
            /* We're not sharing address space, so copy-on-write instead. */
            stack_top = NULL;
        }
        if ((cpid = syscall(SYS_clone, clone_flags, stack_top, NULL, NULL, NULL)) == -1) {
            die("Unable to clone() child process %s", argv[0]);
        } else if (cpid == 0) {
            /* The raw system call returns 0 in the child. */
            return child_fn(argv);
        }
    }
#endif

    if (!(clone_flags & CLONE_PARENT)) {
        while ((wpid = wait(&wstatus)) > 0) {
            if (wpid != cpid) {
                continue;
            }
            /* Our immediate child exited. We're done. */
            if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
                fprintf(stderr, PROGRAM ": Child process \"%s\" returned %d\n", argv[0], WEXITSTATUS(wstatus));
                exit(WEXITSTATUS(wstatus));
            } else if (WIFSIGNALED(wstatus)) {
                fprintf(stderr, PROGRAM ": Child process \"%s\" terminated by signal %d\n", argv[0], WTERMSIG(wstatus));
                /*
                 * Only the low 8 bits of an exit status reach the parent, so
                 * the signal number must not be shifted out of that range;
                 * use the shell convention of 128 + signal number.
                 */
                exit(128 + WTERMSIG(wstatus));
            }
            break;
        }
    }
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment