Created
February 23, 2024 17:21
-
-
Save oskarirauta/ca0e47ca9c89f920ad281795522e4cdb to your computer and use it in GitHub Desktop.
uxc infra container support
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2013 Felix Fietkau <nbd@openwrt.org> | |
* Copyright (C) 2013 John Crispin <blogic@openwrt.org> | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU Lesser General Public License version 2.1 | |
* as published by the Free Software Foundation | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
*/ | |
#define _GNU_SOURCE | |
#include <sys/resource.h> | |
#include <sys/types.h> | |
#include <sys/socket.h> | |
#include <sys/stat.h> | |
#include <grp.h> | |
#include <net/if.h> | |
#include <unistd.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <fcntl.h> | |
#include <pwd.h> | |
#include <libgen.h> | |
#include <unistd.h> | |
#define SYSLOG_NAMES | |
#include <syslog.h> | |
#include <libubox/md5.h> | |
#include "../procd.h" | |
#include "service.h" | |
#include "instance.h" | |
#define UJAIL_BIN_PATH "/sbin/ujail" | |
#define CGROUP_BASEDIR "/sys/fs/cgroup/services" | |
/*
 * Indices of the top-level keys of an instance definition.
 * They index instance_attr[] and the tb[] array filled by blobmsg_parse();
 * __INSTANCE_ATTR_MAX is the number of keys.
 */
enum {
	INSTANCE_ATTR_COMMAND = 0,
	INSTANCE_ATTR_ENV,
	INSTANCE_ATTR_DATA,
	INSTANCE_ATTR_NETDEV,
	INSTANCE_ATTR_FILE,
	INSTANCE_ATTR_TRIGGER,
	INSTANCE_ATTR_RESPAWN,
	INSTANCE_ATTR_NICE,
	INSTANCE_ATTR_LIMITS,
	INSTANCE_ATTR_WATCH,
	INSTANCE_ATTR_ERROR,
	INSTANCE_ATTR_USER,
	INSTANCE_ATTR_GROUP,
	INSTANCE_ATTR_STDOUT,
	INSTANCE_ATTR_STDERR,
	INSTANCE_ATTR_NO_NEW_PRIVS,
	INSTANCE_ATTR_JAIL,
	INSTANCE_ATTR_TRACE,
	INSTANCE_ATTR_SECCOMP,
	INSTANCE_ATTR_CAPABILITIES,
	INSTANCE_ATTR_PIDFILE,
	INSTANCE_ATTR_RELOADSIG,
	INSTANCE_ATTR_TERMTIMEOUT,
	INSTANCE_ATTR_FACILITY,
	INSTANCE_ATTR_EXTROOT,
	INSTANCE_ATTR_OVERLAYDIR,
	INSTANCE_ATTR_TMPOVERLAYSIZE,
	INSTANCE_ATTR_BUNDLE,
	INSTANCE_ATTR_INFRA,
	INSTANCE_ATTR_WATCHDOG,

	__INSTANCE_ATTR_MAX
};
static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = { | |
[INSTANCE_ATTR_COMMAND] = { "command", BLOBMSG_TYPE_ARRAY }, | |
[INSTANCE_ATTR_ENV] = { "env", BLOBMSG_TYPE_TABLE }, | |
[INSTANCE_ATTR_DATA] = { "data", BLOBMSG_TYPE_TABLE }, | |
[INSTANCE_ATTR_NETDEV] = { "netdev", BLOBMSG_TYPE_ARRAY }, | |
[INSTANCE_ATTR_FILE] = { "file", BLOBMSG_TYPE_ARRAY }, | |
[INSTANCE_ATTR_TRIGGER] = { "triggers", BLOBMSG_TYPE_ARRAY }, | |
[INSTANCE_ATTR_RESPAWN] = { "respawn", BLOBMSG_TYPE_ARRAY }, | |
[INSTANCE_ATTR_NICE] = { "nice", BLOBMSG_TYPE_INT32 }, | |
[INSTANCE_ATTR_LIMITS] = { "limits", BLOBMSG_TYPE_TABLE }, | |
[INSTANCE_ATTR_WATCH] = { "watch", BLOBMSG_TYPE_ARRAY }, | |
[INSTANCE_ATTR_ERROR] = { "error", BLOBMSG_TYPE_ARRAY }, | |
[INSTANCE_ATTR_USER] = { "user", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_GROUP] = { "group", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_STDOUT] = { "stdout", BLOBMSG_TYPE_BOOL }, | |
[INSTANCE_ATTR_STDERR] = { "stderr", BLOBMSG_TYPE_BOOL }, | |
[INSTANCE_ATTR_NO_NEW_PRIVS] = { "no_new_privs", BLOBMSG_TYPE_BOOL }, | |
[INSTANCE_ATTR_JAIL] = { "jail", BLOBMSG_TYPE_TABLE }, | |
[INSTANCE_ATTR_TRACE] = { "trace", BLOBMSG_TYPE_BOOL }, | |
[INSTANCE_ATTR_SECCOMP] = { "seccomp", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_CAPABILITIES] = { "capabilities", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 }, | |
[INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 }, | |
[INSTANCE_ATTR_FACILITY] = { "facility", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_OVERLAYDIR] = { "overlaydir", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_TMPOVERLAYSIZE] = { "tmpoverlaysize", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_BUNDLE] = { "bundle", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_INFRA] = { "infra", BLOBMSG_TYPE_STRING }, | |
[INSTANCE_ATTR_WATCHDOG] = { "watchdog", BLOBMSG_TYPE_ARRAY }, | |
}; | |
/*
 * Indices of the keys accepted inside the "jail" table of an instance.
 * They index jail_attr[]; __JAIL_ATTR_MAX is the number of keys.
 */
enum {
	JAIL_ATTR_NAME = 0,
	JAIL_ATTR_HOSTNAME,
	JAIL_ATTR_PROCFS,
	JAIL_ATTR_SYSFS,
	JAIL_ATTR_UBUS,
	JAIL_ATTR_LOG,
	JAIL_ATTR_RONLY,
	JAIL_ATTR_MOUNT,
	JAIL_ATTR_NETNS,
	JAIL_ATTR_USERNS,
	JAIL_ATTR_CGROUPSNS,
	JAIL_ATTR_CONSOLE,
	JAIL_ATTR_REQUIREJAIL,
	JAIL_ATTR_IMMEDIATELY,
	JAIL_ATTR_PIDFILE,
	JAIL_ATTR_SETNS,

	__JAIL_ATTR_MAX,
};
static const struct blobmsg_policy jail_attr[__JAIL_ATTR_MAX] = { | |
[JAIL_ATTR_NAME] = { "name", BLOBMSG_TYPE_STRING }, | |
[JAIL_ATTR_HOSTNAME] = { "hostname", BLOBMSG_TYPE_STRING }, | |
[JAIL_ATTR_PROCFS] = { "procfs", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_SYSFS] = { "sysfs", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_UBUS] = { "ubus", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_LOG] = { "log", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_RONLY] = { "ronly", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_MOUNT] = { "mount", BLOBMSG_TYPE_TABLE }, | |
[JAIL_ATTR_NETNS] = { "netns", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_USERNS] = { "userns", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_CGROUPSNS] = { "cgroupsns", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_CONSOLE] = { "console", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_REQUIREJAIL] = { "requirejail", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_IMMEDIATELY] = { "immediately", BLOBMSG_TYPE_BOOL }, | |
[JAIL_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING }, | |
[JAIL_ATTR_SETNS] = { "setns", BLOBMSG_TYPE_ARRAY }, | |
}; | |
/* Indices of the keys of one entry of the jail "setns" array. */
enum {
	JAIL_SETNS_ATTR_PID = 0,
	JAIL_SETNS_ATTR_NS,

	__JAIL_SETNS_ATTR_MAX,
};
static const struct blobmsg_policy jail_setns_attr[__JAIL_SETNS_ATTR_MAX] = { | |
[JAIL_SETNS_ATTR_PID] = { "pid", BLOBMSG_TYPE_INT32 }, | |
[JAIL_SETNS_ATTR_NS] = { "namespaces", BLOBMSG_TYPE_ARRAY }, | |
}; | |
struct instance_netdev { | |
struct blobmsg_list_node node; | |
int ifindex; | |
}; | |
struct instance_file { | |
struct blobmsg_list_node node; | |
uint32_t md5[4]; | |
}; | |
/* Maps the textual name of a resource limit to its RLIMIT_* constant. */
struct rlimit_name {
	const char *name;
	int resource;
};

/*
 * Limits that can be set through the "limits" table of an instance.
 * The table is terminated by an entry whose name is NULL.
 */
static const struct rlimit_name rlimit_names[] = {
	{ "as", RLIMIT_AS },
	{ "core", RLIMIT_CORE },
	{ "cpu", RLIMIT_CPU },
	{ "data", RLIMIT_DATA },
	{ "fsize", RLIMIT_FSIZE },
	{ "memlock", RLIMIT_MEMLOCK },
	{ "nofile", RLIMIT_NOFILE },
	{ "nproc", RLIMIT_NPROC },
	{ "rss", RLIMIT_RSS },
	{ "stack", RLIMIT_STACK },
/*
 * The bare `linux` macro is a legacy GNU extension that is not defined
 * when compiling in a strict ISO mode (-std=c99/-std=c11); `__linux__`
 * is always defined on Linux targets, so test that one as well.
 */
#if defined(__linux__) || defined(linux)
	{ "nice", RLIMIT_NICE },
	{ "rtprio", RLIMIT_RTPRIO },
	{ "msgqueue", RLIMIT_MSGQUEUE },
	{ "sigpending", RLIMIT_SIGPENDING },
#endif
	{ NULL, 0 }
};
/*
 * Close fd unless it is negative or one of the three standard streams
 * (anything up to and including STDERR_FILENO is left alone).
 */
static void closefd(int fd)
{
	if (fd <= STDERR_FILENO)
		return;

	close(fd);
}
/*
 * Look up a syslog facility by name (case-insensitive) in the
 * facilitynames[] table from <syslog.h>.
 *
 * Returns the c_val of the matching entry; when nothing matches, the c_val
 * of the table's terminating entry is returned (-1 in the syslog.h table).
 */
static int
syslog_facility_str_to_int(const char *facility)
{
	CODE *entry;

	for (entry = facilitynames; entry->c_name; entry++) {
		if (!strcasecmp(entry->c_name, facility))
			break;
	}

	return entry->c_val;
}
static void | |
instance_limits(const char *limit, const char *value) | |
{ | |
int i; | |
struct rlimit rlim; | |
unsigned long cur, max; | |
for (i = 0; rlimit_names[i].name != NULL; i++) { | |
if (strcmp(rlimit_names[i].name, limit)) | |
continue; | |
if (!strcmp(value, "unlimited")) { | |
rlim.rlim_cur = RLIM_INFINITY; | |
rlim.rlim_max = RLIM_INFINITY; | |
} else { | |
if (getrlimit(rlimit_names[i].resource, &rlim)) | |
return; | |
cur = rlim.rlim_cur; | |
max = rlim.rlim_max; | |
if (sscanf(value, "%lu %lu", &cur, &max) < 1) | |
return; | |
rlim.rlim_cur = cur; | |
rlim.rlim_max = max; | |
} | |
setrlimit(rlimit_names[i].resource, &rlim); | |
return; | |
} | |
} | |
static char * | |
instance_gen_setns_argstr(struct blob_attr *attr) | |
{ | |
struct blob_attr *tb[__JAIL_SETNS_ATTR_MAX]; | |
struct blob_attr *cur; | |
int rem, len, total; | |
char *ret; | |
blobmsg_parse(jail_setns_attr, __JAIL_SETNS_ATTR_MAX, tb, | |
blobmsg_data(attr), blobmsg_data_len(attr)); | |
if (!tb[JAIL_SETNS_ATTR_PID] || !tb[JAIL_SETNS_ATTR_NS]) | |
return NULL; | |
len = snprintf(NULL, 0, "%d:", blobmsg_get_u32(tb[JAIL_SETNS_ATTR_PID])); | |
blobmsg_for_each_attr(cur, tb[JAIL_SETNS_ATTR_NS], rem) { | |
char *tmp; | |
if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING) | |
return NULL; | |
tmp = blobmsg_get_string(cur); | |
if (!tmp) | |
return NULL; | |
len += strlen(tmp) + 1; | |
} | |
total = len; | |
ret = malloc(total); | |
if (!ret) | |
return NULL; | |
len = snprintf(ret, total, "%d:", blobmsg_get_u32(tb[JAIL_SETNS_ATTR_PID])); | |
blobmsg_for_each_attr(cur, tb[JAIL_SETNS_ATTR_NS], rem) { | |
strncpy(&ret[len], blobmsg_get_string(cur), total - len); | |
len += strlen(blobmsg_get_string(cur)); | |
ret[len++] = ','; | |
} | |
ret[total - 1] = '\0'; | |
return ret; | |
} | |
static inline int | |
jail_run(struct service_instance *in, char **argv) | |
{ | |
char *term_timeout_str; | |
struct blobmsg_list_node *var; | |
struct jail *jail = &in->jail; | |
int argc = 0; | |
argv[argc++] = UJAIL_BIN_PATH; | |
if (asprintf(&term_timeout_str, "%d", in->term_timeout) == -1) | |
exit(ENOMEM); | |
argv[argc++] = "-t"; | |
argv[argc++] = term_timeout_str; | |
if (jail->name) { | |
argv[argc++] = "-n"; | |
argv[argc++] = jail->name; | |
} | |
if (jail->hostname) { | |
argv[argc++] = "-h"; | |
argv[argc++] = jail->hostname; | |
} | |
if (in->seccomp) { | |
argv[argc++] = "-S"; | |
argv[argc++] = in->seccomp; | |
} | |
if (in->user) { | |
argv[argc++] = "-U"; | |
argv[argc++] = in->user; | |
} | |
if (in->group) { | |
argv[argc++] = "-G"; | |
argv[argc++] = in->group; | |
} | |
if (in->capabilities) { | |
argv[argc++] = "-C"; | |
argv[argc++] = in->capabilities; | |
} | |
if (in->no_new_privs) | |
argv[argc++] = "-c"; | |
if (jail->procfs) | |
argv[argc++] = "-p"; | |
if (jail->sysfs) | |
argv[argc++] = "-s"; | |
if (jail->ubus) | |
argv[argc++] = "-u"; | |
if (jail->log) | |
argv[argc++] = "-l"; | |
if (jail->ronly) | |
argv[argc++] = "-o"; | |
if (jail->netns) | |
argv[argc++] = "-N"; | |
if (jail->userns) | |
argv[argc++] = "-f"; | |
if (jail->cgroupsns) | |
argv[argc++] = "-F"; | |
if (jail->console) | |
argv[argc++] = "-y"; | |
if (in->extroot) { | |
argv[argc++] = "-R"; | |
argv[argc++] = in->extroot; | |
} | |
if (in->overlaydir) { | |
argv[argc++] = "-O"; | |
argv[argc++] = in->overlaydir; | |
} | |
if (in->tmpoverlaysize) { | |
argv[argc++] = "-T"; | |
argv[argc++] = in->tmpoverlaysize; | |
} | |
if (in->immediately) | |
argv[argc++] = "-i"; | |
if (jail->pidfile) { | |
argv[argc++] = "-P"; | |
argv[argc++] = jail->pidfile; | |
} | |
if (in->bundle) { | |
argv[argc++] = "-J"; | |
argv[argc++] = in->bundle; | |
} | |
if (in->infra) { | |
argv[argc++] = "-I"; | |
argv[argc++] = in->infra; | |
} | |
if (in->require_jail) | |
argv[argc++] = "-E"; | |
blobmsg_list_for_each(&in->env, var) { | |
argv[argc++] = "-e"; | |
argv[argc++] = (char *) blobmsg_name(var->data); | |
} | |
blobmsg_list_for_each(&jail->mount, var) { | |
const char *type = blobmsg_data(var->data); | |
if (*type == '1') | |
argv[argc++] = "-w"; | |
else | |
argv[argc++] = "-r"; | |
argv[argc++] = (char *) blobmsg_name(var->data); | |
} | |
blobmsg_list_for_each(&jail->setns, var) { | |
char *setns_arg = instance_gen_setns_argstr(var->data); | |
if (setns_arg) { | |
argv[argc++] = "-j"; | |
argv[argc++] = setns_arg; | |
} | |
} | |
argv[argc++] = "--"; | |
return argc; | |
} | |
static int | |
instance_removepid(struct service_instance *in) { | |
if (!in->pidfile) | |
return 0; | |
if (unlink(in->pidfile)) { | |
ERROR("Failed to remove pidfile: %s: %m\n", in->pidfile); | |
return 1; | |
} | |
return 0; | |
} | |
static int | |
instance_writepid(struct service_instance *in) | |
{ | |
FILE *_pidfile; | |
if (!in->pidfile) { | |
return 0; | |
} | |
_pidfile = fopen(in->pidfile, "w"); | |
if (_pidfile == NULL) { | |
ERROR("failed to open pidfile for writing: %s: %m", in->pidfile); | |
return 1; | |
} | |
if (fprintf(_pidfile, "%d\n", in->proc.pid) < 0) { | |
ERROR("failed to write pidfile: %s: %m", in->pidfile); | |
fclose(_pidfile); | |
return 2; | |
} | |
if (fclose(_pidfile)) { | |
ERROR("failed to close pidfile: %s: %m", in->pidfile); | |
return 3; | |
} | |
return 0; | |
} | |
static void | |
instance_run(struct service_instance *in, int _stdout, int _stderr) | |
{ | |
struct blobmsg_list_node *var; | |
struct blob_attr *cur; | |
char **argv; | |
int argc = 1; /* NULL terminated */ | |
int rem, _stdin; | |
bool seccomp = !in->trace && !in->has_jail && in->seccomp; | |
bool setlbf = _stdout >= 0; | |
if (in->nice) | |
setpriority(PRIO_PROCESS, 0, in->nice); | |
blobmsg_for_each_attr(cur, in->command, rem) | |
argc++; | |
blobmsg_list_for_each(&in->env, var) | |
setenv(blobmsg_name(var->data), blobmsg_data(var->data), 1); | |
if (seccomp) | |
setenv("SECCOMP_FILE", in->seccomp, 1); | |
if (setlbf) | |
setenv("LD_PRELOAD", "/lib/libsetlbf.so", 1); | |
blobmsg_list_for_each(&in->limits, var) | |
instance_limits(blobmsg_name(var->data), blobmsg_data(var->data)); | |
if (in->trace || seccomp) | |
argc += 1; | |
argv = alloca(sizeof(char *) * (argc + in->jail.argc)); | |
argc = 0; | |
#ifdef SECCOMP_SUPPORT | |
if (in->trace) | |
argv[argc++] = "/sbin/utrace"; | |
else if (seccomp) | |
argv[argc++] = "/sbin/seccomp-trace"; | |
#else | |
if (in->trace || seccomp) | |
ULOG_WARN("Seccomp support for %s::%s not available\n", in->srv->name, in->name); | |
#endif | |
if (in->has_jail) { | |
argc = jail_run(in, argv); | |
if (argc != in->jail.argc) | |
ULOG_WARN("expected %i jail params, used %i for %s::%s\n", | |
in->jail.argc, argc, in->srv->name, in->name); | |
} | |
blobmsg_for_each_attr(cur, in->command, rem) | |
argv[argc++] = blobmsg_data(cur); | |
argv[argc] = NULL; | |
_stdin = open("/dev/null", O_RDONLY); | |
if (_stdout == -1) | |
_stdout = open("/dev/null", O_WRONLY); | |
if (_stderr == -1) | |
_stderr = open("/dev/null", O_WRONLY); | |
if (_stdin > -1) { | |
dup2(_stdin, STDIN_FILENO); | |
closefd(_stdin); | |
} | |
if (_stdout > -1) { | |
dup2(_stdout, STDOUT_FILENO); | |
closefd(_stdout); | |
} | |
if (_stderr > -1) { | |
dup2(_stderr, STDERR_FILENO); | |
closefd(_stderr); | |
} | |
if (!in->has_jail && in->user && in->pw_gid && initgroups(in->user, in->pw_gid)) { | |
ERROR("failed to initgroups() for user %s: %m\n", in->user); | |
exit(127); | |
} | |
if (!in->has_jail && in->gr_gid && setgid(in->gr_gid)) { | |
ERROR("failed to set group id %d: %m\n", in->gr_gid); | |
exit(127); | |
} | |
if (!in->has_jail && in->uid && setuid(in->uid)) { | |
ERROR("failed to set user id %d: %m\n", in->uid); | |
exit(127); | |
} | |
execvp(argv[0], argv); | |
exit(127); | |
} | |
static void | |
instance_add_cgroup(const char *service, const char *instance) | |
{ | |
struct stat sb; | |
char cgnamebuf[256]; | |
int fd; | |
if (stat("/sys/fs/cgroup/cgroup.subtree_control", &sb)) | |
return; | |
mkdir(CGROUP_BASEDIR, 0700); | |
snprintf(cgnamebuf, sizeof(cgnamebuf), "%s/%s", CGROUP_BASEDIR, service); | |
mkdir(cgnamebuf, 0700); | |
snprintf(cgnamebuf, sizeof(cgnamebuf), "%s/%s/%s", CGROUP_BASEDIR, service, instance); | |
mkdir(cgnamebuf, 0700); | |
strcat(cgnamebuf, "/cgroup.procs"); | |
fd = open(cgnamebuf, O_WRONLY); | |
if (fd == -1) | |
return; | |
dprintf(fd, "%d", getpid()); | |
close(fd); | |
} | |
static void | |
instance_free_stdio(struct service_instance *in) | |
{ | |
if (in->_stdout.fd.fd > -1) { | |
ustream_free(&in->_stdout.stream); | |
close(in->_stdout.fd.fd); | |
in->_stdout.fd.fd = -1; | |
} | |
if (in->_stderr.fd.fd > -1) { | |
ustream_free(&in->_stderr.stream); | |
close(in->_stderr.fd.fd); | |
in->_stderr.fd.fd = -1; | |
} | |
if (in->console.fd.fd > -1) { | |
ustream_free(&in->console.stream); | |
close(in->console.fd.fd); | |
in->console.fd.fd = -1; | |
} | |
if (in->console_client.fd.fd > -1) { | |
ustream_free(&in->console_client.stream); | |
close(in->console_client.fd.fd); | |
in->console_client.fd.fd = -1; | |
} | |
} | |
void | |
instance_start(struct service_instance *in) | |
{ | |
int pid; | |
int opipe[2] = { -1, -1 }; | |
int epipe[2] = { -1, -1 }; | |
if (!avl_is_empty(&in->errors.avl)) { | |
LOG("Not starting instance %s::%s, an error was indicated\n", in->srv->name, in->name); | |
return; | |
} | |
if (!in->bundle && !in->command) { | |
LOG("Not starting instance %s::%s, command not set\n", in->srv->name, in->name); | |
return; | |
} | |
if (in->proc.pending) { | |
if (in->halt) | |
in->restart = true; | |
return; | |
} | |
instance_free_stdio(in); | |
if (in->_stdout.fd.fd > -2) { | |
if (pipe(opipe)) { | |
ULOG_WARN("pipe() failed: %m\n"); | |
opipe[0] = opipe[1] = -1; | |
} | |
} | |
if (in->_stderr.fd.fd > -2) { | |
if (pipe(epipe)) { | |
ULOG_WARN("pipe() failed: %m\n"); | |
epipe[0] = epipe[1] = -1; | |
} | |
} | |
in->restart = false; | |
in->halt = false; | |
if (!in->valid) | |
return; | |
pid = fork(); | |
if (pid < 0) | |
return; | |
if (!pid) { | |
uloop_done(); | |
closefd(opipe[0]); | |
closefd(epipe[0]); | |
instance_add_cgroup(in->srv->name, in->name); | |
instance_run(in, opipe[1], epipe[1]); | |
return; | |
} | |
P_DEBUG(2, "Started instance %s::%s[%d]\n", in->srv->name, in->name, pid); | |
in->proc.pid = pid; | |
instance_writepid(in); | |
clock_gettime(CLOCK_MONOTONIC, &in->start); | |
uloop_process_add(&in->proc); | |
if (opipe[0] > -1) { | |
ustream_fd_init(&in->_stdout, opipe[0]); | |
closefd(opipe[1]); | |
fcntl(opipe[0], F_SETFD, FD_CLOEXEC); | |
} | |
if (epipe[0] > -1) { | |
ustream_fd_init(&in->_stderr, epipe[0]); | |
closefd(epipe[1]); | |
fcntl(epipe[0], F_SETFD, FD_CLOEXEC); | |
} | |
if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) { | |
uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000); | |
P_DEBUG(2, "Started instance %s::%s watchdog timer : timeout = %d\n", in->srv->name, in->name, in->watchdog.freq); | |
} | |
service_event("instance.start", in->srv->name, in->name); | |
} | |
static void | |
instance_stdio(struct ustream *s, int prio, struct service_instance *in) | |
{ | |
char *newline, *str, *arg0, ident[32]; | |
int len; | |
arg0 = basename(blobmsg_data(blobmsg_data(in->command))); | |
snprintf(ident, sizeof(ident), "%s[%d]", arg0, in->proc.pid); | |
ulog_open(ULOG_SYSLOG, in->syslog_facility, ident); | |
do { | |
str = ustream_get_read_buf(s, &len); | |
if (!str) | |
break; | |
newline = memchr(str, '\n', len); | |
if (!newline && (s->r.buffer_len != len)) | |
break; | |
if (newline) { | |
*newline = 0; | |
len = newline + 1 - str; | |
} | |
ulog(prio, "%s\n", str); | |
ustream_consume(s, len); | |
} while (1); | |
ulog_open(ULOG_SYSLOG, LOG_DAEMON, "procd"); | |
} | |
static void | |
instance_stdout(struct ustream *s, int bytes) | |
{ | |
instance_stdio(s, LOG_INFO, | |
container_of(s, struct service_instance, _stdout.stream)); | |
} | |
static void | |
instance_console(struct ustream *s, int bytes) | |
{ | |
struct service_instance *in = container_of(s, struct service_instance, console.stream); | |
char *buf; | |
int len; | |
do { | |
buf = ustream_get_read_buf(s, &len); | |
if (!buf) | |
break; | |
ulog(LOG_INFO, "out: %s\n", buf); | |
/* test if console client is attached */ | |
if (in->console_client.fd.fd > -1) | |
ustream_write(&in->console_client.stream, buf, len, false); | |
ustream_consume(s, len); | |
} while (1); | |
} | |
static void | |
instance_console_client(struct ustream *s, int bytes) | |
{ | |
struct service_instance *in = container_of(s, struct service_instance, console_client.stream); | |
char *buf; | |
int len; | |
do { | |
buf = ustream_get_read_buf(s, &len); | |
if (!buf) | |
break; | |
ulog(LOG_INFO, "in: %s\n", buf); | |
ustream_write(&in->console.stream, buf, len, false); | |
ustream_consume(s, len); | |
} while (1); | |
} | |
static void | |
instance_stderr(struct ustream *s, int bytes) | |
{ | |
instance_stdio(s, LOG_ERR, | |
container_of(s, struct service_instance, _stderr.stream)); | |
} | |
static void | |
instance_timeout(struct uloop_timeout *t) | |
{ | |
struct service_instance *in; | |
in = container_of(t, struct service_instance, timeout); | |
if (in->halt) { | |
LOG("Instance %s::%s pid %d not stopped on SIGTERM, sending SIGKILL instead\n", | |
in->srv->name, in->name, in->proc.pid); | |
kill(in->proc.pid, SIGKILL); | |
} else if (in->restart || in->respawn) | |
instance_start(in); | |
} | |
static void | |
instance_delete(struct service_instance *in) | |
{ | |
struct service *s = in->srv; | |
avl_delete(&s->instances.avl, &in->node.avl); | |
instance_free(in); | |
service_stopped(s); | |
} | |
static int | |
instance_exit_code(int ret) | |
{ | |
if (WIFEXITED(ret)) { | |
return WEXITSTATUS(ret); | |
} | |
if (WIFSIGNALED(ret)) { | |
return SIGNALLED_OFFSET + WTERMSIG(ret); | |
} | |
if (WIFSTOPPED(ret)) { | |
return WSTOPSIG(ret); | |
} | |
return 1; | |
} | |
static void | |
instance_exit(struct uloop_process *p, int ret) | |
{ | |
struct service_instance *in; | |
struct timespec tp; | |
long runtime; | |
in = container_of(p, struct service_instance, proc); | |
clock_gettime(CLOCK_MONOTONIC, &tp); | |
runtime = tp.tv_sec - in->start.tv_sec; | |
P_DEBUG(2, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime); | |
in->exit_code = instance_exit_code(ret); | |
uloop_timeout_cancel(&in->timeout); | |
uloop_timeout_cancel(&in->watchdog.timeout); | |
service_event("instance.stop", in->srv->name, in->name); | |
if (in->halt) { | |
instance_removepid(in); | |
if (in->restart) | |
instance_start(in); | |
else | |
instance_delete(in); | |
} else if (in->restart) { | |
instance_start(in); | |
} else if (in->respawn) { | |
if (runtime < in->respawn_threshold) | |
in->respawn_count++; | |
else | |
in->respawn_count = 0; | |
if (in->respawn_count > in->respawn_retry && in->respawn_retry > 0 ) { | |
LOG("Instance %s::%s s in a crash loop %d crashes, %ld seconds since last crash\n", | |
in->srv->name, in->name, in->respawn_count, runtime); | |
in->restart = in->respawn = 0; | |
in->halt = 1; | |
service_event("instance.fail", in->srv->name, in->name); | |
} else { | |
service_event("instance.respawn", in->srv->name, in->name); | |
uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000); | |
} | |
} | |
} | |
void | |
instance_stop(struct service_instance *in, bool halt) | |
{ | |
if (!in->proc.pending) { | |
if (halt) | |
instance_delete(in); | |
return; | |
} | |
in->halt = halt; | |
in->restart = in->respawn = false; | |
kill(in->proc.pid, SIGTERM); | |
if (!in->has_jail) | |
uloop_timeout_set(&in->timeout, in->term_timeout * 1000); | |
} | |
static void | |
instance_restart(struct service_instance *in) | |
{ | |
if (!in->proc.pending) | |
return; | |
if (in->reload_signal) { | |
kill(in->proc.pid, in->reload_signal); | |
return; | |
} | |
in->halt = true; | |
in->restart = true; | |
kill(in->proc.pid, SIGTERM); | |
if (!in->has_jail) | |
uloop_timeout_set(&in->timeout, in->term_timeout * 1000); | |
} | |
static void | |
instance_watchdog(struct uloop_timeout *t) | |
{ | |
struct service_instance *in = container_of(t, struct service_instance, watchdog.timeout); | |
P_DEBUG(3, "instance %s::%s watchdog timer expired\n", in->srv->name, in->name); | |
if (in->respawn) | |
instance_restart(in); | |
else | |
instance_stop(in, true); | |
} | |
/*
 * Compare two optional strings.
 * Returns false when both are NULL or both are non-NULL and equal,
 * true otherwise.
 */
static bool string_changed(const char *a, const char *b)
{
	/* with at least one NULL they only match if both are NULL */
	if (!a || !b)
		return a != b;

	return strcmp(a, b) != 0;
}
static bool | |
instance_config_changed(struct service_instance *in, struct service_instance *in_new) | |
{ | |
if (!in->valid) | |
return true; | |
if (!blob_attr_equal(in->command, in_new->command)) | |
return true; | |
if (string_changed(in->bundle, in_new->bundle)) | |
return true; | |
if (string_changed(in->infra, in_new->infra)) | |
return true; | |
if (string_changed(in->extroot, in_new->extroot)) | |
return true; | |
if (string_changed(in->overlaydir, in_new->overlaydir)) | |
return true; | |
if (string_changed(in->tmpoverlaysize, in_new->tmpoverlaysize)) | |
return true; | |
if (!blobmsg_list_equal(&in->env, &in_new->env)) | |
return true; | |
if (!blobmsg_list_equal(&in->netdev, &in_new->netdev)) | |
return true; | |
if (!blobmsg_list_equal(&in->file, &in_new->file)) | |
return true; | |
if (in->nice != in_new->nice) | |
return true; | |
if (in->syslog_facility != in_new->syslog_facility) | |
return true; | |
if (string_changed(in->user, in_new->user)) | |
return true; | |
if (string_changed(in->group, in_new->group)) | |
return true; | |
if (in->uid != in_new->uid) | |
return true; | |
if (in->pw_gid != in_new->pw_gid) | |
return true; | |
if (in->gr_gid != in_new->gr_gid) | |
return true; | |
if (string_changed(in->pidfile, in_new->pidfile)) | |
return true; | |
if (in->respawn_retry != in_new->respawn_retry) | |
return true; | |
if (in->respawn_threshold != in_new->respawn_threshold) | |
return true; | |
if (in->respawn_timeout != in_new->respawn_timeout) | |
return true; | |
if (in->reload_signal != in_new->reload_signal) | |
return true; | |
if (in->term_timeout != in_new->term_timeout) | |
return true; | |
if (string_changed(in->seccomp, in_new->seccomp)) | |
return true; | |
if (string_changed(in->capabilities, in_new->capabilities)) | |
return true; | |
if (!blobmsg_list_equal(&in->limits, &in_new->limits)) | |
return true; | |
if (!blobmsg_list_equal(&in->jail.mount, &in_new->jail.mount)) | |
return true; | |
if (!blobmsg_list_equal(&in->jail.setns, &in_new->jail.setns)) | |
return true; | |
if (!blobmsg_list_equal(&in->errors, &in_new->errors)) | |
return true; | |
if (in->has_jail != in_new->has_jail) | |
return true; | |
if (in->trace != in_new->trace) | |
return true; | |
if (in->require_jail != in_new->require_jail) | |
return true; | |
if (in->immediately != in_new->immediately) | |
return true; | |
if (in->no_new_privs != in_new->no_new_privs) | |
return true; | |
if (string_changed(in->jail.name, in_new->jail.name)) | |
return true; | |
if (string_changed(in->jail.hostname, in_new->jail.hostname)) | |
return true; | |
if (string_changed(in->jail.pidfile, in_new->jail.pidfile)) | |
return true; | |
if (in->jail.procfs != in_new->jail.procfs) | |
return true; | |
if (in->jail.sysfs != in_new->jail.sysfs) | |
return true; | |
if (in->jail.ubus != in_new->jail.ubus) | |
return true; | |
if (in->jail.log != in_new->jail.log) | |
return true; | |
if (in->jail.ronly != in_new->jail.ronly) | |
return true; | |
if (in->jail.netns != in_new->jail.netns) | |
return true; | |
if (in->jail.userns != in_new->jail.userns) | |
return true; | |
if (in->jail.cgroupsns != in_new->jail.cgroupsns) | |
return true; | |
if (in->jail.console != in_new->jail.console) | |
return true; | |
if (in->watchdog.mode != in_new->watchdog.mode) | |
return true; | |
if (in->watchdog.freq != in_new->watchdog.freq) | |
return true; | |
return false; | |
} | |
static bool | |
instance_netdev_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2) | |
{ | |
struct instance_netdev *n1 = container_of(l1, struct instance_netdev, node); | |
struct instance_netdev *n2 = container_of(l2, struct instance_netdev, node); | |
return n1->ifindex == n2->ifindex; | |
} | |
static void | |
instance_netdev_update(struct blobmsg_list_node *l) | |
{ | |
struct instance_netdev *n = container_of(l, struct instance_netdev, node); | |
n->ifindex = if_nametoindex(n->node.avl.key); | |
} | |
static bool | |
instance_file_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2) | |
{ | |
struct instance_file *f1 = container_of(l1, struct instance_file, node); | |
struct instance_file *f2 = container_of(l2, struct instance_file, node); | |
return !memcmp(f1->md5, f2->md5, sizeof(f1->md5)); | |
} | |
static void | |
instance_file_update(struct blobmsg_list_node *l) | |
{ | |
struct instance_file *f = container_of(l, struct instance_file, node); | |
md5_ctx_t md5; | |
char buf[256]; | |
int len, fd; | |
memset(f->md5, 0, sizeof(f->md5)); | |
fd = open(l->avl.key, O_RDONLY); | |
if (fd < 0) | |
return; | |
md5_begin(&md5); | |
do { | |
len = read(fd, buf, sizeof(buf)); | |
if (len < 0) { | |
if (errno == EINTR) | |
continue; | |
break; | |
} | |
if (!len) | |
break; | |
md5_hash(buf, len, &md5); | |
} while(1); | |
md5_end(f->md5, &md5); | |
close(fd); | |
} | |
/*
 * Fill list `l` from the children of `cur` without validating their
 * types; a NULL attribute leaves the list untouched.
 */
static void
instance_fill_any(struct blobmsg_list *l, struct blob_attr *cur)
{
	if (cur)
		blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), false);
}
static bool | |
instance_fill_array(struct blobmsg_list *l, struct blob_attr *cur, blobmsg_update_cb cb, bool array) | |
{ | |
struct blobmsg_list_node *node; | |
if (!cur) | |
return true; | |
if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING)) | |
return false; | |
blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), array); | |
if (cb) { | |
blobmsg_list_for_each(l, node) | |
cb(node); | |
} | |
return true; | |
} | |
static int | |
instance_jail_parse(struct service_instance *in, struct blob_attr *attr) | |
{ | |
struct blob_attr *tb[__JAIL_ATTR_MAX]; | |
struct jail *jail = &in->jail; | |
struct blobmsg_list_node *var; | |
blobmsg_parse(jail_attr, __JAIL_ATTR_MAX, tb, | |
blobmsg_data(attr), blobmsg_data_len(attr)); | |
jail->argc = 4; | |
if (tb[JAIL_ATTR_REQUIREJAIL] && blobmsg_get_bool(tb[JAIL_ATTR_REQUIREJAIL])) { | |
in->require_jail = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_IMMEDIATELY] && blobmsg_get_bool(tb[JAIL_ATTR_IMMEDIATELY])) { | |
in->immediately = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_NAME]) { | |
jail->name = strdup(blobmsg_get_string(tb[JAIL_ATTR_NAME])); | |
jail->argc += 2; | |
} | |
if (tb[JAIL_ATTR_HOSTNAME]) { | |
jail->hostname = strdup(blobmsg_get_string(tb[JAIL_ATTR_HOSTNAME])); | |
jail->argc += 2; | |
} | |
if (tb[JAIL_ATTR_PROCFS] && blobmsg_get_bool(tb[JAIL_ATTR_PROCFS])) { | |
jail->procfs = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_SYSFS] && blobmsg_get_bool(tb[JAIL_ATTR_SYSFS])) { | |
jail->sysfs = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_UBUS] && blobmsg_get_bool(tb[JAIL_ATTR_UBUS])) { | |
jail->ubus = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_LOG] && blobmsg_get_bool(tb[JAIL_ATTR_LOG])) { | |
jail->log = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_RONLY] && blobmsg_get_bool(tb[JAIL_ATTR_RONLY])) { | |
jail->ronly = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_NETNS] && blobmsg_get_bool(tb[JAIL_ATTR_NETNS])) { | |
jail->netns = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_USERNS] && blobmsg_get_bool(tb[JAIL_ATTR_USERNS])) { | |
jail->userns = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_CGROUPSNS] && blobmsg_get_bool(tb[JAIL_ATTR_CGROUPSNS])) { | |
jail->cgroupsns = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_CONSOLE] && blobmsg_get_bool(tb[JAIL_ATTR_CONSOLE])) { | |
jail->console = true; | |
jail->argc++; | |
} | |
if (tb[JAIL_ATTR_PIDFILE]) { | |
jail->pidfile = strdup(blobmsg_get_string(tb[JAIL_ATTR_PIDFILE])); | |
jail->argc += 2; | |
} | |
if (tb[JAIL_ATTR_SETNS]) { | |
struct blob_attr *cur; | |
int rem; | |
blobmsg_for_each_attr(cur, tb[JAIL_ATTR_SETNS], rem) | |
jail->argc += 2; | |
blobmsg_list_fill(&jail->setns, blobmsg_data(tb[JAIL_ATTR_SETNS]), | |
blobmsg_data_len(tb[JAIL_ATTR_SETNS]), true); | |
} | |
if (tb[JAIL_ATTR_MOUNT]) { | |
struct blob_attr *cur; | |
int rem; | |
blobmsg_for_each_attr(cur, tb[JAIL_ATTR_MOUNT], rem) | |
jail->argc += 2; | |
instance_fill_array(&jail->mount, tb[JAIL_ATTR_MOUNT], NULL, false); | |
} | |
blobmsg_list_for_each(&in->env, var) | |
jail->argc += 2; | |
if (in->seccomp) | |
jail->argc += 2; | |
if (in->capabilities) | |
jail->argc += 2; | |
if (in->user) | |
jail->argc += 2; | |
if (in->group) | |
jail->argc += 2; | |
if (in->extroot) | |
jail->argc += 2; | |
if (in->overlaydir) | |
jail->argc += 2; | |
if (in->tmpoverlaysize) | |
jail->argc += 2; | |
if (in->no_new_privs) | |
jail->argc++; | |
if (in->bundle) | |
jail->argc += 2; | |
if (in->infra) | |
jail->argc += 2; | |
return true; | |
} | |
static bool | |
instance_config_parse_command(struct service_instance *in, struct blob_attr **tb) | |
{ | |
struct blob_attr *cur, *cur2; | |
bool ret = false; | |
int rem; | |
cur = tb[INSTANCE_ATTR_COMMAND]; | |
if (!cur) { | |
in->command = NULL; | |
return true; | |
} | |
if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING)) | |
return false; | |
blobmsg_for_each_attr(cur2, cur, rem) { | |
ret = true; | |
break; | |
} | |
in->command = cur; | |
return ret; | |
} | |
static bool | |
instance_config_parse(struct service_instance *in) | |
{ | |
struct blob_attr *tb[__INSTANCE_ATTR_MAX]; | |
struct blob_attr *cur, *cur2; | |
struct stat s; | |
int rem, r; | |
blobmsg_parse(instance_attr, __INSTANCE_ATTR_MAX, tb, | |
blobmsg_data(in->config), blobmsg_data_len(in->config)); | |
if (!tb[INSTANCE_ATTR_BUNDLE] && !instance_config_parse_command(in, tb)) | |
return false; | |
if (tb[INSTANCE_ATTR_TERMTIMEOUT]) | |
in->term_timeout = blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]); | |
if (tb[INSTANCE_ATTR_RESPAWN]) { | |
int i = 0; | |
uint32_t vals[3] = { 3600, 5, 5}; | |
blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_RESPAWN], rem) { | |
if ((i >= 3) && (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING)) | |
continue; | |
vals[i] = atoi(blobmsg_get_string(cur2)); | |
i++; | |
} | |
in->respawn = true; | |
in->respawn_count = 0; | |
in->respawn_threshold = vals[0]; | |
in->respawn_timeout = vals[1]; | |
in->respawn_retry = vals[2]; | |
} | |
if (tb[INSTANCE_ATTR_TRIGGER]) { | |
in->trigger = tb[INSTANCE_ATTR_TRIGGER]; | |
trigger_add(in->trigger, in); | |
} | |
if (tb[INSTANCE_ATTR_WATCH]) { | |
blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCH], rem) { | |
if (blobmsg_type(cur2) != BLOBMSG_TYPE_STRING) | |
continue; | |
P_DEBUG(3, "watch for %s\n", blobmsg_get_string(cur2)); | |
watch_add(blobmsg_get_string(cur2), in); | |
} | |
} | |
if ((cur = tb[INSTANCE_ATTR_NICE])) { | |
in->nice = (int8_t) blobmsg_get_u32(cur); | |
if (in->nice < -20 || in->nice > 20) | |
return false; | |
} | |
if (tb[INSTANCE_ATTR_USER]) { | |
const char *user = blobmsg_get_string(tb[INSTANCE_ATTR_USER]); | |
struct passwd *p = getpwnam(user); | |
if (p) { | |
in->user = strdup(user); | |
in->uid = p->pw_uid; | |
in->gr_gid = in->pw_gid = p->pw_gid; | |
} | |
} | |
if (tb[INSTANCE_ATTR_GROUP]) { | |
const char *group = blobmsg_get_string(tb[INSTANCE_ATTR_GROUP]); | |
struct group *p = getgrnam(group); | |
if (p) { | |
in->group = strdup(group); | |
in->gr_gid = p->gr_gid; | |
} | |
} | |
if (tb[INSTANCE_ATTR_TRACE]) | |
in->trace = blobmsg_get_bool(tb[INSTANCE_ATTR_TRACE]); | |
if (tb[INSTANCE_ATTR_NO_NEW_PRIVS]) | |
in->no_new_privs = blobmsg_get_bool(tb[INSTANCE_ATTR_NO_NEW_PRIVS]); | |
if (!in->trace && tb[INSTANCE_ATTR_SECCOMP]) | |
in->seccomp = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_SECCOMP])); | |
if (tb[INSTANCE_ATTR_CAPABILITIES]) | |
in->capabilities = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_CAPABILITIES])); | |
if (tb[INSTANCE_ATTR_EXTROOT]) | |
in->extroot = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_EXTROOT])); | |
if (tb[INSTANCE_ATTR_OVERLAYDIR]) | |
in->overlaydir = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_OVERLAYDIR])); | |
if (tb[INSTANCE_ATTR_TMPOVERLAYSIZE]) | |
in->tmpoverlaysize = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_TMPOVERLAYSIZE])); | |
if (tb[INSTANCE_ATTR_BUNDLE]) | |
in->bundle = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_BUNDLE])); | |
if (tb[INSTANCE_ATTR_INFRA]) | |
in->infra = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_INFRA])); | |
if (tb[INSTANCE_ATTR_PIDFILE]) { | |
char *pidfile = blobmsg_get_string(tb[INSTANCE_ATTR_PIDFILE]); | |
if (pidfile) | |
in->pidfile = strdup(pidfile); | |
} | |
if (tb[INSTANCE_ATTR_RELOADSIG]) | |
in->reload_signal = blobmsg_get_u32(tb[INSTANCE_ATTR_RELOADSIG]); | |
if (tb[INSTANCE_ATTR_STDOUT] && blobmsg_get_bool(tb[INSTANCE_ATTR_STDOUT])) | |
in->_stdout.fd.fd = -1; | |
if (tb[INSTANCE_ATTR_STDERR] && blobmsg_get_bool(tb[INSTANCE_ATTR_STDERR])) | |
in->_stderr.fd.fd = -1; | |
instance_fill_any(&in->data, tb[INSTANCE_ATTR_DATA]); | |
if (!instance_fill_array(&in->env, tb[INSTANCE_ATTR_ENV], NULL, false)) | |
return false; | |
if (!instance_fill_array(&in->netdev, tb[INSTANCE_ATTR_NETDEV], instance_netdev_update, true)) | |
return false; | |
if (!instance_fill_array(&in->file, tb[INSTANCE_ATTR_FILE], instance_file_update, true)) | |
return false; | |
if (!instance_fill_array(&in->limits, tb[INSTANCE_ATTR_LIMITS], NULL, false)) | |
return false; | |
if (!instance_fill_array(&in->errors, tb[INSTANCE_ATTR_ERROR], NULL, true)) | |
return false; | |
if (tb[INSTANCE_ATTR_FACILITY]) { | |
int facility = syslog_facility_str_to_int(blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY])); | |
if (facility != -1) { | |
in->syslog_facility = facility; | |
P_DEBUG(3, "setting facility '%s'\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY])); | |
} else | |
P_DEBUG(3, "unknown syslog facility '%s' given, using default (LOG_DAEMON)\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY])); | |
} | |
if (tb[INSTANCE_ATTR_WATCHDOG]) { | |
int i = 0; | |
uint32_t vals[2] = { 0, 30 }; | |
blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCHDOG], rem) { | |
if (i >= 2) | |
break; | |
vals[i] = atoi(blobmsg_get_string(cur2)); | |
i++; | |
} | |
if (vals[0] >= 0 && vals[0] < __INSTANCE_WATCHDOG_MODE_MAX) { | |
in->watchdog.mode = vals[0]; | |
P_DEBUG(3, "setting watchdog mode (%d)\n", vals[0]); | |
} else { | |
in->watchdog.mode = 0; | |
P_DEBUG(3, "unknown watchdog mode (%d) given, using default (0)\n", vals[0]); | |
} | |
if (vals[1] > 0) { | |
in->watchdog.freq = vals[1]; | |
P_DEBUG(3, "setting watchdog timeout (%d)\n", vals[0]); | |
} else { | |
in->watchdog.freq = 30; | |
P_DEBUG(3, "invalid watchdog timeout (%d) given, using default (30)\n", vals[1]); | |
} | |
} | |
if (!in->trace && tb[INSTANCE_ATTR_JAIL]) | |
in->has_jail = instance_jail_parse(in, tb[INSTANCE_ATTR_JAIL]); | |
if (in->has_jail) { | |
r = stat(UJAIL_BIN_PATH, &s); | |
if (r < 0) { | |
if (in->require_jail) { | |
ERROR("Cannot jail service %s::%s. %s: %m (%d)\n", | |
in->srv->name, in->name, UJAIL_BIN_PATH, r); | |
return false; | |
} | |
P_DEBUG(2, "unable to find %s: %m (%d)\n", UJAIL_BIN_PATH, r); | |
in->has_jail = false; | |
} | |
} | |
return true; | |
} | |
static void | |
instance_config_cleanup(struct service_instance *in) | |
{ | |
blobmsg_list_free(&in->env); | |
blobmsg_list_free(&in->data); | |
blobmsg_list_free(&in->netdev); | |
blobmsg_list_free(&in->file); | |
blobmsg_list_free(&in->limits); | |
blobmsg_list_free(&in->errors); | |
blobmsg_list_free(&in->jail.mount); | |
blobmsg_list_free(&in->jail.setns); | |
} | |
/*
 * Replace the heap string at *dst with a private copy of src.
 *
 * The previous value of *dst is freed.  A NULL src (or a failed allocation)
 * leaves *dst set to NULL.  The copy is taken before the old value is
 * released, so the call stays well-defined even if src aliases *dst.
 */
static void
instance_config_move_strdup(char **dst, char *src)
{
	char *copy = src ? strdup(src) : NULL;

	free(*dst);
	*dst = copy;
}
static void | |
instance_config_move(struct service_instance *in, struct service_instance *in_src) | |
{ | |
instance_config_cleanup(in); | |
blobmsg_list_move(&in->env, &in_src->env); | |
blobmsg_list_move(&in->data, &in_src->data); | |
blobmsg_list_move(&in->netdev, &in_src->netdev); | |
blobmsg_list_move(&in->file, &in_src->file); | |
blobmsg_list_move(&in->limits, &in_src->limits); | |
blobmsg_list_move(&in->errors, &in_src->errors); | |
blobmsg_list_move(&in->jail.mount, &in_src->jail.mount); | |
blobmsg_list_move(&in->jail.setns, &in_src->jail.setns); | |
in->trigger = in_src->trigger; | |
in->command = in_src->command; | |
in->respawn = in_src->respawn; | |
in->respawn_retry = in_src->respawn_retry; | |
in->respawn_threshold = in_src->respawn_threshold; | |
in->respawn_timeout = in_src->respawn_timeout; | |
in->reload_signal = in_src->reload_signal; | |
in->term_timeout = in_src->term_timeout; | |
in->watchdog.mode = in_src->watchdog.mode; | |
in->watchdog.freq = in_src->watchdog.freq; | |
in->watchdog.timeout = in_src->watchdog.timeout; | |
in->name = in_src->name; | |
in->nice = in_src->nice; | |
in->trace = in_src->trace; | |
in->node.avl.key = in_src->node.avl.key; | |
in->syslog_facility = in_src->syslog_facility; | |
in->require_jail = in_src->require_jail; | |
in->no_new_privs = in_src->no_new_privs; | |
in->immediately = in_src->immediately; | |
in->uid = in_src->uid; | |
in->pw_gid = in_src->pw_gid; | |
in->gr_gid = in_src->gr_gid; | |
in->has_jail = in_src->has_jail; | |
in->jail.procfs = in_src->jail.procfs; | |
in->jail.sysfs = in_src->jail.sysfs; | |
in->jail.ubus = in_src->jail.ubus; | |
in->jail.log = in_src->jail.log; | |
in->jail.ronly = in_src->jail.ronly; | |
in->jail.netns = in_src->jail.netns; | |
in->jail.cgroupsns = in_src->jail.cgroupsns; | |
in->jail.console = in_src->jail.console; | |
in->jail.argc = in_src->jail.argc; | |
instance_config_move_strdup(&in->pidfile, in_src->pidfile); | |
instance_config_move_strdup(&in->seccomp, in_src->seccomp); | |
instance_config_move_strdup(&in->capabilities, in_src->capabilities); | |
instance_config_move_strdup(&in->bundle, in_src->bundle); | |
instance_config_move_strdup(&in->infra, in_src->infra); | |
instance_config_move_strdup(&in->extroot, in_src->extroot); | |
instance_config_move_strdup(&in->overlaydir, in_src->overlaydir); | |
instance_config_move_strdup(&in->tmpoverlaysize, in_src->tmpoverlaysize); | |
instance_config_move_strdup(&in->user, in_src->user); | |
instance_config_move_strdup(&in->group, in_src->group); | |
instance_config_move_strdup(&in->jail.name, in_src->jail.name); | |
instance_config_move_strdup(&in->jail.hostname, in_src->jail.hostname); | |
instance_config_move_strdup(&in->jail.pidfile, in_src->jail.pidfile); | |
free(in->config); | |
in->config = in_src->config; | |
in_src->config = NULL; | |
} | |
void | |
instance_update(struct service_instance *in, struct service_instance *in_new) | |
{ | |
bool changed = instance_config_changed(in, in_new); | |
bool running = in->proc.pending; | |
bool stopping = in->halt; | |
if (!running || stopping) { | |
instance_config_move(in, in_new); | |
instance_start(in); | |
} else { | |
if (changed) | |
instance_restart(in); | |
instance_config_move(in, in_new); | |
/* restart happens in the child callback handler */ | |
} | |
} | |
void | |
instance_free(struct service_instance *in) | |
{ | |
instance_free_stdio(in); | |
uloop_process_delete(&in->proc); | |
uloop_timeout_cancel(&in->timeout); | |
uloop_timeout_cancel(&in->watchdog.timeout); | |
trigger_del(in); | |
watch_del(in); | |
instance_config_cleanup(in); | |
free(in->config); | |
free(in->user); | |
free(in->group); | |
free(in->extroot); | |
free(in->overlaydir); | |
free(in->tmpoverlaysize); | |
free(in->bundle); | |
free(in->infra); | |
free(in->jail.name); | |
free(in->jail.hostname); | |
free(in->jail.pidfile); | |
free(in->seccomp); | |
free(in->capabilities); | |
free(in->pidfile); | |
free(in); | |
} | |
void | |
instance_init(struct service_instance *in, struct service *s, struct blob_attr *config) | |
{ | |
config = blob_memdup(config); | |
in->srv = s; | |
in->name = blobmsg_name(config); | |
in->config = config; | |
in->timeout.cb = instance_timeout; | |
in->proc.cb = instance_exit; | |
in->term_timeout = 5; | |
in->syslog_facility = LOG_DAEMON; | |
in->exit_code = 0; | |
in->require_jail = false; | |
in->immediately = false; | |
in->_stdout.fd.fd = -2; | |
in->_stdout.stream.string_data = true; | |
in->_stdout.stream.notify_read = instance_stdout; | |
in->_stderr.fd.fd = -2; | |
in->_stderr.stream.string_data = true; | |
in->_stderr.stream.notify_read = instance_stderr; | |
in->console.fd.fd = -2; | |
in->console.stream.string_data = true; | |
in->console.stream.notify_read = instance_console; | |
in->console_client.fd.fd = -2; | |
in->console_client.stream.string_data = true; | |
in->console_client.stream.notify_read = instance_console_client; | |
blobmsg_list_init(&in->netdev, struct instance_netdev, node, instance_netdev_cmp); | |
blobmsg_list_init(&in->file, struct instance_file, node, instance_file_cmp); | |
blobmsg_list_simple_init(&in->env); | |
blobmsg_list_simple_init(&in->data); | |
blobmsg_list_simple_init(&in->limits); | |
blobmsg_list_simple_init(&in->errors); | |
blobmsg_list_simple_init(&in->jail.mount); | |
blobmsg_list_simple_init(&in->jail.setns); | |
in->watchdog.timeout.cb = instance_watchdog; | |
in->valid = instance_config_parse(in); | |
} | |
void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose) | |
{ | |
void *i; | |
if (!in->valid) | |
return; | |
i = blobmsg_open_table(b, in->name); | |
blobmsg_add_u8(b, "running", in->proc.pending); | |
if (in->proc.pending) | |
blobmsg_add_u32(b, "pid", in->proc.pid); | |
if (in->command) | |
blobmsg_add_blob(b, in->command); | |
if (in->bundle) | |
blobmsg_add_string(b, "bundle", in->bundle); | |
if (in->infra) | |
blobmsg_add_string(b, "infra", in->infra); | |
blobmsg_add_u32(b, "term_timeout", in->term_timeout); | |
if (!in->proc.pending) | |
blobmsg_add_u32(b, "exit_code", in->exit_code); | |
if (!avl_is_empty(&in->errors.avl)) { | |
struct blobmsg_list_node *var; | |
void *e = blobmsg_open_array(b, "errors"); | |
blobmsg_list_for_each(&in->errors, var) | |
blobmsg_add_string(b, NULL, blobmsg_data(var->data)); | |
blobmsg_close_table(b, e); | |
} | |
if (!avl_is_empty(&in->env.avl)) { | |
struct blobmsg_list_node *var; | |
void *e = blobmsg_open_table(b, "env"); | |
blobmsg_list_for_each(&in->env, var) | |
blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data)); | |
blobmsg_close_table(b, e); | |
} | |
if (!avl_is_empty(&in->data.avl)) { | |
struct blobmsg_list_node *var; | |
void *e = blobmsg_open_table(b, "data"); | |
blobmsg_list_for_each(&in->data, var) | |
blobmsg_add_blob(b, var->data); | |
blobmsg_close_table(b, e); | |
} | |
if (!avl_is_empty(&in->limits.avl)) { | |
struct blobmsg_list_node *var; | |
void *e = blobmsg_open_table(b, "limits"); | |
blobmsg_list_for_each(&in->limits, var) | |
blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data)); | |
blobmsg_close_table(b, e); | |
} | |
if (!avl_is_empty(&in->netdev.avl)) { | |
struct blobmsg_list_node *var; | |
void *n = blobmsg_open_array(b, "netdev"); | |
blobmsg_list_for_each(&in->netdev, var) | |
blobmsg_add_string(b, NULL, blobmsg_data(var->data)); | |
blobmsg_close_array(b, n); | |
} | |
if (in->reload_signal) | |
blobmsg_add_u32(b, "reload_signal", in->reload_signal); | |
if (in->respawn) { | |
void *r = blobmsg_open_table(b, "respawn"); | |
blobmsg_add_u32(b, "threshold", in->respawn_threshold); | |
blobmsg_add_u32(b, "timeout", in->respawn_timeout); | |
blobmsg_add_u32(b, "retry", in->respawn_retry); | |
blobmsg_close_table(b, r); | |
} | |
if (in->trace) | |
blobmsg_add_u8(b, "trace", true); | |
if (in->no_new_privs) | |
blobmsg_add_u8(b, "no_new_privs", true); | |
if (in->seccomp) | |
blobmsg_add_string(b, "seccomp", in->seccomp); | |
if (in->capabilities) | |
blobmsg_add_string(b, "capabilities", in->capabilities); | |
if (in->pidfile) | |
blobmsg_add_string(b, "pidfile", in->pidfile); | |
if (in->user) | |
blobmsg_add_string(b, "user", in->user); | |
if (in->group) | |
blobmsg_add_string(b, "group", in->group); | |
if (in->has_jail) { | |
void *r = blobmsg_open_table(b, "jail"); | |
if (in->jail.name) | |
blobmsg_add_string(b, "name", in->jail.name); | |
if (!in->bundle) { | |
if (in->jail.hostname) | |
blobmsg_add_string(b, "hostname", in->jail.hostname); | |
blobmsg_add_u8(b, "procfs", in->jail.procfs); | |
blobmsg_add_u8(b, "sysfs", in->jail.sysfs); | |
blobmsg_add_u8(b, "ubus", in->jail.ubus); | |
blobmsg_add_u8(b, "log", in->jail.log); | |
blobmsg_add_u8(b, "ronly", in->jail.ronly); | |
blobmsg_add_u8(b, "netns", in->jail.netns); | |
blobmsg_add_u8(b, "userns", in->jail.userns); | |
blobmsg_add_u8(b, "cgroupsns", in->jail.cgroupsns); | |
} else { | |
if (in->jail.pidfile) | |
blobmsg_add_string(b, "pidfile", in->jail.pidfile); | |
blobmsg_add_u8(b, "immediately", in->immediately); | |
} | |
blobmsg_add_u8(b, "console", (in->console.fd.fd > -1)); | |
blobmsg_close_table(b, r); | |
if (!avl_is_empty(&in->jail.mount.avl)) { | |
struct blobmsg_list_node *var; | |
void *e = blobmsg_open_table(b, "mount"); | |
blobmsg_list_for_each(&in->jail.mount, var) | |
blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data)); | |
blobmsg_close_table(b, e); | |
} | |
if (!avl_is_empty(&in->jail.setns.avl)) { | |
struct blobmsg_list_node *var; | |
void *s = blobmsg_open_array(b, "setns"); | |
blobmsg_list_for_each(&in->jail.setns, var) | |
blobmsg_add_blob(b, var->data); | |
blobmsg_close_array(b, s); | |
} | |
} | |
if (in->extroot) | |
blobmsg_add_string(b, "extroot", in->extroot); | |
if (in->overlaydir) | |
blobmsg_add_string(b, "overlaydir", in->overlaydir); | |
if (in->tmpoverlaysize) | |
blobmsg_add_string(b, "tmpoverlaysize", in->tmpoverlaysize); | |
if (verbose && in->trigger) | |
blobmsg_add_blob(b, in->trigger); | |
if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) { | |
void *r = blobmsg_open_table(b, "watchdog"); | |
blobmsg_add_u32(b, "mode", in->watchdog.mode); | |
blobmsg_add_u32(b, "timeout", in->watchdog.freq); | |
blobmsg_close_table(b, r); | |
} | |
blobmsg_close_table(b, i); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2013 Felix Fietkau <nbd@openwrt.org> | |
* Copyright (C) 2013 John Crispin <blogic@openwrt.org> | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU Lesser General Public License version 2.1 | |
* as published by the Free Software Foundation | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
*/ | |
#ifndef __PROCD_INSTANCE_H | |
#define __PROCD_INSTANCE_H | |
#include <libubox/vlist.h> | |
#include <libubox/uloop.h> | |
#include <libubox/ustream.h> | |
#include "../utils/utils.h" | |
#define RESPAWN_ERROR (5 * 60) | |
#define SIGNALLED_OFFSET 128 | |
/*
 * Jail settings of a service instance.
 */
struct jail { | |
/* boolean switches; the jail attribute parser sets each one that is enabled
 * in the configuration and increments argc by one for it */
bool procfs; | |
bool sysfs; | |
bool ubus; | |
bool log; | |
bool ronly; | |
bool netns; | |
bool userns; | |
bool cgroupsns; | |
bool console; | |
/* strdup()ed strings, released in instance_free(); each one that is set
 * adds two to argc */
char *name; | |
char *hostname; | |
char *pidfile; | |
/* per-entry lists; every configured entry adds two to argc */
struct blobmsg_list mount; | |
struct blobmsg_list setns; | |
/* running count accumulated while the jail attributes are parsed
 * (presumably the number of ujail command line arguments -- confirm
 * against the code that builds the command line) */
int argc; | |
}; | |
/*
 * Watchdog modes of an instance.  The configured mode must be lower than
 * __INSTANCE_WATCHDOG_MODE_MAX, otherwise the parser falls back to 0
 * (INSTANCE_WATCHDOG_MODE_DISABLED).
 */
typedef enum instance_watchdog { | |
INSTANCE_WATCHDOG_MODE_DISABLED, | |
INSTANCE_WATCHDOG_MODE_PASSIVE, | |
INSTANCE_WATCHDOG_MODE_ACTIVE, | |
/* number of valid modes, used as the exclusive upper bound */
__INSTANCE_WATCHDOG_MODE_MAX, | |
} instance_watchdog_mode_t; | |
/*
 * Per-instance watchdog state.
 */
struct watchdog { | |
/* selected mode; INSTANCE_WATCHDOG_MODE_DISABLED hides the watchdog in instance_dump() */
instance_watchdog_mode_t mode; | |
/* second value of the watchdog config attribute, 30 when missing or zero;
 * reported as "timeout" by instance_dump() */
uint32_t freq; | |
/* uloop timer; instance_init() installs instance_watchdog as its callback */
struct uloop_timeout timeout; | |
}; | |
/*
 * One instance of a service, together with its parsed configuration and
 * runtime state.
 */
struct service_instance { | |
struct vlist_node node; | |
/* owning service, set by instance_init() */
struct service *srv; | |
/* blobmsg name of the config blob; points into config, not separately allocated */
const char *name; | |
/* configured nice value, accepted range is -20..20 */
int8_t nice; | |
/* result of instance_config_parse(); instance_dump() skips invalid instances */
bool valid; | |
/* user name (strdup()ed) with the uid and primary gid from its passwd entry */
char *user; | |
uid_t uid; | |
gid_t pw_gid; | |
/* group name (strdup()ed); gr_gid defaults to the user's primary gid and is
 * overridden when an explicit group is configured */
char *group; | |
gid_t gr_gid; | |
/* treated as "stopping" by instance_update() */
bool halt; | |
bool restart; | |
bool respawn; | |
int respawn_count; | |
int reload_signal; | |
struct timespec start; | |
/* when set, neither seccomp nor a jail is configured for the instance */
bool trace; | |
bool has_jail; | |
/* when set, a missing ujail binary makes the configuration invalid */
bool require_jail; | |
bool immediately; | |
bool no_new_privs; | |
struct jail jail; | |
/* strdup()ed copies of the matching config attributes, freed in instance_free() */
char *seccomp; | |
char *capabilities; | |
char *pidfile; | |
char *extroot; | |
char *overlaydir; | |
char *tmpoverlaysize; | |
char *bundle; | |
char *infra; | |
/* defaults to LOG_DAEMON */
int syslog_facility; | |
int exit_code; | |
/* defaults to 5 */
uint32_t term_timeout; | |
/* respawn settings, defaults: threshold 3600, timeout 5, retry 5 */
uint32_t respawn_timeout; | |
uint32_t respawn_threshold; | |
uint32_t respawn_retry; | |
/* private copy of the configuration blob, owned by the instance */
struct blob_attr *config; | |
/* uloop handles; callbacks are instance_exit and instance_timeout */
struct uloop_process proc; | |
struct uloop_timeout timeout; | |
/* stream fds start at -2; stdout/stderr are set to -1 when enabled in the config */
struct ustream_fd _stdout; | |
struct ustream_fd _stderr; | |
struct ustream_fd console; | |
struct ustream_fd console_client; | |
/* attributes inside config, not separately allocated */
struct blob_attr *command; | |
struct blob_attr *trigger; | |
/* lists filled from the config, released by instance_config_cleanup() */
struct blobmsg_list env; | |
struct blobmsg_list data; | |
struct blobmsg_list netdev; | |
struct blobmsg_list file; | |
struct blobmsg_list limits; | |
struct blobmsg_list errors; | |
struct watchdog watchdog; | |
}; | |
/* Start the instance. */
void instance_start(struct service_instance *in);

/* Stop the instance. */
void instance_stop(struct service_instance *in, bool halt);

/*
 * Apply the configuration of in_new to in; a running instance is restarted
 * only when the configuration changed.
 */
void instance_update(struct service_instance *in, struct service_instance *in_new);

/*
 * Initialise in as an instance of service s from a copy of config; the
 * parse result is stored in in->valid.
 */
void instance_init(struct service_instance *in, struct service *s, struct blob_attr *config);

/* Release everything owned by the instance, including the instance itself. */
void instance_free(struct service_instance *in);

/*
 * Append a table describing the instance to b; verbose additionally
 * includes the trigger blob.
 */
void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose);
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2015 John Crispin <blogic@openwrt.org> | |
* Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org> | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU Lesser General Public License version 2.1 | |
* as published by the Free Software Foundation | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
*/ | |
#define _GNU_SOURCE | |
#include <sys/mount.h> | |
#include <sys/prctl.h> | |
#include <sys/wait.h> | |
#include <sys/types.h> | |
#include <sys/time.h> | |
#include <sys/resource.h> | |
#include <sys/stat.h> | |
#include <sys/sysmacros.h> | |
/* musl only defined 15 limit types, make sure all 16 are supported */ | |
#ifndef RLIMIT_RTTIME | |
#define RLIMIT_RTTIME 15 | |
#undef RLIMIT_NLIMITS | |
#define RLIMIT_NLIMITS 16 | |
#undef RLIM_NLIMITS | |
#define RLIM_NLIMITS 16 | |
#endif | |
#include <assert.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <errno.h> | |
#include <pwd.h> | |
#include <grp.h> | |
#include <string.h> | |
#include <fcntl.h> | |
#include <sched.h> | |
#include <linux/filter.h> | |
#include <linux/limits.h> | |
#include <linux/nsfs.h> | |
#include <linux/securebits.h> | |
#include <signal.h> | |
#include <inttypes.h> | |
#include "capabilities.h" | |
#include "elf.h" | |
#include "fs.h" | |
#include "jail.h" | |
#include "log.h" | |
#include "seccomp-oci.h" | |
#include "cgroups.h" | |
#include "netifd.h" | |
#include <libubox/blobmsg.h> | |
#include <libubox/blobmsg_json.h> | |
#include <libubox/list.h> | |
#include <libubox/vlist.h> | |
#include <libubox/uloop.h> | |
#include <libubox/utils.h> | |
#include <libubus.h> | |
#ifndef CLONE_NEWCGROUP | |
#define CLONE_NEWCGROUP 0x02000000 | |
#endif | |
#define STACK_SIZE (1024 * 1024) | |
#define OPT_ARGS "cC:d:e:EfFG:h:ij:J:ln:NoO:pP:r:R:sS:uU:w:t:T:y" | |
#define OCI_VERSION_STRING "1.0.2" | |
/*
 * One hook program; file, argv and envp are handed to execve() when the
 * hook is run.
 */
struct hook_execvpe { | |
char *file; | |
char **argv; | |
char **envp; | |
/* when greater than zero, a kill timer of 1000 * timeout is armed
 * (presumably seconds converted to milliseconds -- confirm) */
int timeout; | |
}; | |
/*
 * A sysctl entry/value pair; both strings are heap-allocated and released
 * by free_sysctl().
 */
struct sysctl_val { | |
char *entry; | |
char *value; | |
}; | |
/*
 * Description of one device node (presumably the arguments for mknod() and
 * the following ownership change -- confirm against the code that consumes
 * opts.devices).
 */
struct mknod_args { | |
/* heap-allocated, released by free_devices() */
char *path; | |
mode_t mode; | |
dev_t dev; | |
uid_t uid; | |
gid_t gid; | |
}; | |
/*
 * Global jail options.  free_opts() releases the heap members; comments
 * below note which ones.
 */
static struct { | |
/* sent as "name" in the console_set ubus request */
char *name; | |
/* released by free_opts() */
char *hostname; | |
/* released by free_opts() in the parent only */
char **jail_argv; | |
/* released by free_opts() */
char *cwd; | |
char *seccomp; | |
/* filter and struct are released by free_opts() in the parent only */
struct sock_fprog *ociseccomp; | |
char *capabilities; | |
struct jail_capset capset; | |
char *user; | |
char *group; | |
/* released by free_opts() */
char *extroot; | |
/* released by free_opts() */
char *overlaydir; | |
char *tmpoverlaysize; | |
/* released by free_opts() in the parent only */
char **envp; | |
/* released by free_opts() */
char *uidmap; | |
/* released by free_opts() */
char *gidmap; | |
char *pidfile; | |
/* released by free_sysctl() */
struct sysctl_val **sysctl; | |
int no_new_privs; | |
/* non-zero counts as "namespaces in use" in has_namespaces() */
int namespace; | |
/* one value per namespace type; -1 means unset (see has_namespaces()) */
struct { | |
int pid; | |
int net; | |
int ns; | |
int ipc; | |
int uts; | |
int user; | |
int cgroup; | |
#ifdef CLONE_NEWTIME | |
int time; | |
#endif | |
} setns; | |
int procfs; | |
int ronly; | |
int sysfs; | |
int console; | |
int pw_uid; | |
int pw_gid; | |
int gr_gid; | |
int root_map_uid; | |
gid_t *additional_gids; | |
size_t num_additional_gids; | |
mode_t umask; | |
bool set_umask; | |
int require_jail; | |
/* hook lists, one per hook stage; run_hooklist() walks such a list as a
 * NULL-terminated array of pointers; released by free_hooklist() */
struct { | |
struct hook_execvpe **createRuntime; | |
struct hook_execvpe **createContainer; | |
struct hook_execvpe **startContainer; | |
struct hook_execvpe **poststart; | |
struct hook_execvpe **poststop; | |
} hooks; | |
/* one optional heap entry per resource limit type, released by free_rlimits() */
struct rlimit *rlimits[RLIM_NLIMITS]; | |
int oom_score_adj; | |
bool set_oom_score_adj; | |
/* NULL-terminated array, released by free_devices() */
struct mknod_args **devices; | |
char *ocibundle; | |
/* released by free_opts() */
char *infra; | |
bool immediately; | |
/* released by free_opts() */
struct blob_attr *annotations; | |
int term_timeout; | |
} opts; | |
/* File-scope state of the jail program. */
static struct blob_buf ocibuf; | |
/* declared here explicitly; presumably no libc header provides a prototype -- confirm */
extern int pivot_root(const char *new_root, const char *put_old); | |
int debug = 0; | |
/* buffer of STACK_SIZE bytes (presumably the stack of the cloned child -- confirm) */
static char child_stack[STACK_SIZE]; | |
static struct ubus_context *parent_ctx; | |
/* PTY master opened by create_dev_console() */
int console_fd; | |
static inline bool has_namespaces(void) | |
{ | |
return ((opts.setns.pid != -1) || | |
(opts.setns.net != -1) || | |
(opts.setns.ns != -1) || | |
(opts.setns.ipc != -1) || | |
(opts.setns.uts != -1) || | |
(opts.setns.user != -1) || | |
(opts.setns.cgroup != -1) || | |
#ifdef CLONE_NEWTIME | |
(opts.setns.time != -1) || | |
#endif | |
opts.namespace); | |
} | |
/*
 * Free a NULL-terminated vector of heap strings together with the vector
 * itself.  A NULL vector is accepted and ignored.
 */
static void free_oci_envp(char **p) {
	size_t idx;

	if (!p)
		return;

	for (idx = 0; p[idx]; idx++)
		free(p[idx]);

	free(p);
}
static void free_hooklist(struct hook_execvpe **hooklist) | |
{ | |
struct hook_execvpe *cur; | |
if (!hooklist) | |
return; | |
cur = *hooklist; | |
while (cur) { | |
free_oci_envp(cur->argv); | |
free_oci_envp(cur->envp); | |
free(cur->file); | |
free(cur++); | |
} | |
free(hooklist); | |
} | |
static void free_sysctl(void) { | |
struct sysctl_val *cur; | |
if (!opts.sysctl) | |
return; | |
cur = *opts.sysctl; | |
while (cur) { | |
free(cur->entry); | |
free(cur->value); | |
free(cur++); | |
} | |
free(opts.sysctl); | |
} | |
static void free_devices(void) { | |
struct mknod_args **cur; | |
if (!opts.devices) | |
return; | |
cur = opts.devices; | |
while (*cur) { | |
free((*cur)->path); | |
free(*(cur++)); | |
} | |
free(opts.devices); | |
} | |
static void free_rlimits(void) { | |
int type; | |
for (type = 0; type < RLIM_NLIMITS; ++type) | |
free(opts.rlimits[type]); | |
} | |
static void free_opts(bool parent) { | |
free_library_search(); | |
mount_free(); | |
cgroups_free(); | |
/* we need to keep argv, envp and seccomp filter in child */ | |
if (parent) { /* parent-only */ | |
if (opts.ociseccomp) { | |
free(opts.ociseccomp->filter); | |
free(opts.ociseccomp); | |
} | |
free_oci_envp(opts.jail_argv); | |
free_oci_envp(opts.envp); | |
} | |
free_rlimits(); | |
free_sysctl(); | |
free_devices(); | |
free(opts.hostname); | |
free(opts.cwd); | |
free(opts.uidmap); | |
free(opts.gidmap); | |
free(opts.annotations); | |
free(opts.extroot); | |
free(opts.overlaydir); | |
free(opts.infra); | |
free_hooklist(opts.hooks.createRuntime); | |
free_hooklist(opts.hooks.createContainer); | |
free_hooklist(opts.hooks.startContainer); | |
free_hooklist(opts.hooks.poststart); | |
free_hooklist(opts.hooks.poststop); | |
} | |
/*
 * Mount an overlay filesystem on top of jail_root, using
 * <overlaydir>/upper as upper directory and <overlaydir>/work as work
 * directory.  Both directories, <upper>/etc and an <upper>/etc/resolv.conf
 * file are created beforehand.
 *
 * Returns 0 on success and -1 on any failure.
 */
static int mount_overlay(char *jail_root, char *overlaydir) {
	const char mountoptsformat[] = "lowerdir=%s,upperdir=%s,workdir=%s";
	char *upper = NULL, *work = NULL, *mntopts = NULL;
	char *upper_etc = NULL, *resolvconf = NULL;
	int rc = -1, fd;

	/* asprintf() leaves its output undefined on failure, so reset it to
	 * NULL before jumping to the common cleanup */
	if (asprintf(&upper, "%s%s", overlaydir, "/upper") < 0) {
		upper = NULL;
		goto done;
	}

	if (asprintf(&work, "%s%s", overlaydir, "/work") < 0) {
		work = NULL;
		goto done;
	}

	if (asprintf(&mntopts, mountoptsformat, jail_root, upper, work) < 0) {
		mntopts = NULL;
		goto done;
	}

	if (mkdir_p(upper, 0755) || mkdir_p(work, 0755))
		goto done;

	/*
	 * make sure /etc/resolv.conf exists in overlay and is owned by jail userns root
	 * this is to work-around a bug in overlayfs described in the overlayfs-userns
	 * patch:
	 * 3. modification of a file 'hithere' which is in l but not yet
	 * in u, and which is not owned by T, is not allowed, even if
	 * writes to u are allowed. This may be a bug in overlayfs,
	 * but it is safe behavior.
	 */
	if (asprintf(&upper_etc, "%s/etc", upper) < 0) {
		upper_etc = NULL;
		goto done;
	}

	if (mkdir_p(upper_etc, 0755))
		goto done;

	if (asprintf(&resolvconf, "%s/resolv.conf", upper_etc) < 0) {
		resolvconf = NULL;
		goto done;
	}

	fd = creat(resolvconf, 0644);
	if (fd >= 0) {
		close(fd);
	} else if (errno != EEXIST) {
		ERROR("creat(%s) failed: %m\n", resolvconf);
	}

	DEBUG("mount -t overlay %s %s (%s)\n", jail_root, jail_root, mntopts);

	if (!mount(jail_root, jail_root, "overlay", MS_NOATIME, mntopts))
		rc = 0;

done:
	free(resolvconf);
	free(upper_etc);
	free(mntopts);
	free(work);
	free(upper);

	return rc;
}
static void pass_console(int console_fd) | |
{ | |
struct ubus_context *child_ctx = ubus_connect(NULL); | |
static struct blob_buf req; | |
uint32_t id; | |
if (!child_ctx) | |
return; | |
blob_buf_init(&req, 0); | |
blobmsg_add_string(&req, "name", opts.name); | |
if (ubus_lookup_id(child_ctx, "container", &id) || | |
ubus_invoke_fd(child_ctx, id, "console_set", req.head, NULL, NULL, 3000, console_fd)) | |
INFO("ubus request failed\n"); | |
else | |
close(console_fd); | |
blob_buf_free(&req); | |
ubus_free(child_ctx); | |
} | |
static int create_dev_console(const char *jail_root) | |
{ | |
char *console_fname; | |
char dev_console_path[PATH_MAX]; | |
int slave_console_fd, dev_console_dummy; | |
/* Open UNIX/98 virtual console */ | |
console_fd = posix_openpt(O_RDWR | O_NOCTTY); | |
if (console_fd < 0) | |
return -1; | |
console_fname = ptsname(console_fd); | |
DEBUG("got console fd %d and PTS client name %s\n", console_fd, console_fname); | |
if (!console_fname) | |
goto no_console; | |
grantpt(console_fd); | |
unlockpt(console_fd); | |
/* pass PTY master to procd */ | |
pass_console(console_fd); | |
/* mount-bind PTY slave to /dev/console in jail */ | |
snprintf(dev_console_path, sizeof(dev_console_path), "%s/dev/console", jail_root); | |
dev_console_dummy = creat(dev_console_path, 0620); | |
if (dev_console_dummy < 0) | |
goto no_console; | |
close(dev_console_dummy); | |
if (mount(console_fname, dev_console_path, "bind", MS_BIND, NULL)) | |
goto no_console; | |
/* use PTY slave for stdio */ | |
slave_console_fd = open(console_fname, O_RDWR); /* | O_NOCTTY */ | |
if (slave_console_fd < 0) | |
goto no_console; | |
dup2(slave_console_fd, 0); | |
dup2(slave_console_fd, 1); | |
dup2(slave_console_fd, 2); | |
close(slave_console_fd); | |
INFO("using guest console %s\n", console_fname); | |
return 0; | |
no_console: | |
close(console_fd); | |
return 1; | |
} | |
/* 1 from just before a hook is forked until its exit has been handled */
static int hook_running = 0; | |
/* exit status, or terminating signal number, of the most recent hook */
static int hook_return_code = 0; | |
/* position in the NULL-terminated hook list that is being executed */
static struct hook_execvpe **current_hook = NULL; | |
/* callback invoked by run_hooklist() once the end of the list is reached */
typedef void (*hook_return_handler)(void); | |
static hook_return_handler hook_return_cb = NULL; | |
static void hook_process_timeout_cb(struct uloop_timeout *t); | |
/* timer that kills a hook which does not finish in time */
static struct uloop_timeout hook_process_timeout = { | |
.cb = hook_process_timeout_cb, | |
}; | |
static void run_hooklist(void); | |
static void hook_process_handler(struct uloop_process *c, int ret) | |
{ | |
uloop_timeout_cancel(&hook_process_timeout); | |
if (WIFEXITED(ret)) { | |
hook_return_code = WEXITSTATUS(ret); | |
if (hook_return_code) | |
ERROR("hook (%d) exited with exit: %d\n", c->pid, hook_return_code); | |
else | |
DEBUG("hook (%d) exited with exit: %d\n", c->pid, hook_return_code); | |
} else { | |
hook_return_code = WTERMSIG(ret); | |
ERROR("hook (%d) exited with signal: %d\n", c->pid, hook_return_code); | |
} | |
hook_running = 0; | |
++current_hook; | |
run_hooklist(); | |
} | |
static struct uloop_process hook_process = { | |
.cb = hook_process_handler, | |
}; | |
static void hook_process_timeout_cb(struct uloop_timeout *t) | |
{ | |
DEBUG("hook process failed to stop, sending SIGKILL\n"); | |
kill(hook_process.pid, SIGKILL); | |
} | |
static void run_hooklist(void) | |
{ | |
struct hook_execvpe *hook = *current_hook; | |
struct stat s; | |
if (!hook) | |
return hook_return_cb(); | |
DEBUG("executing hook %s\n", hook->file); | |
if (stat(hook->file, &s)) | |
hook_process_handler(&hook_process, ENOENT); | |
if (!((unsigned long)s.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) | |
hook_process_handler(&hook_process, EPERM); | |
hook_running = 1; | |
hook_process.pid = fork(); | |
if (hook_process.pid == 0) { | |
/* child */ | |
execve(hook->file, hook->argv, hook->envp); | |
ERROR("execve error %m\n"); | |
_exit(errno); | |
} else if (hook_process.pid < 0) { | |
/* fork error */ | |
ERROR("hook fork error\n"); | |
hook_running = 0; | |
hook_process_handler(&hook_process, errno); | |
} | |
/* parent */ | |
uloop_process_add(&hook_process); | |
if (hook->timeout > 0) | |
uloop_timeout_set(&hook_process_timeout, 1000 * hook->timeout); | |
uloop_run(); | |
if (hook_running) { | |
DEBUG("uloop interrupted, killing jail process\n"); | |
kill(hook_process.pid, SIGTERM); | |
uloop_timeout_set(&hook_process_timeout, 1000); | |
uloop_run(); | |
} | |
} | |
/*
 * Execute all hooks of a NULL-terminated list, then call return_cb.
 *
 * hooklist:  hook list, may be NULL when no hooks of this kind exist
 * return_cb: continuation called after the last hook was handled
 *
 * Without a hook list the continuation is called directly. Returning
 * afterwards matters: should return_cb ever return, run_hooklist() would
 * otherwise dereference the NULL list pointer.
 */
static void run_hooks(struct hook_execvpe **hooklist, hook_return_handler return_cb)
{
	if (!hooklist) {
		return_cb();
		return;
	}

	current_hook = hooklist;
	hook_return_cb = return_cb;

	run_hooklist();
}
static int apply_sysctl(const char *jail_root) | |
{ | |
struct sysctl_val **cur; | |
char *procdir, *fname; | |
int f; | |
if (!opts.sysctl) | |
return 0; | |
if (asprintf(&procdir, "%s/proc", jail_root) < 0) | |
return ENOMEM; | |
mkdir(procdir, 0700); | |
if (mount("proc", procdir, "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0)) | |
return EPERM; | |
cur = opts.sysctl; | |
while (*cur) { | |
if (asprintf(&fname, "%s/sys/%s", procdir, (*cur)->entry) < 0) | |
return ENOMEM; | |
DEBUG("sysctl: writing '%s' to %s\n", (*cur)->value, fname); | |
f = open(fname, O_WRONLY); | |
if (f < 0) { | |
ERROR("sysctl: can't open %s\n", fname); | |
free(fname); | |
return errno; | |
} | |
if (write(f, (*cur)->value, strlen((*cur)->value)) < 0) { | |
ERROR("sysctl: write to %s\n", fname); | |
free(fname); | |
close(f); | |
return errno; | |
} | |
free(fname); | |
close(f); | |
++cur; | |
} | |
umount(procdir); | |
rmdir(procdir); | |
free(procdir); | |
return 0; | |
} | |
/*
 * glibc defines makedev calling a function. Replace it by a pure macro so
 * that it only consists of constant arithmetic on its arguments.
 */
#if defined(__GLIBC__)
#undef makedev
/* bit layout of the device number taken from musl's sys/sysmacros.h */
#define makedev(maj, min) ( \
	(((maj) & 0xfffff000ULL) << 32) | \
	(((maj) & 0x00000fffULL) <<  8) | \
	(((min) & 0xffffff00ULL) << 12) | \
	(((min) & 0x000000ffULL)) )
#endif
static struct mknod_args default_devices[] = { | |
{ .path = "/dev/null", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 3) }, | |
{ .path = "/dev/zero", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 5) }, | |
{ .path = "/dev/full", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 7) }, | |
{ .path = "/dev/random", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 8) }, | |
{ .path = "/dev/urandom", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 9) }, | |
{ .path = "/dev/tty", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(5, 0), .gid = 5 }, | |
{ 0 }, | |
}; | |
static int create_devices(void) | |
{ | |
struct mknod_args **cur, *curdef; | |
char *path, *tmp; | |
int ret; | |
if (!opts.devices) | |
goto only_default_devices; | |
cur = opts.devices; | |
while (*cur) { | |
path = (*cur)->path; | |
/* don't allow devices outside of /dev */ | |
if (strncmp(path, "/dev", 4)) | |
return EPERM; | |
/* make sure parent folder exists */ | |
tmp = strrchr(path, '/'); | |
if (!tmp) | |
return EINVAL; | |
*tmp = '\0'; | |
if (strcmp(path, "/dev")) { | |
DEBUG("creating directory %s\n", path); | |
mkdir_p(path, 0755); | |
} | |
*tmp = '/'; | |
DEBUG("creating %s (mode=%08o)\n", path, (*cur)->mode); | |
/* create device */ | |
if (mknod(path, (*cur)->mode, (*cur)->dev)) | |
return errno; | |
/* change owner, if needed */ | |
if (((*cur)->uid || (*cur)->gid) && | |
chown(path, (*cur)->uid, (*cur)->gid)) | |
return errno; | |
++cur; | |
} | |
only_default_devices: | |
curdef = default_devices; | |
while(curdef->path) { | |
DEBUG("creating %s (mode=%08o)\n", curdef->path, curdef->mode); | |
if (mknod(curdef->path, curdef->mode, curdef->dev)) { | |
++curdef; | |
continue; /* may already exist, eg. due to a bind-mount */ | |
} | |
if ((curdef->uid || curdef->gid) && | |
chown(curdef->path, curdef->uid, curdef->gid)) | |
return errno; | |
++curdef; | |
} | |
/* Dev symbolic links as defined in OCI spec */ | |
ret = symlink("/dev/pts/ptmx", "/dev/ptmx"); | |
if (ret < 0) | |
WARNING("symlink() failed to create link to /dev/pts/ptmx"); | |
ret = symlink("/proc/self/fd", "/dev/fd"); | |
if (ret < 0) | |
WARNING("symlink() failed to create link to /proc/self/fd"); | |
ret = symlink("/proc/self/fd/0", "/dev/stdin"); | |
if (ret < 0) | |
WARNING("symlink() failed to create link to /proc/self/fd/0"); | |
ret = symlink("/proc/self/fd/1", "/dev/stdout"); | |
if (ret < 0) | |
WARNING("symlink() failed to create link to /proc/self/fd/1"); | |
ret = symlink("/proc/self/fd/2", "/dev/stderr"); | |
if (ret < 0) | |
WARNING("symlink() failed to create link to /proc/self/fd/2"); | |
return 0; | |
} | |
static char jail_root[] = "/tmp/ujail-XXXXXX"; | |
static char tmpovdir[] = "/tmp/ujail-overlay-XXXXXX"; | |
static mode_t old_umask; | |
static void enter_jail_fs(void); | |
static int build_jail_fs(void) | |
{ | |
char *overlaydir = NULL; | |
int ret; | |
old_umask = umask(0); | |
if (mkdtemp(jail_root) == NULL) { | |
ERROR("mkdtemp(%s) failed: %m\n", jail_root); | |
return -1; | |
} | |
if (apply_sysctl(jail_root)) { | |
ERROR("failed to apply sysctl values\n"); | |
return -1; | |
} | |
/* oldroot can't be MS_SHARED else pivot_root() fails */ | |
if (mount("none", "/", "none", MS_REC|MS_PRIVATE, NULL)) { | |
ERROR("private mount failed %m\n"); | |
return -1; | |
} | |
if (opts.extroot) { | |
if (mount(opts.extroot, jail_root, "bind", MS_BIND, NULL)) { | |
ERROR("extroot mount failed %m\n"); | |
return -1; | |
} | |
} else { | |
if (mount("tmpfs", jail_root, "tmpfs", MS_NOATIME, "mode=0755")) { | |
ERROR("tmpfs mount failed %m\n"); | |
return -1; | |
} | |
} | |
if (opts.tmpoverlaysize) { | |
char mountoptsstr[] = "mode=0755,size=XXXXXXXX"; | |
snprintf(mountoptsstr, sizeof(mountoptsstr), | |
"mode=0755,size=%s", opts.tmpoverlaysize); | |
if (mkdtemp(tmpovdir) == NULL) { | |
ERROR("mkdtemp(%s) failed: %m\n", jail_root); | |
return -1; | |
} | |
if (mount("tmpfs", tmpovdir, "tmpfs", MS_NOATIME, | |
mountoptsstr)) { | |
ERROR("failed to mount tmpfs for overlay (size=%s)\n", opts.tmpoverlaysize); | |
return -1; | |
} | |
overlaydir = tmpovdir; | |
} | |
if (opts.overlaydir) | |
overlaydir = opts.overlaydir; | |
if (overlaydir) { | |
ret = mount_overlay(jail_root, overlaydir); | |
if (ret) | |
return ret; | |
} | |
if (chdir(jail_root)) { | |
ERROR("chdir(%s) (jail_root) failed: %m\n", jail_root); | |
return -1; | |
} | |
if (mount_all(jail_root)) { | |
ERROR("mount_all() failed\n"); | |
return -1; | |
} | |
if (opts.console) | |
create_dev_console(jail_root); | |
/* make sure /etc/resolv.conf exists if in new network namespace */ | |
if (opts.namespace & CLONE_NEWNET) { | |
char jailetc[PATH_MAX], jaillink[PATH_MAX]; | |
snprintf(jailetc, PATH_MAX, "%s/etc", jail_root); | |
mkdir_p(jailetc, 0755); | |
snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root); | |
if (overlaydir) | |
unlink(jaillink); | |
ret = symlink("../dev/resolv.conf.d/resolv.conf.auto", jaillink); | |
if (ret < 0) | |
WARNING("symlink() failed to create link to ../dev/resolv.conf.d/resolv.conf.auto"); | |
} | |
run_hooks(opts.hooks.createContainer, enter_jail_fs); | |
return 0; | |
} | |
static bool exit_from_child; | |
static void free_and_exit(int ret) | |
{ | |
if (!exit_from_child && opts.ocibundle) | |
cgroups_free(); | |
if (!exit_from_child && parent_ctx) | |
ubus_free(parent_ctx); | |
free_opts(!exit_from_child); | |
exit(ret); | |
} | |
static void post_jail_fs(void); | |
static void enter_jail_fs(void) | |
{ | |
char dirbuf[sizeof(jail_root) + 4]; | |
snprintf(dirbuf, sizeof(dirbuf), "%s/old", jail_root); | |
mkdir(dirbuf, 0755); | |
if (pivot_root(jail_root, dirbuf) == -1) { | |
ERROR("pivot_root(%s, %s) failed: %m\n", jail_root, dirbuf); | |
free_and_exit(-1); | |
} | |
if (chdir("/")) { | |
ERROR("chdir(/) (after pivot_root) failed: %m\n"); | |
free_and_exit(-1); | |
} | |
snprintf(dirbuf, sizeof(dirbuf), "/old%s", jail_root); | |
umount2(dirbuf, MNT_DETACH); | |
rmdir(dirbuf); | |
if (opts.tmpoverlaysize) { | |
char tmpdirbuf[sizeof(tmpovdir) + 4]; | |
snprintf(tmpdirbuf, sizeof(tmpdirbuf), "/old%s", tmpovdir); | |
umount2(tmpdirbuf, MNT_DETACH); | |
rmdir(tmpdirbuf); | |
} | |
umount2("/old", MNT_DETACH); | |
rmdir("/old"); | |
if (create_devices()) { | |
ERROR("create_devices() failed\n"); | |
free_and_exit(-1); | |
} | |
if (opts.ronly) | |
mount(NULL, "/", "bind", MS_REMOUNT | MS_BIND | MS_RDONLY, 0); | |
umask(old_umask); | |
post_jail_fs(); | |
} | |
/*
 * Write a prepared mapping string to the uid_map or gid_map of a process.
 *
 * child_pid: process whose map gets written
 * gidmap:    true selects /proc/<pid>/gid_map, false /proc/<pid>/uid_map
 * mapstr:    mapping written as is
 *
 * Returns 0 on success, -1 on failure.
 */
static int write_uid_gid_map(pid_t child_pid, bool gidmap, char *mapstr)
{
	int map_file;
	char map_path[64];

	if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s",
		child_pid, gidmap?"gid_map":"uid_map") < 0)
		return -1;

	if ((map_file = open(map_path, O_WRONLY)) < 0)
		return -1;

	/*
	 * dprintf() returns the number of bytes written, only a negative
	 * value indicates an error
	 */
	if (dprintf(map_file, "%s", mapstr) < 0) {
		close(map_file);
		return -1;
	}

	close(map_file);
	return 0;
}
/*
 * Write a one-line mapping "0 <id> 1" to the uid_map or gid_map of a
 * process.
 *
 * child_pid: process whose map gets written
 * gidmap:    true selects /proc/<pid>/gid_map, false /proc/<pid>/uid_map
 * id:        value placed in the second field of the mapping
 *
 * Returns 0 on success, -1 on failure.
 */
static int write_single_uid_gid_map(pid_t child_pid, bool gidmap, int id)
{
	const char *map_name = gidmap ? "gid_map" : "uid_map";
	char map_path[64];
	int map_file, written;

	if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s", child_pid, map_name) < 0)
		return -1;

	map_file = open(map_path, O_WRONLY);
	if (map_file < 0)
		return -1;

	written = dprintf(map_file, "%d %d %d\n", 0, id, 1);
	close(map_file);

	return (written < 0) ? -1 : 0;
}
/*
 * Write "allow" or "deny" to /proc/<child_pid>/setgroups.
 *
 * Returns 0 on success, -1 on failure.
 */
static int write_setgroups(pid_t child_pid, bool allow)
{
	const char *policy = allow ? "allow" : "deny";
	char setgroups_path[64];
	int setgroups_file, written;

	if (snprintf(setgroups_path, sizeof(setgroups_path), "/proc/%d/setgroups", child_pid) < 0)
		return -1;

	setgroups_file = open(setgroups_path, O_WRONLY);
	if (setgroups_file < 0)
		return -1;

	written = dprintf(setgroups_file, "%s", policy);
	close(setgroups_file);

	return (written == -1) ? -1 : 0;
}
static void get_jail_user(int *user, int *user_gid, int *gr_gid) | |
{ | |
struct passwd *p = NULL; | |
struct group *g = NULL; | |
if (opts.user) { | |
p = getpwnam(opts.user); | |
if (!p) { | |
ERROR("failed to get uid/gid for user %s: %d (%s)\n", | |
opts.user, errno, strerror(errno)); | |
free_and_exit(EXIT_FAILURE); | |
} | |
*user = p->pw_uid; | |
*user_gid = p->pw_gid; | |
} else { | |
*user = -1; | |
*user_gid = -1; | |
} | |
if (opts.group) { | |
g = getgrnam(opts.group); | |
if (!g) { | |
ERROR("failed to get gid for group %s: %m\n", opts.group); | |
free_and_exit(EXIT_FAILURE); | |
} | |
*gr_gid = g->gr_gid; | |
} else { | |
*gr_gid = -1; | |
} | |
}; | |
static void set_jail_user(int pw_uid, int user_gid, int gr_gid) | |
{ | |
if (opts.user && (user_gid != -1) && initgroups(opts.user, user_gid)) { | |
ERROR("failed to initgroups() for user %s: %m\n", opts.user); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if ((gr_gid != -1) && setregid(gr_gid, gr_gid)) { | |
ERROR("failed to set group id %d: %m\n", gr_gid); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if ((pw_uid != -1) && setreuid(pw_uid, pw_uid)) { | |
ERROR("failed to set user id %d: %m\n", pw_uid); | |
free_and_exit(EXIT_FAILURE); | |
} | |
} | |
static int apply_rlimits(void) | |
{ | |
int resource; | |
for (resource = 0; resource < RLIM_NLIMITS; ++resource) { | |
if (opts.rlimits[resource]) | |
DEBUG("applying limits to resource %u\n", resource); | |
if (opts.rlimits[resource] && | |
setrlimit(resource, opts.rlimits[resource])) | |
return errno; | |
} | |
return 0; | |
} | |
#define MAX_ENVP 64 | |
static char** build_envp(const char *seccomp, char **ocienvp) | |
{ | |
static char *envp[MAX_ENVP]; | |
static char preload_var[PATH_MAX]; | |
static char seccomp_var[PATH_MAX]; | |
static char seccomp_debug_var[20]; | |
static char debug_var[] = "LD_DEBUG=all"; | |
static char container_var[] = "container=ujail"; | |
const char *preload_lib = find_lib("libpreload-seccomp.so"); | |
char **addenv; | |
int count = 0; | |
if (seccomp && !preload_lib) { | |
ERROR("failed to add preload-lib to env\n"); | |
return NULL; | |
} | |
if (seccomp) { | |
snprintf(seccomp_var, sizeof(seccomp_var), "SECCOMP_FILE=%s", seccomp); | |
envp[count++] = seccomp_var; | |
snprintf(seccomp_debug_var, sizeof(seccomp_debug_var), "SECCOMP_DEBUG=%2d", debug); | |
envp[count++] = seccomp_debug_var; | |
snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib); | |
envp[count++] = preload_var; | |
} | |
envp[count++] = container_var; | |
if (debug > 1) | |
envp[count++] = debug_var; | |
addenv = ocienvp; | |
while (addenv && *addenv) { | |
envp[count++] = *(addenv++); | |
if (count >= MAX_ENVP) { | |
ERROR("environment limited to %d extra records, truncating\n", MAX_ENVP); | |
break; | |
} | |
} | |
return envp; | |
} | |
/* Print the command line help to stderr. */
static void usage(void)
{
	/* adjacent string literals are joined into one string by the compiler */
	static const char help[] =
		"ujail <options> -- <binary> <params ...>\n"
		" -d <num>\tshow debug log (increase num to increase verbosity)\n"
		" -S <file>\tseccomp filter config\n"
		" -C <file>\tcapabilities drop config\n"
		" -c\t\tset PR_SET_NO_NEW_PRIVS\n"
		" -n <name>\tthe name of the jail\n"
		" -e <var>\timport environment variable\n"
		"namespace jail options:\n"
		" -h <hostname>\tchange the hostname of the jail\n"
		" -N\t\tjail has network namespace\n"
		" -f\t\tjail has user namespace\n"
		" -F\t\tjail has cgroups namespace\n"
		" -r <file>\treadonly files that should be staged\n"
		" -w <file>\twriteable files that should be staged\n"
		" -p\t\tjail has /proc\n"
		" -s\t\tjail has /sys\n"
		" -l\t\tjail has /dev/log\n"
		" -u\t\tjail has a ubus socket\n"
		" -U <name>\tuser to run jailed process\n"
		" -G <name>\tgroup to run jailed process\n"
		" -o\t\tremont jail root (/) read only\n"
		" -R <dir>\texternal jail rootfs (system container)\n"
		" -O <dir>\tdirectory for r/w overlayfs\n"
		" -T <size>\tuse tmpfs r/w overlayfs with <size>\n"
		" -E\t\tfail if jail cannot be setup\n"
		" -y\t\tprovide jail console\n"
		" -J <dir>\tcreate container from OCI bundle\n"
		" -I <infra>\tshare namespace with another container\n"
		" -i\t\tstart container immediately\n"
		" -P <pidfile>\tcreate <pidfile>\n"
		"\nWarning: by default root inside the jail is the same\n"
		"and he has the same powers as root outside the jail,\n"
		"thus he can escape the jail and/or break stuff.\n"
		"Please use seccomp/capabilities (-S/-C) to restrict his powers\n\n"
		"If you use none of the namespace jail options,\n"
		"ujail will not use namespace/build a jail,\n"
		"and will only drop capabilities/apply seccomp filter.\n\n";

	fputs(help, stderr);
}
static int* get_namespace_fd(const unsigned int nstype) | |
{ | |
switch (nstype) { | |
case CLONE_NEWPID: | |
return &opts.setns.pid; | |
case CLONE_NEWNET: | |
return &opts.setns.net; | |
case CLONE_NEWNS: | |
return &opts.setns.ns; | |
case CLONE_NEWIPC: | |
return &opts.setns.ipc; | |
case CLONE_NEWUTS: | |
return &opts.setns.uts; | |
case CLONE_NEWUSER: | |
return &opts.setns.user; | |
case CLONE_NEWCGROUP: | |
return &opts.setns.cgroup; | |
#ifdef CLONE_NEWTIME | |
case CLONE_NEWTIME: | |
return &opts.setns.time; | |
#endif | |
default: | |
return NULL; | |
} | |
} | |
/*
 * Join the namespace of the given type if a file descriptor for it is
 * stored in opts.setns, and close that descriptor afterwards.
 *
 * The stored descriptor value itself is left untouched, as exec_jail()
 * still compares opts.setns.user against -1 later on.
 *
 * Returns 0 on success or if there is nothing to join, the errno of the
 * failed setns() call otherwise.
 */
static int setns_open(unsigned long nstype)
{
	int *fd = get_namespace_fd(nstype);
	int ret = 0;

	assert(fd != NULL);

	if (*fd < 0)
		return 0;

	/* fetch errno right away, close() below may overwrite it */
	if (setns(*fd, nstype) == -1)
		ret = errno;

	close(*fd);

	return ret;
}
static int jail_running = 0; | |
static int jail_return_code = 0; | |
static void jail_process_timeout_cb(struct uloop_timeout *t); | |
static struct uloop_timeout jail_process_timeout = { | |
.cb = jail_process_timeout_cb, | |
}; | |
static void poststop(void); | |
static void jail_process_handler(struct uloop_process *c, int ret) | |
{ | |
uloop_timeout_cancel(&jail_process_timeout); | |
if (WIFEXITED(ret)) { | |
jail_return_code = WEXITSTATUS(ret); | |
INFO("jail (%d) exited with exit: %d\n", c->pid, jail_return_code); | |
} else { | |
jail_return_code = WTERMSIG(ret); | |
INFO("jail (%d) exited with signal: %d\n", c->pid, jail_return_code); | |
} | |
jail_running = 0; | |
poststop(); | |
} | |
static struct uloop_process jail_process = { | |
.cb = jail_process_handler, | |
}; | |
static void jail_process_timeout_cb(struct uloop_timeout *t) | |
{ | |
DEBUG("jail process failed to stop, sending SIGKILL\n"); | |
kill(jail_process.pid, SIGKILL); | |
} | |
static void jail_handle_signal(int signo) | |
{ | |
if (hook_running) { | |
DEBUG("forwarding signal %d to the hook process\n", signo); | |
kill(hook_process.pid, signo); | |
/* set timeout to send SIGKILL hook process in case SIGTERM doesn't succeed */ | |
if (signo == SIGTERM) | |
uloop_timeout_set(&hook_process_timeout, opts.term_timeout * 1000); | |
} | |
if (jail_running) { | |
DEBUG("forwarding signal %d to the jailed process\n", signo); | |
kill(jail_process.pid, signo); | |
/* set timeout to send SIGKILL jail process in case SIGTERM doesn't succeed */ | |
if (signo == SIGTERM) | |
uloop_timeout_set(&jail_process_timeout, opts.term_timeout * 1000); | |
} | |
} | |
static void signals_init(void) | |
{ | |
int i; | |
sigset_t sigmask; | |
sigfillset(&sigmask); | |
for (i = 0; i < _NSIG; i++) { | |
struct sigaction s = { 0 }; | |
if (!sigismember(&sigmask, i)) | |
continue; | |
if ((i == SIGCHLD) || (i == SIGPIPE) || (i == SIGSEGV) || (i == SIGSTOP) || (i == SIGKILL)) | |
continue; | |
s.sa_handler = jail_handle_signal; | |
sigaction(i, &s, NULL); | |
} | |
} | |
static void pre_exec_jail(struct uloop_timeout *t); | |
static struct uloop_timeout pre_exec_timeout = { | |
.cb = pre_exec_jail, | |
}; | |
int pipes[4]; | |
static int exec_jail(void *arg) | |
{ | |
char buf[1]; | |
exit_from_child = true; | |
prctl(PR_SET_SECUREBITS, 0); | |
uloop_init(); | |
signals_init(); | |
close(pipes[0]); | |
close(pipes[3]); | |
setns_open(CLONE_NEWUSER); | |
setns_open(CLONE_NEWNET); | |
setns_open(CLONE_NEWNS); | |
setns_open(CLONE_NEWIPC); | |
setns_open(CLONE_NEWUTS); | |
buf[0] = 'i'; | |
if (write(pipes[1], buf, 1) < 1) { | |
ERROR("can't write to parent\n"); | |
return EXIT_FAILURE; | |
} | |
close(pipes[1]); | |
if (read(pipes[2], buf, 1) < 1) { | |
ERROR("can't read from parent\n"); | |
return EXIT_FAILURE; | |
} | |
if (buf[0] != 'O') { | |
ERROR("parent had an error, child exiting\n"); | |
return EXIT_FAILURE; | |
} | |
if (opts.namespace & CLONE_NEWCGROUP) | |
unshare(CLONE_NEWCGROUP); | |
setns_open(CLONE_NEWCGROUP); | |
if ((opts.namespace & CLONE_NEWUSER) || (opts.setns.user != -1)) { | |
if (setregid(0, 0) < 0) { | |
ERROR("setgid\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if (setreuid(0, 0) < 0) { | |
ERROR("setuid\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if (setgroups(0, NULL) < 0) { | |
ERROR("setgroups\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
} | |
if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0 | |
&& sethostname(opts.hostname, strlen(opts.hostname))) { | |
ERROR("sethostname(%s) failed: %m\n", opts.hostname); | |
free_and_exit(EXIT_FAILURE); | |
} | |
uloop_timeout_add(&pre_exec_timeout); | |
uloop_run(); | |
free_and_exit(-1); | |
return -1; | |
} | |
static void pre_exec_jail(struct uloop_timeout *t) | |
{ | |
if ((opts.namespace & CLONE_NEWNS) && build_jail_fs()) { | |
ERROR("failed to build jail fs\n"); | |
free_and_exit(EXIT_FAILURE); | |
} else { | |
run_hooks(opts.hooks.createContainer, post_jail_fs); | |
} | |
} | |
static void post_start_hook(void); | |
static void post_jail_fs(void) | |
{ | |
char buf[1]; | |
if (read(pipes[2], buf, 1) < 1) { | |
ERROR("can't read from parent\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if (buf[0] != '!') { | |
ERROR("parent had an error, child exiting\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
close(pipes[2]); | |
run_hooks(opts.hooks.startContainer, post_start_hook); | |
} | |
static void post_start_hook(void) | |
{ | |
int pw_uid, pw_gid, gr_gid; | |
/* | |
* make sure setuid/setgid won't drop capabilities in case capabilities | |
* have been specified explicitely. | |
*/ | |
if (opts.capset.apply) { | |
if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP)) { | |
ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
} | |
/* drop capabilities, retain those still needed to further setup jail */ | |
if (applyOCIcapabilities(opts.capset, (1LLU << CAP_SETGID) | (1LLU << CAP_SETUID) | (1LLU << CAP_SETPCAP))) | |
free_and_exit(EXIT_FAILURE); | |
/* use either cmdline-supplied user/group or uid/gid from OCI spec */ | |
get_jail_user(&pw_uid, &pw_gid, &gr_gid); | |
set_jail_user(opts.pw_uid?:pw_uid, opts.pw_gid?:pw_gid, opts.gr_gid?:gr_gid); | |
if (opts.additional_gids && | |
(setgroups(opts.num_additional_gids, opts.additional_gids) < 0)) { | |
ERROR("setgroups failed: %m\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if (opts.set_umask) | |
umask(opts.umask); | |
/* restore securebits back to normal (and lock them if not in userns) */ | |
if (opts.capset.apply) { | |
if (prctl(PR_SET_SECUREBITS, (opts.namespace & CLONE_NEWUSER)?0: | |
SECBIT_KEEP_CAPS_LOCKED|SECBIT_NO_SETUID_FIXUP_LOCKED|SECBIT_NOROOT_LOCKED)) { | |
ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
} | |
/* drop remaining capabilities to end up with specified sets */ | |
if (applyOCIcapabilities(opts.capset, 0)) | |
free_and_exit(EXIT_FAILURE); | |
if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { | |
ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
char **envp = build_envp(opts.seccomp, opts.envp); | |
if (!envp) | |
free_and_exit(EXIT_FAILURE); | |
if (opts.cwd && chdir(opts.cwd)) | |
free_and_exit(EXIT_FAILURE); | |
if (opts.ociseccomp && applyOCIlinuxseccomp(opts.ociseccomp)) | |
free_and_exit(EXIT_FAILURE); | |
uloop_end(); | |
free_opts(false); | |
INFO("exec-ing %s\n", *opts.jail_argv); | |
if (opts.envp) /* respect PATH if potentially set in ENV */ | |
execvpe(*opts.jail_argv, opts.jail_argv, envp); | |
else | |
execve(*opts.jail_argv, opts.jail_argv, envp); | |
/* we get there only if execve fails */ | |
ERROR("failed to execve %s: %m\n", *opts.jail_argv); | |
exit(EXIT_FAILURE); | |
} | |
int ns_open_pid(const char *nstype, const pid_t target_ns) | |
{ | |
char pid_pid_path[PATH_MAX]; | |
snprintf(pid_pid_path, sizeof(pid_pid_path), "/proc/%u/ns/%s", target_ns, nstype); | |
return open(pid_pid_path, O_RDONLY); | |
} | |
static int parseOCIenvarray(struct blob_attr *msg, char ***envp) | |
{ | |
struct blob_attr *cur; | |
int sz = 0, rem; | |
blobmsg_for_each_attr(cur, msg, rem) | |
++sz; | |
if (sz > 0) { | |
*envp = calloc(1 + sz, sizeof(char*)); | |
if (!(*envp)) | |
return ENOMEM; | |
} else { | |
*envp = NULL; | |
return 0; | |
} | |
sz = 0; | |
blobmsg_for_each_attr(cur, msg, rem) | |
(*envp)[sz++] = strdup(blobmsg_get_string(cur)); | |
if (sz) | |
(*envp)[sz] = NULL; | |
return 0; | |
} | |
enum { | |
OCI_ROOT_PATH, | |
OCI_ROOT_READONLY, | |
__OCI_ROOT_MAX, | |
}; | |
static const struct blobmsg_policy oci_root_policy[] = { | |
[OCI_ROOT_PATH] = { "path", BLOBMSG_TYPE_STRING }, | |
[OCI_ROOT_READONLY] = { "readonly", BLOBMSG_TYPE_BOOL }, | |
}; | |
static int parseOCIroot(const char *jsonfile, struct blob_attr *msg) | |
{ | |
char extroot[PATH_MAX] = { 0 }; | |
struct blob_attr *tb[__OCI_ROOT_MAX]; | |
char *cur; | |
char *root_path; | |
blobmsg_parse(oci_root_policy, __OCI_ROOT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (!tb[OCI_ROOT_PATH]) | |
return ENODATA; | |
root_path = blobmsg_get_string(tb[OCI_ROOT_PATH]); | |
/* prepend bundle directory in case of relative paths */ | |
if (root_path[0] != '/') { | |
strncpy(extroot, jsonfile, PATH_MAX - 1); | |
cur = strrchr(extroot, '/'); | |
if (!cur) | |
return ENOTDIR; | |
*(++cur) = '\0'; | |
} | |
strncat(extroot, root_path, PATH_MAX - (strlen(extroot) + 1)); | |
/* follow symbolic link(s) */ | |
opts.extroot = realpath(extroot, NULL); | |
if (!opts.extroot) | |
return errno; | |
if (tb[OCI_ROOT_READONLY]) | |
opts.ronly = blobmsg_get_bool(tb[OCI_ROOT_READONLY]); | |
return 0; | |
} | |
enum { | |
OCI_HOOK_PATH, | |
OCI_HOOK_ARGS, | |
OCI_HOOK_ENV, | |
OCI_HOOK_TIMEOUT, | |
__OCI_HOOK_MAX, | |
}; | |
static const struct blobmsg_policy oci_hook_policy[] = { | |
[OCI_HOOK_PATH] = { "path", BLOBMSG_TYPE_STRING }, | |
[OCI_HOOK_ARGS] = { "args", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOK_ENV] = { "env", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOK_TIMEOUT] = { "timeout", BLOBMSG_TYPE_INT32 }, | |
}; | |
static int parseOCIhook(struct hook_execvpe ***hooklist, struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__OCI_HOOK_MAX]; | |
struct blob_attr *cur; | |
int rem, ret = 0; | |
int idx = 0; | |
blobmsg_for_each_attr(cur, msg, rem) | |
++idx; | |
if (!idx) | |
return 0; | |
*hooklist = calloc(idx + 1, sizeof(struct hook_execvpe *)); | |
idx = 0; | |
if (!(*hooklist)) | |
return ENOMEM; | |
blobmsg_for_each_attr(cur, msg, rem) { | |
blobmsg_parse(oci_hook_policy, __OCI_HOOK_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[OCI_HOOK_PATH]) { | |
ret = EINVAL; | |
goto errout; | |
} | |
(*hooklist)[idx] = calloc(1, sizeof(struct hook_execvpe)); | |
if (tb[OCI_HOOK_ARGS]) { | |
ret = parseOCIenvarray(tb[OCI_HOOK_ARGS], &((*hooklist)[idx]->argv)); | |
if (ret) | |
goto errout; | |
} else { | |
(*hooklist)[idx]->argv = calloc(2, sizeof(char *)); | |
((*hooklist)[idx]->argv)[0] = strdup(blobmsg_get_string(tb[OCI_HOOK_PATH])); | |
((*hooklist)[idx]->argv)[1] = NULL; | |
}; | |
if (tb[OCI_HOOK_ENV]) { | |
ret = parseOCIenvarray(tb[OCI_HOOK_ENV], &((*hooklist)[idx]->envp)); | |
if (ret) | |
goto errout; | |
} | |
if (tb[OCI_HOOK_TIMEOUT]) | |
(*hooklist)[idx]->timeout = blobmsg_get_u32(tb[OCI_HOOK_TIMEOUT]); | |
(*hooklist)[idx]->file = strdup(blobmsg_get_string(tb[OCI_HOOK_PATH])); | |
++idx; | |
} | |
(*hooklist)[idx] = NULL; | |
DEBUG("added %d hooks\n", idx); | |
return 0; | |
errout: | |
free_hooklist(*hooklist); | |
*hooklist = NULL; | |
return ret; | |
}; | |
enum { | |
OCI_HOOKS_PRESTART, | |
OCI_HOOKS_CREATERUNTIME, | |
OCI_HOOKS_CREATECONTAINER, | |
OCI_HOOKS_STARTCONTAINER, | |
OCI_HOOKS_POSTSTART, | |
OCI_HOOKS_POSTSTOP, | |
__OCI_HOOKS_MAX, | |
}; | |
static const struct blobmsg_policy oci_hooks_policy[] = { | |
[OCI_HOOKS_PRESTART] = { "prestart", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOKS_CREATERUNTIME] = { "createRuntime", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOKS_CREATECONTAINER] = { "createContainer", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOKS_STARTCONTAINER] = { "startContainer", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOKS_POSTSTART] = { "poststart", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOKS_POSTSTOP] = { "poststop", BLOBMSG_TYPE_ARRAY }, | |
}; | |
static int parseOCIhooks(struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__OCI_HOOKS_MAX]; | |
int ret; | |
blobmsg_parse(oci_hooks_policy, __OCI_HOOKS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (tb[OCI_HOOKS_PRESTART]) | |
INFO("warning: ignoring deprecated prestart hook\n"); | |
if (tb[OCI_HOOKS_CREATERUNTIME]) { | |
ret = parseOCIhook(&opts.hooks.createRuntime, tb[OCI_HOOKS_CREATERUNTIME]); | |
if (ret) | |
return ret; | |
} | |
if (tb[OCI_HOOKS_CREATECONTAINER]) { | |
ret = parseOCIhook(&opts.hooks.createContainer, tb[OCI_HOOKS_CREATECONTAINER]); | |
if (ret) | |
goto out_createruntime; | |
} | |
if (tb[OCI_HOOKS_STARTCONTAINER]) { | |
ret = parseOCIhook(&opts.hooks.startContainer, tb[OCI_HOOKS_STARTCONTAINER]); | |
if (ret) | |
goto out_createcontainer; | |
} | |
if (tb[OCI_HOOKS_POSTSTART]) { | |
ret = parseOCIhook(&opts.hooks.poststart, tb[OCI_HOOKS_POSTSTART]); | |
if (ret) | |
goto out_startcontainer; | |
} | |
if (tb[OCI_HOOKS_POSTSTOP]) { | |
ret = parseOCIhook(&opts.hooks.poststop, tb[OCI_HOOKS_POSTSTOP]); | |
if (ret) | |
goto out_poststart; | |
} | |
return 0; | |
out_poststart: | |
free_hooklist(opts.hooks.poststart); | |
out_startcontainer: | |
free_hooklist(opts.hooks.startContainer); | |
out_createcontainer: | |
free_hooklist(opts.hooks.createContainer); | |
out_createruntime: | |
free_hooklist(opts.hooks.createRuntime); | |
return ret; | |
}; | |
enum { | |
OCI_PROCESS_USER_UID, | |
OCI_PROCESS_USER_GID, | |
OCI_PROCESS_USER_UMASK, | |
OCI_PROCESS_USER_ADDITIONALGIDS, | |
__OCI_PROCESS_USER_MAX, | |
}; | |
static const struct blobmsg_policy oci_process_user_policy[] = { | |
[OCI_PROCESS_USER_UID] = { "uid", BLOBMSG_TYPE_INT32 }, | |
[OCI_PROCESS_USER_GID] = { "gid", BLOBMSG_TYPE_INT32 }, | |
[OCI_PROCESS_USER_UMASK] = { "umask", BLOBMSG_TYPE_INT32 }, | |
[OCI_PROCESS_USER_ADDITIONALGIDS] = { "additionalGids", BLOBMSG_TYPE_ARRAY }, | |
}; | |
static int parseOCIprocessuser(struct blob_attr *msg) { | |
struct blob_attr *tb[__OCI_PROCESS_USER_MAX]; | |
struct blob_attr *cur; | |
int rem; | |
int has_gid = 0; | |
blobmsg_parse(oci_process_user_policy, __OCI_PROCESS_USER_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (tb[OCI_PROCESS_USER_UID]) | |
opts.pw_uid = blobmsg_get_u32(tb[OCI_PROCESS_USER_UID]); | |
if (tb[OCI_PROCESS_USER_GID]) { | |
opts.pw_gid = blobmsg_get_u32(tb[OCI_PROCESS_USER_GID]); | |
opts.gr_gid = blobmsg_get_u32(tb[OCI_PROCESS_USER_GID]); | |
has_gid = 1; | |
} | |
if (tb[OCI_PROCESS_USER_ADDITIONALGIDS]) { | |
size_t gidcnt = 0; | |
blobmsg_for_each_attr(cur, tb[OCI_PROCESS_USER_ADDITIONALGIDS], rem) { | |
++gidcnt; | |
if (has_gid && (blobmsg_get_u32(cur) == opts.gr_gid)) | |
continue; | |
} | |
if (gidcnt) { | |
opts.additional_gids = calloc(gidcnt + has_gid, sizeof(gid_t)); | |
gidcnt = 0; | |
/* always add primary GID to set of GIDs if set */ | |
if (has_gid) | |
opts.additional_gids[gidcnt++] = opts.gr_gid; | |
blobmsg_for_each_attr(cur, tb[OCI_PROCESS_USER_ADDITIONALGIDS], rem) { | |
if (has_gid && (blobmsg_get_u32(cur) == opts.gr_gid)) | |
continue; | |
opts.additional_gids[gidcnt++] = blobmsg_get_u32(cur); | |
} | |
opts.num_additional_gids = gidcnt; | |
} | |
DEBUG("read %zu additional groups\n", gidcnt); | |
} | |
if (tb[OCI_PROCESS_USER_UMASK]) { | |
opts.umask = blobmsg_get_u32(tb[OCI_PROCESS_USER_UMASK]); | |
opts.set_umask = true; | |
} | |
return 0; | |
} | |
enum { | |
OCI_PROCESS_RLIMIT_TYPE, | |
OCI_PROCESS_RLIMIT_SOFT, | |
OCI_PROCESS_RLIMIT_HARD, | |
__OCI_PROCESS_RLIMIT_MAX, | |
}; | |
static const struct blobmsg_policy oci_process_rlimit_policy[] = { | |
[OCI_PROCESS_RLIMIT_TYPE] = { "type", BLOBMSG_TYPE_STRING }, | |
[OCI_PROCESS_RLIMIT_SOFT] = { "soft", BLOBMSG_CAST_INT64 }, | |
[OCI_PROCESS_RLIMIT_HARD] = { "hard", BLOBMSG_CAST_INT64 }, | |
}; | |
/*
 * Resource names from manpage GETRLIMIT(2), indexed by their RLIMIT_*
 * constant and stored without the "RLIMIT_" prefix. Slots that have no
 * name stay NULL. The helper macro derives index and string from a single
 * token so the two cannot drift apart.
 */
#define RLIMIT_NAME(res) [RLIMIT_##res] = #res
static const char* const rlimit_names[RLIM_NLIMITS] = {
	RLIMIT_NAME(AS),
	RLIMIT_NAME(CORE),
	RLIMIT_NAME(CPU),
	RLIMIT_NAME(DATA),
	RLIMIT_NAME(FSIZE),
	RLIMIT_NAME(LOCKS),
	RLIMIT_NAME(MEMLOCK),
	RLIMIT_NAME(MSGQUEUE),
	RLIMIT_NAME(NICE),
	RLIMIT_NAME(NOFILE),
	RLIMIT_NAME(NPROC),
	RLIMIT_NAME(RSS),
	RLIMIT_NAME(RTPRIO),
	RLIMIT_NAME(RTTIME),
	RLIMIT_NAME(SIGPENDING),
	RLIMIT_NAME(STACK),
};
#undef RLIMIT_NAME

/*
 * Translate a resource name of the form "RLIMIT_<NAME>" into its RLIMIT_*
 * constant.
 *
 * Returns the resource number, or -1 if type is NULL, lacks the "RLIMIT_"
 * prefix or does not name a known resource. Matching is case-sensitive.
 */
static int resolve_rlimit(char *type) {
	static const char prefix[] = "RLIMIT_";
	const size_t prefix_len = sizeof(prefix) - 1;
	unsigned int rltype;

	/* the prefix test does not depend on the table entry, do it once */
	if (!type || strncmp(prefix, type, prefix_len))
		return -1;

	for (rltype = 0; rltype < RLIM_NLIMITS; ++rltype)
		if (rlimit_names[rltype] &&
		    !strcmp(rlimit_names[rltype], type + prefix_len))
			return rltype;

	return -1;
}
static int parseOCIrlimit(struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__OCI_PROCESS_RLIMIT_MAX]; | |
int limtype = -1; | |
struct rlimit *curlim; | |
blobmsg_parse(oci_process_rlimit_policy, __OCI_PROCESS_RLIMIT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (!tb[OCI_PROCESS_RLIMIT_TYPE] || | |
!tb[OCI_PROCESS_RLIMIT_SOFT] || | |
!tb[OCI_PROCESS_RLIMIT_HARD]) | |
return ENODATA; | |
limtype = resolve_rlimit(blobmsg_get_string(tb[OCI_PROCESS_RLIMIT_TYPE])); | |
if (limtype < 0) | |
return EINVAL; | |
if (opts.rlimits[limtype]) | |
return ENOTUNIQ; | |
curlim = malloc(sizeof(struct rlimit)); | |
curlim->rlim_cur = blobmsg_cast_u64(tb[OCI_PROCESS_RLIMIT_SOFT]); | |
curlim->rlim_max = blobmsg_cast_u64(tb[OCI_PROCESS_RLIMIT_HARD]); | |
opts.rlimits[limtype] = curlim; | |
return 0; | |
}; | |
enum { | |
OCI_PROCESS_ARGS, | |
OCI_PROCESS_CAPABILITIES, | |
OCI_PROCESS_CWD, | |
OCI_PROCESS_ENV, | |
OCI_PROCESS_OOMSCOREADJ, | |
OCI_PROCESS_NONEWPRIVILEGES, | |
OCI_PROCESS_RLIMITS, | |
OCI_PROCESS_TERMINAL, | |
OCI_PROCESS_USER, | |
__OCI_PROCESS_MAX, | |
}; | |
static const struct blobmsg_policy oci_process_policy[] = { | |
[OCI_PROCESS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY }, | |
[OCI_PROCESS_CAPABILITIES] = { "capabilities", BLOBMSG_TYPE_TABLE }, | |
[OCI_PROCESS_CWD] = { "cwd", BLOBMSG_TYPE_STRING }, | |
[OCI_PROCESS_ENV] = { "env", BLOBMSG_TYPE_ARRAY }, | |
[OCI_PROCESS_OOMSCOREADJ] = { "oomScoreAdj", BLOBMSG_TYPE_INT32 }, | |
[OCI_PROCESS_NONEWPRIVILEGES] = { "noNewPrivileges", BLOBMSG_TYPE_BOOL }, | |
[OCI_PROCESS_RLIMITS] = { "rlimits", BLOBMSG_TYPE_ARRAY }, | |
[OCI_PROCESS_TERMINAL] = { "terminal", BLOBMSG_TYPE_BOOL }, | |
[OCI_PROCESS_USER] = { "user", BLOBMSG_TYPE_TABLE }, | |
}; | |
static int parseOCIprocess(struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__OCI_PROCESS_MAX], *cur; | |
int rem, res; | |
blobmsg_parse(oci_process_policy, __OCI_PROCESS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (!tb[OCI_PROCESS_ARGS]) | |
return ENOENT; | |
res = parseOCIenvarray(tb[OCI_PROCESS_ARGS], &opts.jail_argv); | |
if (res) | |
return res; | |
if (tb[OCI_PROCESS_TERMINAL]) | |
opts.console = blobmsg_get_bool(tb[OCI_PROCESS_TERMINAL]); | |
if (tb[OCI_PROCESS_NONEWPRIVILEGES]) | |
opts.no_new_privs = blobmsg_get_bool(tb[OCI_PROCESS_NONEWPRIVILEGES]); | |
if (tb[OCI_PROCESS_CWD]) | |
opts.cwd = strdup(blobmsg_get_string(tb[OCI_PROCESS_CWD])); | |
if (tb[OCI_PROCESS_ENV]) { | |
res = parseOCIenvarray(tb[OCI_PROCESS_ENV], &opts.envp); | |
if (res) | |
return res; | |
} | |
if (tb[OCI_PROCESS_USER] && (res = parseOCIprocessuser(tb[OCI_PROCESS_USER]))) | |
return res; | |
if (tb[OCI_PROCESS_CAPABILITIES] && | |
(res = parseOCIcapabilities(&opts.capset, tb[OCI_PROCESS_CAPABILITIES]))) | |
return res; | |
if (tb[OCI_PROCESS_RLIMITS]) { | |
blobmsg_for_each_attr(cur, tb[OCI_PROCESS_RLIMITS], rem) { | |
res = parseOCIrlimit(cur); | |
if (res) | |
return res; | |
} | |
} | |
if (tb[OCI_PROCESS_OOMSCOREADJ]) { | |
opts.oom_score_adj = blobmsg_get_u32(tb[OCI_PROCESS_OOMSCOREADJ]); | |
opts.set_oom_score_adj = true; | |
} | |
return 0; | |
} | |
enum { | |
OCI_LINUX_NAMESPACE_TYPE, | |
OCI_LINUX_NAMESPACE_PATH, | |
__OCI_LINUX_NAMESPACE_MAX, | |
}; | |
static const struct blobmsg_policy oci_linux_namespace_policy[] = { | |
[OCI_LINUX_NAMESPACE_TYPE] = { "type", BLOBMSG_TYPE_STRING }, | |
[OCI_LINUX_NAMESPACE_PATH] = { "path", BLOBMSG_TYPE_STRING }, | |
}; | |
/*
 * Map a namespace type name to its CLONE_NEW* flag.
 *
 * Accepted names are "pid", "network", "net", "mount", "ipc", "uts",
 * "user", "cgroup" and, where CLONE_NEWTIME is available, "time".
 * Returns 0 for any other string. Matching is case-sensitive.
 */
static int resolve_nstype(char *type) {
	static const struct {
		const char *name;
		int flag;
	} known[] = {
		{ "pid", CLONE_NEWPID },
		{ "network", CLONE_NEWNET },
		{ "net", CLONE_NEWNET },
		{ "mount", CLONE_NEWNS },
		{ "ipc", CLONE_NEWIPC },
		{ "uts", CLONE_NEWUTS },
		{ "user", CLONE_NEWUSER },
		{ "cgroup", CLONE_NEWCGROUP },
#ifdef CLONE_NEWTIME
		{ "time", CLONE_NEWTIME },
#endif
	};
	size_t idx;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); ++idx) {
		if (strcmp(known[idx].name, type) == 0)
			return known[idx].flag;
	}

	return 0;
}
static int parseOCIlinuxns(struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__OCI_LINUX_NAMESPACE_MAX]; | |
int nstype; | |
int *setns; | |
int fd; | |
blobmsg_parse(oci_linux_namespace_policy, __OCI_LINUX_NAMESPACE_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (!tb[OCI_LINUX_NAMESPACE_TYPE]) | |
return EINVAL; | |
nstype = resolve_nstype(blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE])); | |
if (!nstype) | |
return EINVAL; | |
if (opts.namespace & nstype) | |
return ENOTUNIQ; | |
setns = get_namespace_fd(nstype); | |
if (!setns) | |
return EFAULT; | |
if (*setns != -1) | |
return ENOTUNIQ; | |
if (tb[OCI_LINUX_NAMESPACE_PATH]) { | |
DEBUG("opening existing %s namespace from path %s\n", | |
blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]), | |
blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_PATH])); | |
fd = open(blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_PATH]), O_RDONLY); | |
if (fd < 0) | |
return errno?:ESTALE; | |
if (ioctl(fd, NS_GET_NSTYPE) != nstype) { | |
close(fd); | |
return EINVAL; | |
} | |
DEBUG("opened existing %s namespace got filehandler %u\n", | |
blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]), | |
fd); | |
*setns = fd; | |
} else { | |
opts.namespace |= nstype; | |
} | |
return 0; | |
} | |
/* | |
* join namespace of existing PID | |
* The string argument is the reference PID followed by ':' and a | |
* ',' separated list of namespaces to to join. | |
*/ | |
static int jail_join_ns(char *arg) | |
{ | |
pid_t pid; | |
int fd; | |
int nstype; | |
char *tmp, *etmp, *nspath; | |
int *setns; | |
tmp = strchr(arg, ':'); | |
if (!tmp) | |
return EINVAL; | |
*tmp = '\0'; | |
pid = atoi(arg); | |
do { | |
++tmp; | |
etmp = strchr(tmp, ','); | |
if (etmp) | |
*etmp = '\0'; | |
nstype = resolve_nstype(tmp); | |
if (!nstype) | |
return EINVAL; | |
if (opts.namespace & nstype) | |
return ENOTUNIQ; | |
setns = get_namespace_fd(nstype); | |
if (!setns) | |
return EFAULT; | |
if (*setns != -1) | |
return ENOTUNIQ; | |
if (asprintf(&nspath, "/proc/%d/ns/%s", pid, tmp) < 0) | |
return ENOMEM; | |
fd = open(nspath, O_RDONLY); | |
free(nspath); | |
if (fd < 0) | |
return errno?:ESTALE; | |
*setns = fd; | |
if (etmp) | |
tmp = etmp; | |
else | |
tmp = NULL; | |
} while (tmp); | |
return 0; | |
} | |
static void get_jail_root_user(bool is_gidmap, uint32_t container_id, uint32_t host_id, uint32_t size) | |
{ | |
if (container_id == 0 && size >= 1) | |
if (!is_gidmap) | |
opts.root_map_uid = host_id; | |
} | |
enum { | |
OCI_LINUX_UIDGIDMAP_CONTAINERID, | |
OCI_LINUX_UIDGIDMAP_HOSTID, | |
OCI_LINUX_UIDGIDMAP_SIZE, | |
__OCI_LINUX_UIDGIDMAP_MAX, | |
}; | |
static const struct blobmsg_policy oci_linux_uidgidmap_policy[] = { | |
[OCI_LINUX_UIDGIDMAP_CONTAINERID] = { "containerID", BLOBMSG_TYPE_INT32 }, | |
[OCI_LINUX_UIDGIDMAP_HOSTID] = { "hostID", BLOBMSG_TYPE_INT32 }, | |
[OCI_LINUX_UIDGIDMAP_SIZE] = { "size", BLOBMSG_TYPE_INT32 }, | |
}; | |
static int parseOCIuidgidmappings(struct blob_attr *msg, bool is_gidmap) | |
{ | |
struct blob_attr *tb[__OCI_LINUX_UIDGIDMAP_MAX]; | |
struct blob_attr *cur; | |
int rem; | |
char *map; | |
size_t len, pos, totallen = 0; | |
blobmsg_for_each_attr(cur, msg, rem) { | |
blobmsg_parse(oci_linux_uidgidmap_policy, __OCI_LINUX_UIDGIDMAP_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[OCI_LINUX_UIDGIDMAP_CONTAINERID] || | |
!tb[OCI_LINUX_UIDGIDMAP_HOSTID] || | |
!tb[OCI_LINUX_UIDGIDMAP_SIZE]) | |
return EINVAL; | |
/* count length */ | |
totallen += snprintf(NULL, 0, "%d %d %d\n", | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]), | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]), | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE])); | |
} | |
/* allocate combined mapping string */ | |
map = malloc(totallen + 1); | |
if (!map) | |
return ENOMEM; | |
pos = 0; | |
blobmsg_for_each_attr(cur, msg, rem) { | |
blobmsg_parse(oci_linux_uidgidmap_policy, __OCI_LINUX_UIDGIDMAP_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
get_jail_root_user(is_gidmap, blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]), | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]), | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE])); | |
/* write mapping line into pre-allocated string */ | |
len = snprintf(&map[pos], totallen + 1, "%d %d %d\n", | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]), | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]), | |
blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE])); | |
pos += len; | |
totallen -= len; | |
} | |
assert(totallen == 0); | |
if (is_gidmap) | |
opts.gidmap = map; | |
else | |
opts.uidmap = map; | |
return 0; | |
} | |
enum { | |
OCI_DEVICES_TYPE, | |
OCI_DEVICES_PATH, | |
OCI_DEVICES_MAJOR, | |
OCI_DEVICES_MINOR, | |
OCI_DEVICES_FILEMODE, | |
OCI_DEVICES_UID, | |
OCI_DEVICES_GID, | |
__OCI_DEVICES_MAX, | |
}; | |
static const struct blobmsg_policy oci_devices_policy[] = { | |
[OCI_DEVICES_TYPE] = { "type", BLOBMSG_TYPE_STRING }, | |
[OCI_DEVICES_PATH] = { "path", BLOBMSG_TYPE_STRING }, | |
[OCI_DEVICES_MAJOR] = { "major", BLOBMSG_TYPE_INT32 }, | |
[OCI_DEVICES_MINOR] = { "minor", BLOBMSG_TYPE_INT32 }, | |
[OCI_DEVICES_FILEMODE] = { "fileMode", BLOBMSG_TYPE_INT32 }, | |
[OCI_DEVICES_UID] = { "uid", BLOBMSG_TYPE_INT32 }, | |
[OCI_DEVICES_GID] = { "uid", BLOBMSG_TYPE_INT32 }, | |
}; | |
/*
 * Map a one-letter device type to its file type bits: "c" and "u" give
 * S_IFCHR, "b" gives S_IFBLK and "p" gives S_IFIFO. Any other string,
 * including longer ones, yields 0.
 */
static mode_t resolve_devtype(char *tstr)
{
	/* only exact single-character codes are recognised */
	if (tstr[0] == '\0' || tstr[1] != '\0')
		return 0;

	switch (tstr[0]) {
	case 'c':
	case 'u':
		return S_IFCHR;
	case 'b':
		return S_IFBLK;
	case 'p':
		return S_IFIFO;
	default:
		return 0;
	}
}
static int parseOCIdevices(struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__OCI_DEVICES_MAX]; | |
struct blob_attr *cur; | |
int rem; | |
size_t cnt = 0; | |
struct mknod_args *tmp; | |
blobmsg_for_each_attr(cur, msg, rem) | |
++cnt; | |
opts.devices = calloc(cnt + 1, sizeof(struct mknod_args *)); | |
cnt = 0; | |
blobmsg_for_each_attr(cur, msg, rem) { | |
blobmsg_parse(oci_devices_policy, __OCI_DEVICES_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[OCI_DEVICES_TYPE] || | |
!tb[OCI_DEVICES_PATH]) | |
return ENODATA; | |
tmp = calloc(1, sizeof(struct mknod_args)); | |
if (!tmp) | |
return ENOMEM; | |
tmp->mode = resolve_devtype(blobmsg_get_string(tb[OCI_DEVICES_TYPE])); | |
if (!tmp->mode) { | |
free(tmp); | |
return EINVAL; | |
} | |
if (tmp->mode != S_IFIFO) { | |
if (!tb[OCI_DEVICES_MAJOR] || !tb[OCI_DEVICES_MINOR]) { | |
free(tmp); | |
return ENODATA; | |
} | |
tmp->dev = makedev(blobmsg_get_u32(tb[OCI_DEVICES_MAJOR]), | |
blobmsg_get_u32(tb[OCI_DEVICES_MINOR])); | |
} | |
if (tb[OCI_DEVICES_FILEMODE]) { | |
if (~(S_IRWXU|S_IRWXG|S_IRWXO) & blobmsg_get_u32(tb[OCI_DEVICES_FILEMODE])) { | |
free(tmp); | |
return EINVAL; | |
} | |
tmp->mode |= blobmsg_get_u32(tb[OCI_DEVICES_FILEMODE]); | |
} else { | |
tmp->mode |= (S_IRUSR|S_IWUSR); /* 0600 */ | |
} | |
tmp->path = strdup(blobmsg_get_string(tb[OCI_DEVICES_PATH])); | |
if (tb[OCI_DEVICES_UID]) | |
tmp->uid = blobmsg_get_u32(tb[OCI_DEVICES_UID]); | |
else | |
tmp->uid = -1; | |
if (tb[OCI_DEVICES_GID]) | |
tmp->gid = blobmsg_get_u32(tb[OCI_DEVICES_GID]); | |
else | |
tmp->gid = -1; | |
DEBUG("read device %s (%s)\n", blobmsg_get_string(tb[OCI_DEVICES_PATH]), blobmsg_get_string(tb[OCI_DEVICES_TYPE])); | |
opts.devices[cnt++] = tmp; | |
} | |
opts.devices[cnt] = NULL; | |
return 0; | |
} | |
static int parseOCIsysctl(struct blob_attr *msg) | |
{ | |
struct blob_attr *cur; | |
int rem; | |
char *tmp, *tc; | |
size_t cnt = 0; | |
blobmsg_for_each_attr(cur, msg, rem) { | |
if (!blobmsg_name(cur) || !blobmsg_get_string(cur)) | |
return EINVAL; | |
++cnt; | |
} | |
if (!cnt) | |
return 0; | |
opts.sysctl = calloc(cnt + 1, sizeof(struct sysctl_val *)); | |
if (!opts.sysctl) | |
return ENOMEM; | |
cnt = 0; | |
blobmsg_for_each_attr(cur, msg, rem) { | |
opts.sysctl[cnt] = malloc(sizeof(struct sysctl_val)); | |
if (!opts.sysctl[cnt]) | |
return ENOMEM; | |
/* replace '.' with '/' in entry name */ | |
tc = tmp = strdup(blobmsg_name(cur)); | |
while ((tc = strchr(tc, '.'))) | |
*tc = '/'; | |
opts.sysctl[cnt]->value = strdup(blobmsg_get_string(cur)); | |
opts.sysctl[cnt]->entry = tmp; | |
++cnt; | |
} | |
opts.sysctl[cnt] = NULL; | |
return 0; | |
} | |
enum { | |
OCI_LINUX_CGROUPSPATH, | |
OCI_LINUX_RESOURCES, | |
OCI_LINUX_SECCOMP, | |
OCI_LINUX_SYSCTL, | |
OCI_LINUX_NAMESPACES, | |
OCI_LINUX_DEVICES, | |
OCI_LINUX_UIDMAPPINGS, | |
OCI_LINUX_GIDMAPPINGS, | |
OCI_LINUX_MASKEDPATHS, | |
OCI_LINUX_READONLYPATHS, | |
OCI_LINUX_ROOTFSPROPAGATION, | |
__OCI_LINUX_MAX, | |
}; | |
static const struct blobmsg_policy oci_linux_policy[] = { | |
[OCI_LINUX_CGROUPSPATH] = { "cgroupsPath", BLOBMSG_TYPE_STRING }, | |
[OCI_LINUX_RESOURCES] = { "resources", BLOBMSG_TYPE_TABLE }, | |
[OCI_LINUX_SECCOMP] = { "seccomp", BLOBMSG_TYPE_TABLE }, | |
[OCI_LINUX_SYSCTL] = { "sysctl", BLOBMSG_TYPE_TABLE }, | |
[OCI_LINUX_NAMESPACES] = { "namespaces", BLOBMSG_TYPE_ARRAY }, | |
[OCI_LINUX_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY }, | |
[OCI_LINUX_UIDMAPPINGS] = { "uidMappings", BLOBMSG_TYPE_ARRAY }, | |
[OCI_LINUX_GIDMAPPINGS] = { "gidMappings", BLOBMSG_TYPE_ARRAY }, | |
[OCI_LINUX_MASKEDPATHS] = { "maskedPaths", BLOBMSG_TYPE_ARRAY }, | |
[OCI_LINUX_READONLYPATHS] = { "readonlyPaths", BLOBMSG_TYPE_ARRAY }, | |
[OCI_LINUX_ROOTFSPROPAGATION] = { "rootfsPropagation", BLOBMSG_TYPE_STRING }, | |
}; | |
static int parseOCIlinux(struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__OCI_LINUX_MAX]; | |
struct blob_attr *cur; | |
int rem; | |
int res = 0; | |
char *cgpath; | |
char cgfullpath[256] = "/sys/fs/cgroup"; | |
blobmsg_parse(oci_linux_policy, __OCI_LINUX_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (tb[OCI_LINUX_NAMESPACES]) { | |
blobmsg_for_each_attr(cur, tb[OCI_LINUX_NAMESPACES], rem) { | |
res = parseOCIlinuxns(cur); | |
if (res) | |
return res; | |
} | |
} | |
if (tb[OCI_LINUX_UIDMAPPINGS]) { | |
res = parseOCIuidgidmappings(tb[OCI_LINUX_GIDMAPPINGS], 0); | |
if (res) | |
return res; | |
} | |
if (tb[OCI_LINUX_GIDMAPPINGS]) { | |
res = parseOCIuidgidmappings(tb[OCI_LINUX_GIDMAPPINGS], 1); | |
if (res) | |
return res; | |
} | |
if (tb[OCI_LINUX_READONLYPATHS]) { | |
blobmsg_for_each_attr(cur, tb[OCI_LINUX_READONLYPATHS], rem) { | |
res = add_mount(NULL, blobmsg_get_string(cur), NULL, MS_BIND | MS_REC | MS_RDONLY, 0, NULL, 0); | |
if (res) | |
return res; | |
} | |
} | |
if (tb[OCI_LINUX_MASKEDPATHS]) { | |
blobmsg_for_each_attr(cur, tb[OCI_LINUX_MASKEDPATHS], rem) { | |
res = add_mount((void *)(-1), blobmsg_get_string(cur), NULL, 0, 0, NULL, 0); | |
if (res) | |
return res; | |
} | |
} | |
if (tb[OCI_LINUX_SYSCTL]) { | |
res = parseOCIsysctl(tb[OCI_LINUX_SYSCTL]); | |
if (res) | |
return res; | |
} | |
if (tb[OCI_LINUX_SECCOMP]) { | |
opts.ociseccomp = parseOCIlinuxseccomp(tb[OCI_LINUX_SECCOMP]); | |
if (!opts.ociseccomp) | |
return EINVAL; | |
} | |
if (tb[OCI_LINUX_DEVICES]) { | |
res = parseOCIdevices(tb[OCI_LINUX_DEVICES]); | |
if (res) | |
return res; | |
} | |
if (tb[OCI_LINUX_CGROUPSPATH]) { | |
cgpath = blobmsg_get_string(tb[OCI_LINUX_CGROUPSPATH]); | |
if (cgpath[0] == '/') { | |
if (strlen(cgpath) + 1 >= (sizeof(cgfullpath) - strlen(cgfullpath))) | |
return E2BIG; | |
strcat(cgfullpath, cgpath); | |
} else { | |
strcat(cgfullpath, "/containers/"); | |
if (strlen(opts.name) + strlen(cgpath) + 2 >= (sizeof(cgfullpath) - strlen(cgfullpath))) | |
return E2BIG; | |
strcat(cgfullpath, opts.name); /* should be container name rather than jail name */ | |
strcat(cgfullpath, "/"); | |
strcat(cgfullpath, cgpath); | |
} | |
} else { | |
strcat(cgfullpath, "/containers/"); | |
if (2 * strlen(opts.name) + 2 >= (sizeof(cgfullpath) - strlen(cgfullpath))) | |
return E2BIG; | |
strcat(cgfullpath, opts.name); /* should be container name rather than jail name */ | |
strcat(cgfullpath, "/"); | |
strcat(cgfullpath, opts.name); /* should be container instance name rather than jail name */ | |
} | |
cgroups_init(cgfullpath); | |
if (tb[OCI_LINUX_RESOURCES]) { | |
res = parseOCIlinuxcgroups(tb[OCI_LINUX_RESOURCES]); | |
if (res) | |
return res; | |
} | |
return 0; | |
} | |
enum { | |
OCI_VERSION, | |
OCI_HOSTNAME, | |
OCI_PROCESS, | |
OCI_ROOT, | |
OCI_MOUNTS, | |
OCI_HOOKS, | |
OCI_LINUX, | |
OCI_ANNOTATIONS, | |
__OCI_MAX, | |
}; | |
static const struct blobmsg_policy oci_policy[] = { | |
[OCI_VERSION] = { "ociVersion", BLOBMSG_TYPE_STRING }, | |
[OCI_HOSTNAME] = { "hostname", BLOBMSG_TYPE_STRING }, | |
[OCI_PROCESS] = { "process", BLOBMSG_TYPE_TABLE }, | |
[OCI_ROOT] = { "root", BLOBMSG_TYPE_TABLE }, | |
[OCI_MOUNTS] = { "mounts", BLOBMSG_TYPE_ARRAY }, | |
[OCI_HOOKS] = { "hooks", BLOBMSG_TYPE_TABLE }, | |
[OCI_LINUX] = { "linux", BLOBMSG_TYPE_TABLE }, | |
[OCI_ANNOTATIONS] = { "annotations", BLOBMSG_TYPE_TABLE }, | |
}; | |
static int parseOCI(const char *jsonfile) | |
{ | |
struct blob_attr *tb[__OCI_MAX]; | |
struct blob_attr *cur; | |
int rem; | |
int res; | |
blob_buf_init(&ocibuf, 0); | |
if (!blobmsg_add_json_from_file(&ocibuf, jsonfile)) { | |
res=ENOENT; | |
goto errout; | |
} | |
blobmsg_parse(oci_policy, __OCI_MAX, tb, blob_data(ocibuf.head), blob_len(ocibuf.head)); | |
if (!tb[OCI_VERSION]) { | |
res=ENOMSG; | |
goto errout; | |
} | |
if (strncmp("1.0", blobmsg_get_string(tb[OCI_VERSION]), 3)) { | |
ERROR("unsupported ociVersion %s\n", blobmsg_get_string(tb[OCI_VERSION])); | |
res=ENOTSUP; | |
goto errout; | |
} | |
if (tb[OCI_HOSTNAME]) | |
opts.hostname = strdup(blobmsg_get_string(tb[OCI_HOSTNAME])); | |
if (!tb[OCI_PROCESS]) { | |
res=ENODATA; | |
goto errout; | |
} | |
if ((res = parseOCIprocess(tb[OCI_PROCESS]))) | |
goto errout; | |
if (!tb[OCI_ROOT]) { | |
res=ENODATA; | |
goto errout; | |
} | |
if ((res = parseOCIroot(jsonfile, tb[OCI_ROOT]))) | |
goto errout; | |
if (!tb[OCI_MOUNTS]) { | |
res=ENODATA; | |
goto errout; | |
} | |
blobmsg_for_each_attr(cur, tb[OCI_MOUNTS], rem) | |
if ((res = parseOCImount(cur))) | |
goto errout; | |
if (tb[OCI_LINUX] && (res = parseOCIlinux(tb[OCI_LINUX]))) | |
goto errout; | |
if (tb[OCI_HOOKS] && (res = parseOCIhooks(tb[OCI_HOOKS]))) | |
goto errout; | |
if (tb[OCI_ANNOTATIONS]) | |
opts.annotations = blob_memdup(tb[OCI_ANNOTATIONS]); | |
errout: | |
blob_buf_free(&ocibuf); | |
return res; | |
} | |
static int set_oom_score_adj(void) | |
{ | |
int f; | |
char fname[32]; | |
if (!opts.set_oom_score_adj) | |
return 0; | |
snprintf(fname, sizeof(fname), "/proc/%u/oom_score_adj", jail_process.pid); | |
f = open(fname, O_WRONLY | O_TRUNC); | |
if (f < 0) | |
return errno; | |
dprintf(f, "%d", opts.oom_score_adj); | |
close(f); | |
return 0; | |
} | |
/* Container lifecycle states; handle_state() reports them as strings. */
enum { | |
OCI_STATE_CREATING, | |
OCI_STATE_CREATED, | |
OCI_STATE_RUNNING, | |
OCI_STATE_STOPPED, | |
}; | |
/* Current lifecycle state, initialised to OCI_STATE_CREATED. */
static int jail_oci_state = OCI_STATE_CREATED; | |
/* Timeout callback, defined elsewhere in this file. */
static void pipe_send_start_container(struct uloop_timeout *t); | |
/* Timeout armed by handle_start(); its callback is pipe_send_start_container(). */
static struct uloop_timeout start_container_timeout = { | |
.cb = pipe_send_start_container, | |
}; | |
static int handle_start(struct ubus_context *ctx, struct ubus_object *obj, | |
struct ubus_request_data *req, const char *method, | |
struct blob_attr *msg) | |
{ | |
if (jail_oci_state != OCI_STATE_CREATED) | |
return UBUS_STATUS_INVALID_ARGUMENT; | |
uloop_timeout_add(&start_container_timeout); | |
return UBUS_STATUS_OK; | |
} | |
static struct blob_buf bb; | |
static int handle_state(struct ubus_context *ctx, struct ubus_object *obj, | |
struct ubus_request_data *req, const char *method, | |
struct blob_attr *msg) | |
{ | |
char *statusstr; | |
switch (jail_oci_state) { | |
case OCI_STATE_CREATING: | |
statusstr = "creating"; | |
break; | |
case OCI_STATE_CREATED: | |
statusstr = "created"; | |
break; | |
case OCI_STATE_RUNNING: | |
statusstr = "running"; | |
break; | |
case OCI_STATE_STOPPED: | |
statusstr = "stopped"; | |
break; | |
default: | |
statusstr = "unknown"; | |
} | |
blob_buf_init(&bb, 0); | |
blobmsg_add_string(&bb, "ociVersion", OCI_VERSION_STRING); | |
blobmsg_add_string(&bb, "id", opts.name); | |
blobmsg_add_string(&bb, "status", statusstr); | |
if (jail_oci_state == OCI_STATE_CREATED || | |
jail_oci_state == OCI_STATE_RUNNING) | |
blobmsg_add_u32(&bb, "pid", jail_process.pid); | |
blobmsg_add_string(&bb, "bundle", opts.ocibundle); | |
if (opts.infra) | |
blobmsg_add_string(&bb, "infra", opts.infra); | |
if (opts.annotations) | |
blobmsg_add_blob(&bb, opts.annotations); | |
ubus_send_reply(ctx, req, bb.head); | |
return UBUS_STATUS_OK; | |
} | |
enum { | |
CONTAINER_KILL_ATTR_SIGNAL, | |
__CONTAINER_KILL_ATTR_MAX, | |
}; | |
static const struct blobmsg_policy container_kill_attrs[__CONTAINER_KILL_ATTR_MAX] = { | |
[CONTAINER_KILL_ATTR_SIGNAL] = { "signal", BLOBMSG_TYPE_INT32 }, | |
}; | |
static int | |
container_handle_kill(struct ubus_context *ctx, struct ubus_object *obj, | |
struct ubus_request_data *req, const char *method, | |
struct blob_attr *msg) | |
{ | |
struct blob_attr *tb[__CONTAINER_KILL_ATTR_MAX], *cur; | |
int sig = SIGTERM; | |
blobmsg_parse(container_kill_attrs, __CONTAINER_KILL_ATTR_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg)); | |
cur = tb[CONTAINER_KILL_ATTR_SIGNAL]; | |
if (cur) | |
sig = blobmsg_get_u32(cur); | |
if (jail_oci_state == OCI_STATE_CREATING) | |
return UBUS_STATUS_NOT_FOUND; | |
if (kill(jail_process.pid, sig) == 0) | |
return 0; | |
switch (errno) { | |
case EINVAL: return UBUS_STATUS_INVALID_ARGUMENT; | |
case EPERM: return UBUS_STATUS_PERMISSION_DENIED; | |
case ESRCH: return UBUS_STATUS_NOT_FOUND; | |
} | |
return UBUS_STATUS_UNKNOWN_ERROR; | |
} | |
static int | |
jail_writepid(pid_t pid) | |
{ | |
FILE *_pidfile; | |
if (!opts.pidfile) | |
return 0; | |
_pidfile = fopen(opts.pidfile, "w"); | |
if (_pidfile == NULL) | |
return errno; | |
if (fprintf(_pidfile, "%d\n", pid) < 0) { | |
fclose(_pidfile); | |
return errno; | |
} | |
if (fclose(_pidfile)) | |
return errno; | |
return 0; | |
} | |
/*
 * Check that path can be opened as a directory.
 *
 * Returns 0 if it can, -1 (after logging the reason) otherwise.
 */
static int checkpath(const char *path)
{
	const int flags = O_RDONLY | O_DIRECTORY | O_CLOEXEC;
	int fd = open(path, flags);

	if (fd >= 0) {
		close(fd);
		return 0;
	}

	ERROR("path %s open failed %m\n", path);
	return -1;
}
/*
 * ubus methods of the container object: "start" and "state" take no
 * arguments, "kill" uses the container_kill_attrs policy.
 */
static struct ubus_method container_methods[] = { | |
UBUS_METHOD_NOARG("start", handle_start), | |
UBUS_METHOD_NOARG("state", handle_state), | |
UBUS_METHOD("kill", container_handle_kill, container_kill_attrs), | |
}; | |
/* ubus object type "container" built from the method table above. */
static struct ubus_object_type container_object_type = | |
UBUS_OBJECT_TYPE("container", container_methods); | |
/* The ubus object itself; main() sets its name before registering it. */
static struct ubus_object container_object = { | |
.type = &container_object_type, | |
.methods = container_methods, | |
.n_methods = ARRAY_SIZE(container_methods), | |
}; | |
/* Second stage of start-up; main() schedules it through post_main_timeout. */
static void post_main(struct uloop_timeout *t); | |
/* Timeout whose callback is post_main(). */
static struct uloop_timeout post_main_timeout = { | |
.cb = post_main, | |
}; | |
/* NOTE(review): presumably descriptors of the jail's net/pid/time namespaces; their use is outside this part of the file — confirm */
static int netns_fd; | |
static int pidns_fd; | |
#ifdef CLONE_NEWTIME | |
static int timens_fd; | |
#endif | |
/* Defined elsewhere in this file. */
static void post_create_runtime(void); | |
/* List node used by main() to collect the arguments of the -e option. */
struct env_e { | |
struct list_head list; | |
char *envarg; | |
}; | |
/*
 * Program entry point: requires root, parses the command line into the
 * global opts, optionally loads an OCI bundle, validates the resulting
 * configuration, connects to ubus and then hands over to post_main()
 * through the uloop.
 *
 * Every failure jumps to errout, which releases the cgroup state (OCI
 * bundles only) and opts and returns ret.
 */
int main(int argc, char **argv) | |
{ | |
uid_t uid = getuid(); | |
const char log[] = "/dev/log"; | |
const char ubus[] = "/var/run/ubus/ubus.sock"; | |
int ret = EXIT_FAILURE; | |
int ch; | |
char *tmp; | |
/* names of environment variables requested with -e */
struct list_head envl = LIST_HEAD_INIT(envl); | |
struct env_e *enve, *tmpenve; | |
unsigned short int envn = 0, envc = 0; | |
if (uid) { | |
ERROR("not root, aborting: %m\n"); | |
return EXIT_FAILURE; | |
} | |
/* those are filehandlers, so -1 indicates unused */ | |
opts.setns.pid = -1; | |
opts.setns.net = -1; | |
opts.setns.ns = -1; | |
opts.setns.ipc = -1; | |
opts.setns.uts = -1; | |
opts.setns.user = -1; | |
opts.setns.cgroup = -1; | |
#ifdef CLONE_NEWTIME | |
opts.setns.time = -1; | |
#endif | |
/* default 5 seconds timeout after SIGTERM before SIGKILL is sent */ | |
opts.term_timeout = 5; | |
umask(022); | |
mount_list_init(); | |
init_library_search(); | |
cgroups_prepare(); | |
exit_from_child = false; | |
/* option values that are stored as optarg point into argv and are not copied */
while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) { | |
switch (ch) { | |
case 'd': | |
debug = atoi(optarg); | |
break; | |
case 'e': | |
/* NOTE(review): the calloc() result is used without a NULL check */
enve = calloc(1, sizeof(*enve)); | |
enve->envarg = optarg; | |
list_add_tail(&enve->list, &envl); | |
break; | |
case 'p': | |
opts.namespace |= CLONE_NEWNS; | |
opts.procfs = 1; | |
break; | |
case 'o': | |
opts.namespace |= CLONE_NEWNS; | |
opts.ronly = 1; | |
break; | |
case 'f': | |
opts.namespace |= CLONE_NEWUSER; | |
break; | |
case 'F': | |
opts.namespace |= CLONE_NEWCGROUP; | |
break; | |
case 'R': | |
opts.extroot = realpath(optarg, NULL); | |
break; | |
case 's': | |
opts.namespace |= CLONE_NEWNS; | |
opts.sysfs = 1; | |
break; | |
case 'S': | |
opts.seccomp = optarg; | |
add_mount_bind(optarg, 1, -1); | |
break; | |
case 'C': | |
opts.capabilities = optarg; | |
break; | |
case 'c': | |
opts.no_new_privs = 1; | |
break; | |
case 'n': | |
opts.name = optarg; | |
break; | |
case 'N': | |
opts.namespace |= CLONE_NEWNET; | |
break; | |
case 'h': | |
opts.namespace |= CLONE_NEWUTS; | |
opts.hostname = strdup(optarg); | |
break; | |
case 'j': | |
/* NOTE(review): the return value of jail_join_ns() is ignored, so a malformed -j argument goes unnoticed */
jail_join_ns(optarg); | |
break; | |
case 'r': | |
/* "src:dst" or a single path; the ':' is overwritten to split the argument */
opts.namespace |= CLONE_NEWNS; | |
tmp = strchr(optarg, ':'); | |
if (tmp) { | |
*(tmp++) = '\0'; | |
add_2paths_and_deps(optarg, tmp, 1, 0, 0); | |
} else { | |
add_path_and_deps(optarg, 1, 0, 0); | |
} | |
break; | |
case 'w': | |
/* same as 'r' but passing 0 instead of 1 as the flag argument */
opts.namespace |= CLONE_NEWNS; | |
tmp = strchr(optarg, ':'); | |
if (tmp) { | |
*(tmp++) = '\0'; | |
add_2paths_and_deps(optarg, tmp, 0, 0, 0); | |
} else { | |
add_path_and_deps(optarg, 0, 0, 0); | |
} | |
break; | |
case 'u': | |
opts.namespace |= CLONE_NEWNS; | |
add_mount_bind(ubus, 0, -1); | |
break; | |
case 'l': | |
opts.namespace |= CLONE_NEWNS; | |
add_mount_bind(log, 0, -1); | |
break; | |
case 'U': | |
opts.user = optarg; | |
break; | |
case 'G': | |
opts.group = optarg; | |
break; | |
case 'O': | |
opts.overlaydir = realpath(optarg, NULL); | |
break; | |
case 't': | |
opts.term_timeout = atoi(optarg); | |
break; | |
case 'T': | |
opts.tmpoverlaysize = optarg; | |
break; | |
case 'E': | |
opts.require_jail = 1; | |
break; | |
case 'y': | |
opts.console = 1; | |
break; | |
case 'J': | |
opts.ocibundle = optarg; | |
break; | |
case 'I': | |
opts.infra = optarg; | |
break; | |
case 'i': | |
opts.immediately = true; | |
break; | |
case 'P': | |
opts.pidfile = optarg; | |
break; | |
} | |
} | |
/* outside of OCI mode, requesting any namespace also requests new IPC and PID namespaces */
if (opts.namespace && !opts.ocibundle) | |
opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID; | |
/* | |
* env import from cmdline is not available for OCI containers | |
*/ | |
if (opts.ocibundle && !list_empty(&envl)) { | |
ret=-ENOTSUP; | |
goto errout; | |
} | |
/* | |
* prepare list of env variables to import for slim containers | |
*/ | |
if (!list_empty(&envl)) { | |
list_for_each_entry(enve, &envl, list) | |
++envn; | |
/* NOTE(review): the calloc() result is used without a NULL check */
opts.envp = calloc(1 + envn, sizeof(char*)); | |
list_for_each_entry_safe(enve, tmpenve, &envl, list) { | |
/* variables that are not set in the current environment are skipped */
tmp = getenv(enve->envarg); | |
if (tmp) { | |
/* NOTE(review): on success ret keeps the length returned by asprintf() until it is assigned again */
ret = asprintf(&opts.envp[envc++], "%s=%s", enve->envarg, tmp); | |
if (ret < 0) { | |
ERROR("filed to handle envargs %s\n", tmp); | |
free(enve); | |
goto errout; | |
} | |
} | |
list_del(&enve->list); | |
free(enve); | |
} | |
opts.envp[envc] = NULL; | |
} | |
/* | |
* uid in parent user namespace representing root user in new | |
* user namespace, defaults to nobody unless specified in uidMappings | |
*/ | |
opts.root_map_uid = 65534; | |
if (opts.capabilities && parseOCIcapabilities_from_file(&opts.capset, opts.capabilities)) { | |
ERROR("failed to read capabilities from file %s\n", opts.capabilities); | |
ret=-1; | |
goto errout; | |
} | |
/* OCI mode: the configuration is read from <bundle>/config.json */
if (opts.ocibundle) { | |
char *jsonfile; | |
int ocires; | |
if (!opts.name) { | |
ERROR("OCI bundle needs a named jail\n"); | |
ret=-1; | |
goto errout; | |
} | |
if (asprintf(&jsonfile, "%s/config.json", opts.ocibundle) < 0) { | |
ret=-ENOMEM; | |
goto errout; | |
} | |
ocires = parseOCI(jsonfile); | |
free(jsonfile); | |
if (ocires) { | |
ERROR("parsing of OCI JSON spec has failed: %s (%d)\n", strerror(ocires), ocires); | |
ret=ocires; | |
goto errout; | |
} | |
} | |
if (opts.namespace & CLONE_NEWNET) { | |
if (!opts.name) { | |
ERROR("netns needs a named jail\n"); | |
ret=-1; | |
goto errout; | |
} | |
} | |
if (opts.tmpoverlaysize && strlen(opts.tmpoverlaysize) > 8) { | |
ERROR("size parameter too long: \"%s\"\n", opts.tmpoverlaysize); | |
ret=-1; | |
goto errout; | |
} | |
if (opts.extroot && checkpath(opts.extroot)) { | |
ERROR("invalid rootfs path '%s'", opts.extroot); | |
ret=-1; | |
goto errout; | |
} | |
if (opts.overlaydir && checkpath(opts.overlaydir)) { | |
ERROR("invalid rootfs overlay path '%s'", opts.overlaydir); | |
ret=-1; | |
goto errout; | |
} | |
/* no <binary> param found */ | |
if (!opts.ocibundle && (argc - optind < 1)) { | |
usage(); | |
ret=EXIT_FAILURE; | |
goto errout; | |
} | |
/* refuse to run when no isolation feature was requested; setns.pid is not part of this check */
if (!(opts.ocibundle||opts.namespace||opts.capabilities||opts.seccomp|| | |
(opts.setns.net != -1) || | |
(opts.setns.ns != -1) || | |
(opts.setns.ipc != -1) || | |
(opts.setns.uts != -1) || | |
(opts.setns.user != -1) || | |
(opts.setns.cgroup != -1))) { | |
ERROR("Not using namespaces, capabilities or seccomp !!!\n\n"); | |
usage(); | |
ret=EXIT_FAILURE; | |
goto errout; | |
} | |
DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n", | |
opts.namespace, | |
opts.capset.apply, | |
opts.seccomp != 0 || opts.ociseccomp != 0); | |
uloop_init(); | |
signals_init(); | |
/* NOTE(review): the result of ubus_connect() is passed on without a NULL check */
parent_ctx = ubus_connect(NULL); | |
ubus_add_uloop(parent_ctx); | |
/* OCI containers are exposed on ubus as "container.<name>" */
if (opts.ocibundle) { | |
char *objname; | |
if (asprintf(&objname, "container.%s", opts.name) < 0) { | |
ret=-ENOMEM; | |
goto errout; | |
} | |
container_object.name = objname; | |
ret = ubus_add_object(parent_ctx, &container_object); | |
if (ret) { | |
ERROR("Failed to add object: %s\n", ubus_strerror(ret)); | |
ret=-1; | |
goto errout; | |
} | |
} | |
/* deliberately not using 'else' on unrelated conditional branches */ | |
if (!opts.ocibundle) { | |
/* allocate NULL-terminated array for argv */ | |
opts.jail_argv = calloc(1 + argc - optind, sizeof(void *)); | |
if (!opts.jail_argv) { | |
ret=EXIT_FAILURE; | |
goto errout; | |
} | |
for (size_t s = optind; s < argc; s++) | |
opts.jail_argv[s - optind] = strdup(argv[s]); | |
if (opts.namespace & CLONE_NEWUSER) | |
get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid); | |
} | |
/* without an external rootfs the program and its dependencies are added to the jail */
if (!opts.extroot) { | |
if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) { | |
ERROR("failed to load dependencies\n"); | |
ret=-1; | |
goto errout; | |
} | |
} | |
/* failing to add the seccomp preload library only aborts if -E (require_jail) was given */
if (opts.namespace && opts.seccomp && add_path_and_deps("libpreload-seccomp.so", 1, -1, 1)) { | |
ERROR("failed to load libpreload-seccomp.so\n"); | |
opts.seccomp = 0; | |
if (opts.require_jail) { | |
ret=-1; | |
goto errout; | |
} | |
} | |
/* continue in post_main() once the event loop is running */
uloop_timeout_add(&post_main_timeout); | |
uloop_run(); | |
/* also reached when uloop_run() returns */
errout: | |
if (opts.ocibundle) | |
cgroups_free(); | |
free_opts(true); | |
return ret; | |
} | |
static void post_main(struct uloop_timeout *t) | |
{ | |
if (apply_rlimits()) { | |
ERROR("error applying resource limits\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if (opts.name) | |
prctl(PR_SET_NAME, opts.name, NULL, NULL, NULL); | |
if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0) | |
free_and_exit(-1); | |
if (has_namespaces()) { | |
if (opts.namespace & CLONE_NEWNS) { | |
if (!opts.extroot && (opts.user || opts.group)) { | |
add_mount_bind("/etc/passwd", 1, -1); | |
add_mount_bind("/etc/group", 1, -1); | |
} | |
#if defined(__GLIBC__) | |
if (!opts.extroot) | |
add_mount_bind("/etc/nsswitch.conf", 1, -1); | |
#endif | |
if (opts.setns.ns == -1) { | |
if (!(opts.namespace & CLONE_NEWNET)) { | |
add_mount_bind("/etc/resolv.conf", 1, 0); | |
} else { | |
/* new mount namespace to provide /dev/resolv.conf.d */ | |
char hostdir[PATH_MAX]; | |
snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name); | |
mkdir_p(hostdir, 0755); | |
add_mount(hostdir, "/dev/resolv.conf.d", NULL, | |
MS_BIND | MS_NOEXEC | MS_NOATIME | MS_NOSUID | MS_NODEV | MS_RDONLY, 0, NULL, 0); | |
} | |
} | |
/* default mounts */ | |
add_mount(NULL, "/dev", "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, 0, "size=1M", -1); | |
add_mount(NULL, "/dev/pts", "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, 0, "newinstance,ptmxmode=0666,mode=0620,gid=5", 0); | |
if (opts.procfs || opts.ocibundle) { | |
add_mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0, NULL, -1); | |
/* | |
* hack to make /proc/sys/net read-write while the rest of /proc/sys is read-only | |
* which cannot be expressed with OCI spec, but happends to be very useful. | |
* Only apply it if '/proc/sys' is not already listed as mount, maskedPath or | |
* readonlyPath. | |
* If not running in a new network namespace, only make /proc/sys read-only. | |
* If running in a new network namespace, temporarily stash (ie. mount-bind) | |
* /proc/sys/net into (totally unrelated, but surely existing) /proc/self/net. | |
* Then we mount-bind /proc/sys read-only and then mount-move /proc/self/net into | |
* /proc/sys/net. | |
* This works because mounts are executed in incrementing strcmp() order and | |
* /proc/self/net appears there before /proc/sys/net and hence the operation | |
* succeeds as the bind-mount of /proc/self/net is performed first and then | |
* move-mount of /proc/sys/net follows because 'e' preceeds 'y' in the ASCII | |
* table (and in the alphabet). | |
*/ | |
if (!add_mount(NULL, "/proc/sys", NULL, MS_BIND | MS_RDONLY, 0, NULL, -1)) | |
if (opts.namespace & CLONE_NEWNET) | |
if (!add_mount_inner("/proc/self/net", "/proc/sys/net", NULL, MS_MOVE, 0, NULL, -1)) | |
add_mount_inner("/proc/sys/net", "/proc/self/net", NULL, MS_BIND, 0, NULL, -1); | |
} | |
if (opts.sysfs || opts.ocibundle) | |
add_mount("sysfs", "/sys", "sysfs", MS_RELATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, 0, NULL, -1); | |
if (opts.ocibundle) | |
add_mount("shm", "/dev/shm", "tmpfs", MS_NOSUID | MS_NOEXEC | MS_NODEV, 0, "mode=1777", -1); | |
} | |
if (opts.setns.pid != -1) { | |
pidns_fd = ns_open_pid("pid", getpid()); | |
setns_open(CLONE_NEWPID); | |
} else { | |
pidns_fd = -1; | |
} | |
#ifdef CLONE_NEWTIME | |
if (opts.setns.time != -1) { | |
timens_fd = ns_open_pid("time", getpid()); | |
setns_open(CLONE_NEWTIME); | |
} else { | |
timens_fd = -1; | |
} | |
#endif | |
if (opts.namespace & CLONE_NEWUSER) { | |
if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP)) { | |
ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
if (seteuid(opts.root_map_uid)) { | |
ERROR("seteuid(%d) failed: %m\n", opts.root_map_uid); | |
free_and_exit(EXIT_FAILURE); | |
} | |
} | |
jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, SIGCHLD | (opts.namespace & (~CLONE_NEWCGROUP)), NULL); | |
} else { | |
jail_process.pid = fork(); | |
} | |
if (jail_process.pid > 0) { | |
/* parent process */ | |
char sig_buf[1]; | |
uloop_process_add(&jail_process); | |
jail_running = 1; | |
if (seteuid(0)) { | |
ERROR("seteuid(%d) failed: %m\n", opts.root_map_uid); | |
free_and_exit(EXIT_FAILURE); | |
} | |
prctl(PR_SET_SECUREBITS, 0); | |
if (pidns_fd != -1) { | |
setns(pidns_fd, CLONE_NEWPID); | |
close(pidns_fd); | |
} | |
#ifdef CLONE_NEWTIME | |
if (timens_fd != -1) { | |
setns(timens_fd, CLONE_NEWTIME); | |
close(timens_fd); | |
} | |
#endif | |
if (opts.setns.net != -1) | |
close(opts.setns.net); | |
if (opts.setns.ns != -1) | |
close(opts.setns.ns); | |
if (opts.setns.ipc != -1) | |
close(opts.setns.ipc); | |
if (opts.setns.uts != -1) | |
close(opts.setns.uts); | |
if (opts.setns.user != -1) | |
close(opts.setns.user); | |
if (opts.setns.cgroup != -1) | |
close(opts.setns.cgroup); | |
close(pipes[1]); | |
close(pipes[2]); | |
if (read(pipes[0], sig_buf, 1) < 1) { | |
ERROR("can't read from child\n"); | |
free_and_exit(-1); | |
} | |
close(pipes[0]); | |
set_oom_score_adj(); | |
if (opts.ocibundle) | |
cgroups_apply(jail_process.pid); | |
if (opts.namespace & CLONE_NEWUSER) { | |
if (write_setgroups(jail_process.pid, true)) { | |
ERROR("can't write setgroups\n"); | |
free_and_exit(-1); | |
} | |
if (!opts.uidmap) { | |
bool has_gr = (opts.gr_gid != -1); | |
if (opts.pw_uid != -1) { | |
write_single_uid_gid_map(jail_process.pid, 0, opts.pw_uid); | |
write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid); | |
} else { | |
write_single_uid_gid_map(jail_process.pid, 0, 65534); | |
write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534); | |
} | |
} else { | |
write_uid_gid_map(jail_process.pid, 0, opts.uidmap); | |
if (opts.gidmap) | |
write_uid_gid_map(jail_process.pid, 1, opts.gidmap); | |
} | |
} | |
if (opts.namespace & CLONE_NEWNET) | |
jail_network_start(parent_ctx, opts.name, jail_process.pid); | |
if (jail_writepid(jail_process.pid)) { | |
ERROR("failed to write pidfile: %m\n"); | |
free_and_exit(-1); | |
} | |
} else if (jail_process.pid == 0) { | |
/* fork child process */ | |
free_and_exit(exec_jail(NULL)); | |
} else { | |
ERROR("failed to clone/fork: %m\n"); | |
free_and_exit(EXIT_FAILURE); | |
} | |
run_hooks(opts.hooks.createRuntime, post_create_runtime); | |
} | |
static void post_poststart(void); | |
static void post_create_runtime(void) | |
{ | |
char sig_buf[1]; | |
sig_buf[0] = 'O'; | |
if (write(pipes[3], sig_buf, 1) < 0) { | |
ERROR("can't write to child\n"); | |
free_and_exit(-1); | |
} | |
jail_oci_state = OCI_STATE_CREATED; | |
if (opts.ocibundle && !opts.immediately) | |
uloop_run(); /* wait for 'start' command via ubus */ | |
else | |
pipe_send_start_container(NULL); | |
} | |
static void pipe_send_start_container(struct uloop_timeout *t) | |
{ | |
char sig_buf[1]; | |
jail_oci_state = OCI_STATE_RUNNING; | |
sig_buf[0] = '!'; | |
if (write(pipes[3], sig_buf, 1) < 0) { | |
ERROR("can't write to child\n"); | |
free_and_exit(-1); | |
} | |
close(pipes[3]); | |
run_hooks(opts.hooks.poststart, post_poststart); | |
} | |
static void post_poststart(void) | |
{ | |
uloop_run(); /* idle here while jail is running */ | |
if (jail_running) { | |
DEBUG("uloop interrupted, killing jail process\n"); | |
kill(jail_process.pid, SIGTERM); | |
uloop_timeout_set(&jail_process_timeout, 1000); | |
uloop_run(); | |
} | |
uloop_done(); | |
poststop(); | |
} | |
static void post_poststop(void); | |
static void poststop(void) { | |
if (opts.namespace & CLONE_NEWNET) { | |
setns(netns_fd, CLONE_NEWNET); | |
jail_network_stop(); | |
close(netns_fd); | |
} | |
run_hooks(opts.hooks.poststop, post_poststop); | |
} | |
static void post_poststop(void) | |
{ | |
free_opts(true); | |
if (parent_ctx) | |
ubus_free(parent_ctx); | |
exit(jail_return_code); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org> | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU Lesser General Public License version 2.1 | |
* as published by the Free Software Foundation | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
*/ | |
#ifndef _GNU_SOURCE | |
#define _GNU_SOURCE | |
#endif | |
#include <errno.h> | |
#include <fcntl.h> | |
#include <getopt.h> | |
#include <glob.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <signal.h> | |
#include <termios.h> | |
#include <unistd.h> | |
#include <sys/stat.h> | |
#include <sys/types.h> | |
#include <sched.h> | |
#include <libubus.h> | |
#include <libubox/avl-cmp.h> | |
#include <libubox/blobmsg.h> | |
#include <libubox/blobmsg_json.h> | |
#include <libubox/ustream.h> | |
#include "log.h" | |
/* Version string of the uxc tool. */
#define UXC_VERSION "0.3" | |
/* Value emitted as "ociVersion" when uxc_state() synthesizes a state object. */
#define OCI_VERSION_STRING "1.0.2" | |
/* Configuration directory that conf_load() always scans for *.json files. */
#define UXC_ETC_CONFDIR "/etc/uxc" | |
/* Second configuration directory; conf_load() scans it only if it exists. */
#define UXC_VOL_CONFDIR "/tmp/run/uvol/.meta/uxc" | |
/* NOTE(review): presumably set by the --verbose option; the parser is outside this view. */
static bool verbose = false; | |
/* When true, uxc_list() prints a JSON array instead of plain text lines. */
static bool json_output = false; | |
/* Active configuration directory; defaults to UXC_ETC_CONFDIR. */
static char *confdir = UXC_ETC_CONFDIR; | |
/* Stream wrapping the pseudo-terminal fd (client_fd) in uxc_attach(). */
static struct ustream_fd cufd; | |
/* Stream wrapping the local /dev/tty fd in uxc_attach(). */
static struct ustream_fd lufd; | |
/*
 * Runtime information about one container instance, as collected by
 * list_cb() from the "list" reply of the "container" ubus object.
 * Entries live in the 'runtime' AVL tree, keyed by container_name.
 */
struct runtime_state { | |
/* tree linkage; avl.key points at container_name */
struct avl_node avl; | |
/* stored in the same allocation as the struct (see runtime_alloc()) */
char *container_name; | |
/* strdup()ed, released in runtime_free() */
char *instance_name; | |
/* strdup()ed, released in runtime_free() */
char *jail_name; | |
/* value of the instance's "running" attribute (false if absent) */
bool running; | |
/* value of the instance's "pid" attribute, or -1 if absent */
int runtime_pid; | |
/* value of the instance's "exit_code" attribute, or -1 if absent */
int exitcode; | |
/* copy of the "state" reply of container.<jail_name>, or NULL */
struct blob_attr *ocistate; | |
}; | |
/*
 * Per-container settings loaded from the "settings/" sub-directory of the
 * configuration directories. Entries live in the 'settings' AVL tree, keyed
 * by container_name. The string members set by settings_add() point into
 * settingsbuf and are not owned by this struct.
 */
struct settings { | |
/* tree linkage; avl.key points at container_name */
struct avl_node avl; | |
/* stored in the same allocation as the struct (see settings_alloc()) */
char *container_name; | |
/* name of the blob entry the settings came from (the file path used by conf_load()) */
const char *fname; | |
/* value of "infra", or NULL */
char *infra; | |
/* value of "temp-overlay-size", or NULL */
char *tmprwsize; | |
/* value of "write-overlay-path", or NULL */
char *writepath; | |
/* -1: not specified, otherwise the boolean value of "autostart" */
signed char autostart; | |
/* the "volumes" array attribute, or NULL */
struct blob_attr *volumes; | |
}; | |
/*
 * Sub-commands of the uxc command line.
 * NOTE(review): the mapping from argv to these values is outside this view;
 * the names mirror the commands printed by usage(), except CMD_BOOT which
 * usage() does not list.
 */
enum uxc_cmd { | |
CMD_ATTACH, | |
CMD_EXEC, | |
CMD_LIST, | |
CMD_BOOT, | |
CMD_START, | |
CMD_STATE, | |
CMD_KILL, | |
CMD_ENABLE, | |
CMD_DISABLE, | |
CMD_DELETE, | |
CMD_CREATE, | |
CMD_RESTART, | |
/* presumably the fallback for an unrecognized command word - confirm in main() */
CMD_UNKNOWN | |
}; | |
/*
 * Short option string and matching long option table.
 * NOTE(review): 'c' (--console) and 'I' (--infra) have no entry in OPT_ARGS,
 * so presumably they are only reachable through their long spelling -
 * confirm against the getopt_long() call, which is outside this view.
 */
#define OPT_ARGS "ab:fjm:p:t:vVw:" | |
static struct option long_options[] = { | |
{"autostart", no_argument, 0, 'a' }, | |
{"console", no_argument, 0, 'c' }, | |
{"bundle", required_argument, 0, 'b' }, | |
{"force", no_argument, 0, 'f' }, | |
{"json", no_argument, 0, 'j' }, | |
{"mounts", required_argument, 0, 'm' }, | |
{"pid-file", required_argument, 0, 'p' }, | |
{"temp-overlay-size", required_argument, 0, 't' }, | |
{"write-overlay-path", required_argument, 0, 'w' }, | |
{"verbose", no_argument, 0, 'v' }, | |
{"version", no_argument, 0, 'V' }, | |
{"infra", required_argument, 0, 'I' }, | |
/* all-zero terminator entry */
{0, 0, 0, 0 } | |
}; | |
/* Tree of struct runtime_state, keyed by container name; filled by list_cb(). */
AVL_TREE(runtime, avl_strcmp, false, NULL); | |
/* Tree of struct settings, keyed by container name; filled by settings_add(). */
AVL_TREE(settings, avl_strcmp, false, NULL); | |
/* Container configurations: one table per JSON file, built by conf_load(false). */
static struct blob_buf conf; | |
/* Container settings: one table per JSON file, built by conf_load(true). */
static struct blob_buf settingsbuf; | |
/* NOTE(review): not referenced in the visible part of the file. */
static struct blob_attr *blockinfo; | |
/* NOTE(review): not referenced in the visible part of the file. */
static struct blob_attr *fstabinfo; | |
/* ubus connection used by get_ocistate() and runtime_load(); set up elsewhere. */
static struct ubus_context *ctx; | |
/*
 * Prints the command line syntax to stdout.
 *
 * Returns -EINVAL unconditionally, so callers can write 'return usage();'
 * when rejecting invalid arguments.
 */
static int usage(void)
{
	fputs("syntax: uxc <command> [parameters ...]\n"
	      "commands:\n"
	      "\tlist [--json]\t\t\t\tlist all configured containers\n"
	      "\tattach <conf>\t\t\t\tattach to container console\n"
	      "\texec <conf> [cmd]\t\t\texecute command or shell in container\n"
	      "\tcreate <conf>\t\t\t\t(re-)create <conf>\n"
	      "\t\t[--bundle <path>]\t\t\tOCI bundle at <path>\n"
	      "\t\t[--autostart]\t\t\t\tstart on boot\n"
	      "\t\t[--temp-overlay-size <size>]\t\tuse tmpfs overlay with {size}\n"
	      "\t\t[--write-overlay-path <path>]\t\tuse overlay on {path}\n"
	      "\t\t[--mounts <v1>,<v2>,...,<vN>]\t\trequire filesystems to be available\n"
	      /* closing bracket added; it also restores the column alignment */
	      "\t\t[--infra <conf>]\t\t\tuse shared namespace of other container\n"
	      "\tstart [--console] <conf>\t\tstart container <conf>\n"
	      "\tstate <conf>\t\t\t\tget state of container <conf>\n"
	      "\tkill <conf> [<signal>]\t\t\tsend signal to container <conf>\n"
	      "\trestart [--console] <conf> [<signal>]\trestart container <conf>\n"
	      "\tenable <conf>\t\t\t\tstart container <conf> on boot\n"
	      "\tdisable <conf>\t\t\t\tdon't start container <conf> on boot\n"
	      "\tdelete <conf> [--force]\t\t\tdelete <conf>\n",
	      stdout);

	return -EINVAL;
}
/*
 * Indices into the attribute table produced by parsing one container
 * configuration (or settings) object with conf_policy.
 */
enum { | |
CONF_NAME, | |
CONF_PATH, | |
CONF_JAIL, | |
CONF_INFRA, | |
CONF_AUTOSTART, | |
CONF_PIDFILE, | |
CONF_TEMP_OVERLAY_SIZE, | |
CONF_WRITE_OVERLAY_PATH, | |
CONF_VOLUMES, | |
/* number of entries; used as array size */
__CONF_MAX, | |
}; | |
/*
 * JSON keys and expected types of a configuration object. The visible
 * consumers skip entries without "name"; the per-container lookups also
 * require "path".
 */
static const struct blobmsg_policy conf_policy[__CONF_MAX] = { | |
[CONF_NAME] = { .name = "name", .type = BLOBMSG_TYPE_STRING }, | |
[CONF_PATH] = { .name = "path", .type = BLOBMSG_TYPE_STRING }, | |
[CONF_JAIL] = { .name = "jail", .type = BLOBMSG_TYPE_STRING }, | |
[CONF_INFRA] = { .name = "infra", .type = BLOBMSG_TYPE_STRING }, | |
[CONF_AUTOSTART] = { .name = "autostart", .type = BLOBMSG_TYPE_BOOL }, | |
[CONF_PIDFILE] = { .name = "pidfile", .type = BLOBMSG_TYPE_STRING }, | |
[CONF_TEMP_OVERLAY_SIZE] = { .name = "temp-overlay-size", .type = BLOBMSG_TYPE_STRING }, | |
[CONF_WRITE_OVERLAY_PATH] = { .name = "write-overlay-path", .type = BLOBMSG_TYPE_STRING }, | |
[CONF_VOLUMES] = { .name = "volumes", .type = BLOBMSG_TYPE_ARRAY }, | |
}; | |
/*
 * Opens /proc/<pid>/<name> read-only.
 *
 * The entry must exist and be a symbolic link (as the ns/ entries and
 * "root" below /proc/<pid> are); lstat() is used so the link itself is
 * inspected rather than its target.
 *
 * @pid:  process whose /proc directory is used
 * @name: path relative to /proc/<pid>, e.g. "ns/net" or "root"
 *
 * Returns the open file descriptor, or -1 on any failure (a message is
 * printed to stderr).
 */
static int open_ns(int pid, char *name)
{
	struct stat st;
	char *path;
	int fd;

	if (asprintf(&path, "/proc/%d/%s", pid, name) == -1) {
		fprintf(stderr, "cannot allocate path /proc/%d/%s\n", pid, name);
		return -1;
	}

	if (lstat(path, &st) != 0 || !S_ISLNK(st.st_mode)) {
		fprintf(stderr, "file /proc/%d/%s does not exist or is not a symbolic link\n", pid, name);
		free(path);
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		fprintf(stderr, "cannot open /proc/%d/%s\n", pid, name);

	free(path);
	return fd;
}
static int conf_load(bool load_settings) | |
{ | |
int gl_flags = GLOB_NOESCAPE | GLOB_MARK; | |
int j, res; | |
glob_t gl; | |
char *globstr; | |
void *c, *o; | |
struct stat sb; | |
struct blob_buf *target; | |
if (asprintf(&globstr, "%s/%s*.json", UXC_ETC_CONFDIR, load_settings?"settings/":"") == -1) | |
return -ENOMEM; | |
res = glob(globstr, gl_flags, NULL, &gl); | |
if (res == 0) | |
gl_flags |= GLOB_APPEND; | |
free(globstr); | |
if (!stat(UXC_VOL_CONFDIR, &sb)) { | |
if (sb.st_mode & S_IFDIR) { | |
if (asprintf(&globstr, "%s/%s*.json", UXC_VOL_CONFDIR, load_settings?"settings/":"") == -1) | |
return -ENOMEM; | |
res = glob(globstr, gl_flags, NULL, &gl); | |
free(globstr); | |
} | |
} | |
target = load_settings ? &settingsbuf : &conf; | |
blob_buf_init(target, 0); | |
c = blobmsg_open_table(target, NULL); | |
if (res < 0) | |
return 0; | |
for (j = 0; j < gl.gl_pathc; j++) { | |
o = blobmsg_open_table(target, strdup(gl.gl_pathv[j])); | |
if (!blobmsg_add_json_from_file(target, gl.gl_pathv[j])) { | |
ERROR("uxc: failed to load %s\n", gl.gl_pathv[j]); | |
continue; | |
} | |
blobmsg_close_table(target, o); | |
} | |
blobmsg_close_table(target, c); | |
globfree(&gl); | |
return 0; | |
} | |
static struct settings * | |
settings_alloc(const char *container_name) | |
{ | |
struct settings *s; | |
char *new_name; | |
s = calloc_a(sizeof(*s), &new_name, strlen(container_name) + 1); | |
strcpy(new_name, container_name); | |
s->container_name = new_name; | |
s->avl.key = s->container_name; | |
s->autostart = -1; | |
s->infra = NULL; | |
s->tmprwsize = NULL; | |
s->writepath = NULL; | |
s->volumes = NULL; | |
return s; | |
} | |
static int settings_add(void) | |
{ | |
struct blob_attr *cur, *tb[__CONF_MAX]; | |
struct settings *s; | |
int rem, err; | |
avl_init(&settings, avl_strcmp, false, NULL); | |
blobmsg_for_each_attr(cur, blob_data(settingsbuf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME]) | |
continue; | |
if (tb[CONF_TEMP_OVERLAY_SIZE] && tb[CONF_WRITE_OVERLAY_PATH]) | |
return -EINVAL; | |
s = settings_alloc(blobmsg_get_string(tb[CONF_NAME])); | |
if (tb[CONF_INFRA]) | |
s->infra = blobmsg_get_string(tb[CONF_INFRA]); | |
if (tb[CONF_AUTOSTART]) | |
s->autostart = blobmsg_get_bool(tb[CONF_AUTOSTART]); | |
if (tb[CONF_TEMP_OVERLAY_SIZE]) | |
s->tmprwsize = blobmsg_get_string(tb[CONF_TEMP_OVERLAY_SIZE]); | |
if (tb[CONF_WRITE_OVERLAY_PATH]) | |
s->writepath = blobmsg_get_string(tb[CONF_WRITE_OVERLAY_PATH]); | |
s->volumes = tb[CONF_VOLUMES]; | |
s->fname = blobmsg_name(cur); | |
err = avl_insert(&settings, &s->avl); | |
if (err) { | |
fprintf(stderr, "error adding settings for %s\n", blobmsg_get_string(tb[CONF_NAME])); | |
free(s); | |
} | |
} | |
return 0; | |
} | |
static void settings_free(void) | |
{ | |
struct settings *item, *tmp; | |
avl_for_each_element_safe(&settings, item, avl, tmp) { | |
avl_delete(&settings, &item->avl); | |
free(item); | |
} | |
return; | |
} | |
/* Attributes parsed from each container entry of the "list" reply (see list_cb()). */
enum { | |
LIST_INSTANCES, | |
__LIST_MAX, | |
}; | |
static const struct blobmsg_policy list_policy[__LIST_MAX] = { | |
[LIST_INSTANCES] = { .name = "instances", .type = BLOBMSG_TYPE_TABLE }, | |
}; | |
/* Attributes parsed from each entry of a container's "instances" table. */
enum { | |
INSTANCE_RUNNING, | |
INSTANCE_PID, | |
INSTANCE_EXITCODE, | |
INSTANCE_JAIL, | |
__INSTANCE_MAX, | |
}; | |
static const struct blobmsg_policy instance_policy[__INSTANCE_MAX] = { | |
[INSTANCE_RUNNING] = { .name = "running", .type = BLOBMSG_TYPE_BOOL }, | |
[INSTANCE_PID] = { .name = "pid", .type = BLOBMSG_TYPE_INT32 }, | |
[INSTANCE_EXITCODE] = { .name = "exit_code", .type = BLOBMSG_TYPE_INT32 }, | |
[INSTANCE_JAIL] = { .name = "jail", .type = BLOBMSG_TYPE_TABLE }, | |
}; | |
/* Attributes parsed from an instance's "jail" table; only the name is used. */
enum { | |
JAIL_NAME, | |
__JAIL_MAX, | |
}; | |
static const struct blobmsg_policy jail_policy[__JAIL_MAX] = { | |
[JAIL_NAME] = { .name = "name", .type = BLOBMSG_TYPE_STRING }, | |
}; | |
static struct runtime_state * | |
runtime_alloc(const char *container_name) | |
{ | |
struct runtime_state *s; | |
char *new_name; | |
s = calloc_a(sizeof(*s), &new_name, strlen(container_name) + 1); | |
strcpy(new_name, container_name); | |
s->container_name = new_name; | |
s->avl.key = s->container_name; | |
return s; | |
} | |
/* Indices of the attributes parsed from a container's "state" reply. */
enum { | |
STATE_OCIVERSION, | |
STATE_ID, | |
STATE_STATUS, | |
STATE_PID, | |
STATE_BUNDLE, | |
STATE_INFRA, | |
STATE_ANNOTATIONS, | |
__STATE_MAX, | |
}; | |
/*
 * Keys and types of the state object. ocistate_cb() requires "ociVersion",
 * "id", "status" and "bundle" to be present; the remaining keys are optional.
 */
static const struct blobmsg_policy state_policy[__STATE_MAX] = { | |
[STATE_OCIVERSION] = { .name = "ociVersion", .type = BLOBMSG_TYPE_STRING }, | |
[STATE_ID] = { .name = "id", .type = BLOBMSG_TYPE_STRING }, | |
[STATE_STATUS] = { .name = "status", .type = BLOBMSG_TYPE_STRING }, | |
[STATE_PID] = { .name = "pid", .type = BLOBMSG_TYPE_INT32 }, | |
[STATE_BUNDLE] = { .name = "bundle", .type = BLOBMSG_TYPE_STRING }, | |
[STATE_INFRA] = { .name = "infra", .type = BLOBMSG_TYPE_STRING }, | |
[STATE_ANNOTATIONS] = { .name = "annotations", .type = BLOBMSG_TYPE_TABLE }, | |
}; | |
static void ocistate_cb(struct ubus_request *req, int type, struct blob_attr *msg) | |
{ | |
struct blob_attr **ocistate = (struct blob_attr **)req->priv; | |
struct blob_attr *tb[__STATE_MAX]; | |
blobmsg_parse(state_policy, __STATE_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); | |
if (!tb[STATE_OCIVERSION] || | |
!tb[STATE_ID] || | |
!tb[STATE_STATUS] || | |
!tb[STATE_BUNDLE]) | |
return; | |
*ocistate = blob_memdup(msg); | |
} | |
static void get_ocistate(struct blob_attr **ocistate, const char *name) | |
{ | |
char *objname; | |
unsigned int id; | |
int ret; | |
*ocistate = NULL; | |
if (asprintf(&objname, "container.%s", name) == -1) | |
exit(ENOMEM); | |
ret = ubus_lookup_id(ctx, objname, &id); | |
free(objname); | |
if (ret) | |
return; | |
ubus_invoke(ctx, id, "state", NULL, ocistate_cb, ocistate, 3000); | |
} | |
static void list_cb(struct ubus_request *req, int type, struct blob_attr *msg) | |
{ | |
struct blob_attr *cur, *curi, *tl[__LIST_MAX], *ti[__INSTANCE_MAX], *tj[__JAIL_MAX]; | |
int rem, remi; | |
const char *container_name, *instance_name, *jail_name; | |
bool running; | |
int pid, exitcode; | |
struct runtime_state *rs; | |
blobmsg_for_each_attr(cur, msg, rem) { | |
container_name = blobmsg_name(cur); | |
blobmsg_parse(list_policy, __LIST_MAX, tl, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tl[LIST_INSTANCES]) | |
continue; | |
blobmsg_for_each_attr(curi, tl[LIST_INSTANCES], remi) { | |
instance_name = blobmsg_name(curi); | |
blobmsg_parse(instance_policy, __INSTANCE_MAX, ti, blobmsg_data(curi), blobmsg_len(curi)); | |
if (!ti[INSTANCE_JAIL]) | |
continue; | |
blobmsg_parse(jail_policy, __JAIL_MAX, tj, blobmsg_data(ti[INSTANCE_JAIL]), blobmsg_len(ti[INSTANCE_JAIL])); | |
if (!tj[JAIL_NAME]) | |
continue; | |
jail_name = blobmsg_get_string(tj[JAIL_NAME]); | |
running = ti[INSTANCE_RUNNING] && blobmsg_get_bool(ti[INSTANCE_RUNNING]); | |
if (ti[INSTANCE_PID]) | |
pid = blobmsg_get_u32(ti[INSTANCE_PID]); | |
else | |
pid = -1; | |
if (ti[INSTANCE_EXITCODE]) | |
exitcode = blobmsg_get_u32(ti[INSTANCE_EXITCODE]); | |
else | |
exitcode = -1; | |
rs = runtime_alloc(container_name); | |
rs->instance_name = strdup(instance_name); | |
rs->jail_name = strdup(jail_name); | |
rs->runtime_pid = pid; | |
rs->exitcode = exitcode; | |
rs->running = running; | |
avl_insert(&runtime, &rs->avl); | |
} | |
} | |
return; | |
} | |
static int runtime_load(void) | |
{ | |
struct runtime_state *item, *tmp; | |
uint32_t id; | |
avl_init(&runtime, avl_strcmp, false, NULL); | |
if (ubus_lookup_id(ctx, "container", &id) || | |
ubus_invoke(ctx, id, "list", NULL, list_cb, &runtime, 3000)) | |
return -EIO; | |
avl_for_each_element_safe(&runtime, item, avl, tmp) | |
get_ocistate(&item->ocistate, item->jail_name); | |
return 0; | |
} | |
static void runtime_free(void) | |
{ | |
struct runtime_state *item, *tmp; | |
avl_for_each_element_safe(&runtime, item, avl, tmp) { | |
avl_delete(&runtime, &item->avl); | |
free(item->instance_name); | |
free(item->jail_name); | |
free(item->ocistate); | |
free(item); | |
} | |
return; | |
} | |
/*
 * Switches terminal @fd into the raw-ish mode used for console forwarding.
 *
 * @fd:      terminal file descriptor
 * @oldtios: receives the attributes that were in effect before the change
 *
 * Returns 0 on success, -EIO if @fd is not a terminal, or the negated errno
 * of the failing tcgetattr()/tcsetattr() call.
 */
static inline int setup_tios(int fd, struct termios *oldtios)
{
	struct termios raw;

	if (!isatty(fd))
		return -EIO;

	/* Get current termios */
	if (tcgetattr(fd, oldtios) < 0)
		return -errno;

	/* We use the same settings that ssh does. */
	raw = *oldtios;
	raw.c_iflag = (raw.c_iflag | IGNPAR) &
		      ~(ISTRIP | INLCR | IGNCR | ICRNL | IXON | IXANY | IXOFF);
	raw.c_lflag &= ~(TOSTOP | ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL);
	raw.c_oflag = (raw.c_oflag & ~ONLCR) | OPOST;
	/* return from read() as soon as one byte is available, no timeout */
	raw.c_cc[VMIN] = 1;
	raw.c_cc[VTIME] = 0;

	/* Set new attributes */
	if (tcsetattr(fd, TCSAFLUSH, &raw) < 0)
		return -errno;

	return 0;
}
static void client_cb(struct ustream *s, int bytes) | |
{ | |
char *buf; | |
int len, rv; | |
do { | |
buf = ustream_get_read_buf(s, &len); | |
if (!buf) | |
break; | |
rv = ustream_write(&lufd.stream, buf, len, false); | |
if (rv > 0) | |
ustream_consume(s, rv); | |
if (rv <= len) | |
break; | |
} while(1); | |
} | |
static void local_cb(struct ustream *s, int bytes) | |
{ | |
char *buf; | |
int len, rv; | |
do { | |
buf = ustream_get_read_buf(s, &len); | |
if (!buf) | |
break; | |
if ((len > 0) && (buf[0] == 2)) | |
uloop_end(); | |
rv = ustream_write(&cufd.stream, buf, len, false); | |
if (rv > 0) | |
ustream_consume(s, rv); | |
if (rv <= len) | |
break; | |
} while(1); | |
} | |
static int uxc_attach(const char *container_name) | |
{ | |
struct ubus_context *ctx; | |
uint32_t id; | |
static struct blob_buf req; | |
int client_fd, server_fd, tty_fd; | |
struct termios oldtermios; | |
ctx = ubus_connect(NULL); | |
if (!ctx) { | |
fprintf(stderr, "can't connect to ubus!\n"); | |
return -ECONNREFUSED; | |
} | |
/* open pseudo-terminal pair */ | |
client_fd = posix_openpt(O_RDWR | O_NOCTTY); | |
if (client_fd < 0) { | |
fprintf(stderr, "can't create virtual console!\n"); | |
ubus_free(ctx); | |
return -EIO; | |
} | |
setup_tios(client_fd, &oldtermios); | |
grantpt(client_fd); | |
unlockpt(client_fd); | |
server_fd = open(ptsname(client_fd), O_RDWR | O_NOCTTY); | |
if (server_fd < 0) { | |
fprintf(stderr, "can't open virtual console!\n"); | |
close(client_fd); | |
ubus_free(ctx); | |
return -EIO; | |
} | |
setup_tios(server_fd, &oldtermios); | |
tty_fd = open("/dev/tty", O_RDWR); | |
if (tty_fd < 0) { | |
fprintf(stderr, "can't open local console!\n"); | |
close(server_fd); | |
close(client_fd); | |
ubus_free(ctx); | |
return -EIO; | |
} | |
setup_tios(tty_fd, &oldtermios); | |
/* register server-side with procd */ | |
blob_buf_init(&req, 0); | |
blobmsg_add_string(&req, "name", container_name); | |
blobmsg_add_string(&req, "instance", container_name); | |
if (ubus_lookup_id(ctx, "container", &id) || | |
ubus_invoke_fd(ctx, id, "console_attach", req.head, NULL, NULL, 3000, server_fd)) { | |
fprintf(stderr, "ubus request failed\n"); | |
close(tty_fd); | |
close(server_fd); | |
close(client_fd); | |
blob_buf_free(&req); | |
ubus_free(ctx); | |
return -ENXIO; | |
} | |
close(server_fd); | |
blob_buf_free(&req); | |
ubus_free(ctx); | |
uloop_init(); | |
/* forward between stdio and client_fd until detach is requested */ | |
lufd.stream.notify_read = local_cb; | |
ustream_fd_init(&lufd, tty_fd); | |
cufd.stream.notify_read = client_cb; | |
/* ToDo: handle remote close and other events */ | |
// cufd.stream.notify_state = client_state_cb; | |
ustream_fd_init(&cufd, client_fd); | |
fprintf(stderr, "attaching to jail console. press [CTRL]+[B] to exit.\n"); | |
close(0); | |
close(1); | |
close(2); | |
uloop_run(); | |
tcsetattr(tty_fd, TCSAFLUSH, &oldtermios); | |
ustream_free(&lufd.stream); | |
ustream_free(&cufd.stream); | |
close(client_fd); | |
return 0; | |
} | |
static int uxc_exec(const char *container_name, char **args) | |
{ | |
struct blob_attr *cur, *tb[__CONF_MAX], *ts[__STATE_MAX]; | |
struct runtime_state *rsstate = NULL; | |
int rem, container_pid; | |
bool found = false; | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
if (strcmp(container_name, blobmsg_get_string(tb[CONF_NAME]))) | |
continue; | |
found = true; | |
break; | |
} | |
if (!found) | |
return -ENOENT; | |
rsstate = avl_find_element(&runtime, container_name, rsstate, avl); | |
container_pid = 0; | |
if (rsstate && rsstate->ocistate) { | |
blobmsg_parse(state_policy, __STATE_MAX, ts, blobmsg_data(rsstate->ocistate), blobmsg_len(rsstate->ocistate)); | |
container_pid = blobmsg_get_u32(ts[STATE_PID]); | |
} | |
if (container_pid < 2) { | |
fprintf(stderr, "failed to container pid for %s\n", container_name); | |
return -ENOENT; | |
} | |
int ns_ipc = open_ns(container_pid, "ns/ipc"); | |
int ns_mnt = open_ns(container_pid, "ns/mnt"); | |
int ns_net = open_ns(container_pid, "ns/net"); | |
int ns_uts = open_ns(container_pid, "ns/uts"); | |
int ns_pid = open_ns(container_pid, "ns/pid"); | |
int ns_root = open_ns(container_pid, "root"); | |
if (ns_ipc == -1 || ns_mnt == -1 || ns_net == -1 || ns_uts == -1 || ns_pid == -1 || ns_root == -1) | |
return -ENXIO; | |
if (setns(ns_ipc, 0) == -1) { | |
fprintf(stderr, "failed to enter ipc namespace\n"); | |
return -ENXIO; | |
} | |
if (setns(ns_mnt, 0) == -1) { | |
fprintf(stderr, "failed to enter mnt namespace\n"); | |
return -ENXIO; | |
} | |
if (setns(ns_net, 0) == -1) { | |
fprintf(stderr, "failed to enter net namespace\n"); | |
return -ENXIO; | |
} | |
if (setns(ns_uts, 0) == -1) { | |
fprintf(stderr, "failed to enter uts namespace\n"); | |
return -ENXIO; | |
} | |
if (setns(ns_pid, 0) == -1) { | |
fprintf(stderr, "failed to enter pid namespace\n"); | |
return -ENXIO; | |
} | |
if (fchdir(ns_root) == -1) { | |
fprintf(stderr, "failed to change working directory\n"); | |
return -ENXIO; | |
} | |
if (chroot(".") == -1) { | |
fprintf(stderr, "failed to chroot\n"); | |
return -ENXIO; | |
} | |
if (execv(args[0], args) == -1) { | |
fprintf(stderr, "failed to execute %s in container %s\n", args[0], container_name); | |
return -ENXIO; | |
} | |
return 0; | |
} | |
static int uxc_state(char *name) | |
{ | |
struct runtime_state *rsstate = avl_find_element(&runtime, name, rsstate, avl); | |
struct blob_attr *ocistate = NULL; | |
struct blob_attr *cur, *tb[__CONF_MAX]; | |
int rem; | |
char *bundle = NULL; | |
char *jail_name = NULL; | |
char *state = NULL; | |
char *infra = NULL; | |
char *tmp; | |
static struct blob_buf buf; | |
if (rsstate) | |
ocistate = rsstate->ocistate; | |
if (ocistate) { | |
state = blobmsg_format_json_indent(ocistate, true, 0); | |
if (!state) | |
return -ENOMEM; | |
printf("%s\n", state); | |
free(state); | |
return 0; | |
} | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
if (!strcmp(name, blobmsg_get_string(tb[CONF_NAME]))) { | |
if (tb[CONF_JAIL]) | |
jail_name = blobmsg_get_string(tb[CONF_JAIL]); | |
else | |
jail_name = name; | |
if (tb[CONF_INFRA]) | |
infra = blobmsg_get_string(tb[CONF_INFRA]); | |
bundle = blobmsg_get_string(tb[CONF_PATH]); | |
break; | |
} | |
} | |
if (!bundle) | |
return -ENOENT; | |
blob_buf_init(&buf, 0); | |
blobmsg_add_string(&buf, "ociVersion", OCI_VERSION_STRING); | |
blobmsg_add_string(&buf, "id", jail_name); | |
blobmsg_add_string(&buf, "status", rsstate?"stopped":"uninitialized"); | |
blobmsg_add_string(&buf, "bundle", bundle); | |
if (infra) | |
blobmsg_add_string(&buf, "infra", infra); | |
tmp = blobmsg_format_json_indent(buf.head, true, 0); | |
if (!tmp) { | |
blob_buf_free(&buf); | |
return -ENOMEM; | |
} | |
printf("%s\n", tmp); | |
free(tmp); | |
blob_buf_free(&buf); | |
return 0; | |
} | |
static int uxc_list(void) | |
{ | |
struct blob_attr *cur, *tb[__CONF_MAX], *ts[__STATE_MAX]; | |
int rem; | |
struct runtime_state *rsstate = NULL; | |
struct settings *usettings = NULL; | |
char *name, *ocistatus, *status, *tmp; | |
int container_pid = -1; | |
bool autostart; | |
static struct blob_buf buf; | |
void *arr, *obj; | |
if (json_output) { | |
blob_buf_init(&buf, 0); | |
arr = blobmsg_open_array(&buf, ""); | |
} | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
autostart = tb[CONF_AUTOSTART] && blobmsg_get_bool(tb[CONF_AUTOSTART]); | |
ocistatus = NULL; | |
container_pid = 0; | |
name = blobmsg_get_string(tb[CONF_NAME]); | |
rsstate = avl_find_element(&runtime, name, rsstate, avl); | |
if (rsstate && rsstate->ocistate) { | |
blobmsg_parse(state_policy, __STATE_MAX, ts, blobmsg_data(rsstate->ocistate), blobmsg_len(rsstate->ocistate)); | |
ocistatus = blobmsg_get_string(ts[STATE_STATUS]); | |
container_pid = blobmsg_get_u32(ts[STATE_PID]); | |
} | |
status = ocistatus?:(rsstate && rsstate->running)?"creating":"stopped"; | |
usettings = avl_find_element(&settings, name, usettings, avl); | |
if (usettings && (usettings->autostart >= 0)) | |
autostart = !!(usettings->autostart); | |
if (json_output) { | |
obj = blobmsg_open_table(&buf, ""); | |
blobmsg_add_string(&buf, "name", name); | |
blobmsg_add_string(&buf, "status", status); | |
blobmsg_add_u8(&buf, "autostart", autostart); | |
} else { | |
printf("[%c] %s %s", autostart?'*':' ', name, status); | |
} | |
if (rsstate && !rsstate->running && (rsstate->exitcode >= 0)) { | |
if (json_output) | |
blobmsg_add_u32(&buf, "exitcode", rsstate->exitcode); | |
else | |
printf(" exitcode: %d (%s)", rsstate->exitcode, strerror(rsstate->exitcode)); | |
} | |
if (rsstate && rsstate->running && (rsstate->runtime_pid >= 0)) { | |
if (json_output) | |
blobmsg_add_u32(&buf, "runtime_pid", rsstate->runtime_pid); | |
else | |
printf(" runtime pid: %d", rsstate->runtime_pid); | |
} | |
if (rsstate && rsstate->running && (container_pid >= 0)) { | |
if (json_output) | |
blobmsg_add_u32(&buf, "container_pid", container_pid); | |
else | |
printf(" container pid: %d", container_pid); | |
} | |
if (!json_output) | |
printf("\n"); | |
else | |
blobmsg_close_table(&buf, obj); | |
} | |
if (json_output) { | |
blobmsg_close_array(&buf, arr); | |
tmp = blobmsg_format_json_indent(buf.head, true, 0); | |
if (!tmp) { | |
blob_buf_free(&buf); | |
return -ENOMEM; | |
} | |
printf("%s\n", tmp); | |
free(tmp); | |
blob_buf_free(&buf); | |
}; | |
return 0; | |
} | |
static int uxc_exists(char *name) | |
{ | |
struct runtime_state *rsstate = NULL; | |
rsstate = avl_find_element(&runtime, name, rsstate, avl); | |
if (rsstate && (rsstate->running)) | |
return -EEXIST; | |
return 0; | |
} | |
static int uxc_create(char *name, bool immediately) | |
{ | |
static struct blob_buf req; | |
struct blob_attr *cur, *tb[__CONF_MAX]; | |
int rem, ret = 0; | |
uint32_t id; | |
struct settings *usettings = NULL; | |
char *path = NULL, *jailname = NULL, *pidfile = NULL, *tmprwsize = NULL, *writepath = NULL, *infra = NULL; | |
void *in, *ins, *j; | |
bool found = false; | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
if (strcmp(name, blobmsg_get_string(tb[CONF_NAME]))) | |
continue; | |
found = true; | |
break; | |
} | |
if (!found) | |
return -ENOENT; | |
path = blobmsg_get_string(tb[CONF_PATH]); | |
if (tb[CONF_PIDFILE]) | |
pidfile = blobmsg_get_string(tb[CONF_PIDFILE]); | |
if (tb[CONF_TEMP_OVERLAY_SIZE]) | |
tmprwsize = blobmsg_get_string(tb[CONF_TEMP_OVERLAY_SIZE]); | |
if (tb[CONF_WRITE_OVERLAY_PATH]) | |
writepath = blobmsg_get_string(tb[CONF_WRITE_OVERLAY_PATH]); | |
if (tb[CONF_JAIL]) | |
jailname = blobmsg_get_string(tb[CONF_JAIL]); | |
if (tb[CONF_INFRA]) | |
infra = blobmsg_get_string(tb[CONF_INFRA]); | |
usettings = avl_find_element(&settings, blobmsg_get_string(tb[CONF_NAME]), usettings, avl); | |
if (usettings) { | |
if (usettings->writepath) { | |
writepath = usettings->writepath; | |
tmprwsize = NULL; | |
} | |
if (usettings->tmprwsize) { | |
tmprwsize = usettings->tmprwsize; | |
writepath = NULL; | |
} | |
if (!infra && usettings->infra) | |
infra = usettings->infra; | |
} | |
blob_buf_init(&req, 0); | |
blobmsg_add_string(&req, "name", name); | |
ins = blobmsg_open_table(&req, "instances"); | |
in = blobmsg_open_table(&req, name); | |
blobmsg_add_string(&req, "bundle", path); | |
j = blobmsg_open_table(&req, "jail"); | |
blobmsg_add_string(&req, "name", jailname?:name); | |
blobmsg_add_u8(&req, "immediately", immediately); | |
if (pidfile) | |
blobmsg_add_string(&req, "pidfile", pidfile); | |
blobmsg_close_table(&req, j); | |
if (writepath) | |
blobmsg_add_string(&req, "overlaydir", writepath); | |
if (tmprwsize) | |
blobmsg_add_string(&req, "tmpoverlaysize", tmprwsize); | |
if (infra) | |
blobmsg_add_string(&req, "infra", infra); | |
blobmsg_close_table(&req, in); | |
blobmsg_close_table(&req, ins); | |
if (verbose) { | |
char *tmp; | |
tmp = blobmsg_format_json_indent(req.head, true, 1); | |
if (!tmp) | |
return -ENOMEM; | |
fprintf(stderr, "adding container to procd:\n\t%s\n", tmp); | |
free(tmp); | |
} | |
if (ubus_lookup_id(ctx, "container", &id) || | |
ubus_invoke(ctx, id, "add", req.head, NULL, NULL, 3000)) { | |
blob_buf_free(&req); | |
ret = -EIO; | |
} | |
return ret; | |
} | |
static int uxc_start(const char *name, bool console) | |
{ | |
char *objname; | |
unsigned int id; | |
pid_t pid; | |
if (console) { | |
pid = fork(); | |
if (pid > 0) | |
exit(uxc_attach(name)); | |
} | |
if (asprintf(&objname, "container.%s", name) == -1) | |
return -ENOMEM; | |
if (ubus_lookup_id(ctx, objname, &id)) | |
return -ENOENT; | |
free(objname); | |
return ubus_invoke(ctx, id, "start", NULL, NULL, NULL, 3000); | |
} | |
static int uxc_kill(char *name, int signal) | |
{ | |
static struct blob_buf req; | |
struct blob_attr *cur, *tb[__CONF_MAX]; | |
int rem, ret; | |
char *objname; | |
unsigned int id; | |
struct runtime_state *rsstate = NULL; | |
bool found = false; | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
if (strcmp(name, blobmsg_get_string(tb[CONF_NAME]))) | |
continue; | |
found = true; | |
break; | |
} | |
if (!found) | |
return -ENOENT; | |
rsstate = avl_find_element(&runtime, name, rsstate, avl); | |
if (!rsstate || !(rsstate->running)) | |
return -ENOENT; | |
blob_buf_init(&req, 0); | |
blobmsg_add_u32(&req, "signal", signal); | |
blobmsg_add_string(&req, "name", name); | |
if (asprintf(&objname, "container.%s", name) == -1) | |
return -ENOMEM; | |
ret = ubus_lookup_id(ctx, objname, &id); | |
free(objname); | |
if (ret) | |
return -ENOENT; | |
if (ubus_invoke(ctx, id, "kill", req.head, NULL, NULL, 3000)) | |
return -EIO; | |
return 0; | |
} | |
static int uxc_set(char *name, char *path, signed char autostart, char *pidfile, char *tmprwsize, char *writepath, char *requiredmounts, char *infra) | |
{ | |
static struct blob_buf req; | |
struct settings *usettings = NULL; | |
struct blob_attr *cur, *tb[__CONF_MAX]; | |
int rem, ret; | |
const char *cfname = NULL; | |
const char *sfname = NULL; | |
char *fname = NULL; | |
char *curvol, *tmp, *mnttok; | |
void *mntarr; | |
int f; | |
struct stat sb; | |
/* nothing to do */ | |
if (!path && (autostart<0) && !pidfile && !tmprwsize && !writepath && !requiredmounts && !infra) | |
return 0; | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
if (strcmp(name, blobmsg_get_string(tb[CONF_NAME]))) | |
continue; | |
cfname = blobmsg_name(cur); | |
break; | |
} | |
if (cfname && path) | |
return -EEXIST; | |
if (!cfname && !path) | |
return -ENOENT; | |
if (path) { | |
if (stat(path, &sb) == -1) | |
return -ENOENT; | |
if ((sb.st_mode & S_IFMT) != S_IFDIR) | |
return -ENOTDIR; | |
} | |
usettings = avl_find_element(&settings, blobmsg_get_string(tb[CONF_NAME]), usettings, avl); | |
if (path && usettings) | |
return -EIO; | |
if (usettings) { | |
sfname = usettings->fname; | |
if (!tmprwsize && !writepath) { | |
if (usettings->tmprwsize) { | |
tmprwsize = usettings->tmprwsize; | |
writepath = NULL; | |
} | |
if (usettings->writepath) { | |
writepath = usettings->writepath; | |
tmprwsize = NULL; | |
} | |
} | |
if (usettings->autostart >= 0 && autostart < 0) | |
autostart = !!(usettings->autostart); | |
if (usettings->infra) | |
infra = usettings->infra; | |
} | |
if (path) { | |
ret = mkdir(confdir, 0755); | |
if (ret && errno != EEXIST) | |
return -errno; | |
if (asprintf(&fname, "%s/%s.json", confdir, name) == -1) | |
return -ENOMEM; | |
f = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0644); | |
if (f < 0) | |
return -errno; | |
free(fname); | |
} else { | |
if (sfname) { | |
f = open(sfname, O_WRONLY | O_CREAT | O_TRUNC, 0644); | |
} else { | |
char *t1, *t2; | |
t1 = strdup(cfname); | |
t2 = strrchr(t1, '/'); | |
if (!t2) | |
return -EINVAL; | |
*t2 = '\0'; | |
if (asprintf(&t2, "%s/settings", t1) == -1) | |
return -ENOMEM; | |
ret = mkdir(t2, 0755); | |
if (ret && ret != EEXIST) | |
return -ret; | |
free(t2); | |
if (asprintf(&t2, "%s/settings/%s.json", t1, name) == -1) | |
return -ENOMEM; | |
free(t1); | |
f = open(t2, O_WRONLY | O_CREAT | O_TRUNC, 0644); | |
free(t2); | |
} | |
if (f < 0) | |
return -errno; | |
} | |
blob_buf_init(&req, 0); | |
blobmsg_add_string(&req, "name", name); | |
if (path) | |
blobmsg_add_string(&req, "path", path); | |
if (autostart >= 0) | |
blobmsg_add_u8(&req, "autostart", !!autostart); | |
if (pidfile) | |
blobmsg_add_string(&req, "pidfile", pidfile); | |
if (infra) | |
blobmsg_add_string(&req, "infra", infra); | |
if (tmprwsize) | |
blobmsg_add_string(&req, "temp-overlay-size", tmprwsize); | |
if (writepath) | |
blobmsg_add_string(&req, "write-overlay-path", writepath); | |
if (!requiredmounts && usettings && usettings->volumes) | |
blobmsg_add_blob(&req, usettings->volumes); | |
if (requiredmounts) { | |
mntarr = blobmsg_open_array(&req, "volumes"); | |
for (mnttok = requiredmounts; ; mnttok = NULL) { | |
curvol = strtok_r(mnttok, ",;", &tmp); | |
if (!curvol) | |
break; | |
blobmsg_add_string(&req, NULL, curvol); | |
} | |
blobmsg_close_array(&req, mntarr); | |
} | |
tmp = blobmsg_format_json_indent(req.head, true, 0); | |
if (tmp) { | |
dprintf(f, "%s\n", tmp); | |
free(tmp); | |
} | |
blob_buf_free(&req); | |
close(f); | |
return 1; | |
} | |
/* indices into the attribute table parsed with block_info_policy */
enum {
	BLOCK_INFO_DEVICE = 0,	/* "device" */
	BLOCK_INFO_UUID,	/* "uuid" */
	BLOCK_INFO_TARGET,	/* "target" */
	BLOCK_INFO_TYPE,	/* "type" */
	BLOCK_INFO_MOUNT,	/* "mount" */
	__BLOCK_INFO_MAX,	/* number of attributes, keep last */
};
static const struct blobmsg_policy block_info_policy[__BLOCK_INFO_MAX] = { | |
[BLOCK_INFO_DEVICE] = { .name = "device", .type = BLOBMSG_TYPE_STRING }, | |
[BLOCK_INFO_UUID] = { .name = "uuid", .type = BLOBMSG_TYPE_STRING }, | |
[BLOCK_INFO_TARGET] = { .name = "target", .type = BLOBMSG_TYPE_STRING }, | |
[BLOCK_INFO_TYPE] = { .name = "type", .type = BLOBMSG_TYPE_STRING }, | |
[BLOCK_INFO_MOUNT] = { .name = "mount", .type = BLOBMSG_TYPE_STRING }, | |
}; | |
/* check if device 'devname' is mounted according to blockd */ | |
static bool checkblock(const char *uuid) | |
{ | |
struct blob_attr *tb[__BLOCK_INFO_MAX]; | |
struct blob_attr *cur; | |
int rem; | |
blobmsg_for_each_attr(cur, blockinfo, rem) { | |
blobmsg_parse(block_info_policy, __BLOCK_INFO_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[BLOCK_INFO_UUID] || !tb[BLOCK_INFO_MOUNT]) | |
continue; | |
if (!strcmp(uuid, blobmsg_get_string(tb[BLOCK_INFO_UUID]))) | |
return false; | |
} | |
return true; | |
} | |
/* indices into the attribute table parsed with uci_fstab_policy */
enum {
	UCI_FSTAB_UUID = 0,	/* "uuid" */
	UCI_FSTAB_ANONYMOUS,	/* ".anonymous" */
	__UCI_FSTAB_MAX,	/* number of attributes, keep last */
};
static const struct blobmsg_policy uci_fstab_policy[__UCI_FSTAB_MAX] = { | |
[UCI_FSTAB_UUID] = { .name = "uuid", .type = BLOBMSG_TYPE_STRING }, | |
[UCI_FSTAB_ANONYMOUS] = { .name = ".anonymous", .type = BLOBMSG_TYPE_BOOL }, | |
}; | |
static const char *resolveuuid(const char *volname) | |
{ | |
struct blob_attr *tb[__UCI_FSTAB_MAX]; | |
struct blob_attr *cur; | |
const char *mntname; | |
char *tmpvolname, *replc; | |
int rem, res; | |
blobmsg_for_each_attr(cur, fstabinfo, rem) { | |
blobmsg_parse(uci_fstab_policy, __UCI_FSTAB_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[UCI_FSTAB_UUID]) | |
continue; | |
if (tb[UCI_FSTAB_ANONYMOUS] && blobmsg_get_bool(tb[UCI_FSTAB_ANONYMOUS])) | |
continue; | |
mntname = blobmsg_name(cur); | |
if (!mntname) | |
continue; | |
tmpvolname = strdup(volname); | |
while ((replc = strchr(tmpvolname, '-'))) | |
*replc = '_'; | |
res = strcmp(tmpvolname, mntname); | |
free(tmpvolname); | |
if (!res) | |
return blobmsg_get_string(tb[UCI_FSTAB_UUID]); | |
}; | |
return volname; | |
}; | |
/* check status of each required volume */ | |
static bool checkvolumes(struct blob_attr *volumes) | |
{ | |
struct blob_attr *cur; | |
int rem; | |
blobmsg_for_each_attr(cur, volumes, rem) { | |
if (checkblock(resolveuuid(blobmsg_get_string(cur)))) | |
return true; | |
} | |
return false; | |
} | |
static void block_cb(struct ubus_request *req, int type, struct blob_attr *msg) | |
{ | |
blockinfo = blob_memdup(blobmsg_data(msg)); | |
} | |
static void fstab_cb(struct ubus_request *req, int type, struct blob_attr *msg) | |
{ | |
fstabinfo = blob_memdup(blobmsg_data(msg)); | |
} | |
static int uxc_boot(void) | |
{ | |
struct blob_attr *cur, *tb[__CONF_MAX]; | |
struct runtime_state *rsstate = NULL; | |
struct settings *usettings = NULL; | |
static struct blob_buf req; | |
int rem, ret = 0; | |
char *name; | |
unsigned int id; | |
bool autostart; | |
ret = ubus_lookup_id(ctx, "block", &id); | |
if (ret) | |
return -ENOENT; | |
ret = ubus_invoke(ctx, id, "info", NULL, block_cb, NULL, 3000); | |
if (ret) | |
return -ENXIO; | |
ret = ubus_lookup_id(ctx, "uci", &id); | |
if (ret) | |
return -ENOENT; | |
blob_buf_init(&req, 0); | |
blobmsg_add_string(&req, "config", "fstab"); | |
blobmsg_add_string(&req, "type", "mount"); | |
ret = ubus_invoke(ctx, id, "get", req.head, fstab_cb, NULL, 3000); | |
if (ret) | |
return ret; | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
rsstate = avl_find_element(&runtime, blobmsg_get_string(tb[CONF_NAME]), rsstate, avl); | |
if (rsstate) | |
continue; | |
if (tb[CONF_AUTOSTART]) | |
autostart = blobmsg_get_bool(tb[CONF_AUTOSTART]); | |
usettings = avl_find_element(&settings, blobmsg_get_string(tb[CONF_NAME]), usettings, avl); | |
if (usettings && (usettings->autostart >= 0)) | |
autostart = !!(usettings->autostart); | |
if (!autostart) | |
continue; | |
/* make sure all volumes are ready before starting */ | |
if (tb[CONF_VOLUMES]) | |
if (checkvolumes(tb[CONF_VOLUMES])) | |
continue; | |
if (usettings && usettings->volumes) | |
if (checkvolumes(usettings->volumes)) | |
continue; | |
name = strdup(blobmsg_get_string(tb[CONF_NAME])); | |
if (uxc_exists(name)) | |
continue; | |
if (uxc_create(name, true)) | |
++ret; | |
free(name); | |
} | |
return ret; | |
} | |
static int uxc_delete(char *name, bool force) | |
{ | |
struct blob_attr *cur, *tb[__CONF_MAX]; | |
struct runtime_state *rsstate = NULL; | |
struct settings *usettings = NULL; | |
static struct blob_buf req; | |
uint32_t id; | |
int rem, ret = 0; | |
const char *cfname = NULL; | |
const char *sfname = NULL; | |
struct stat sb; | |
blobmsg_for_each_attr(cur, blob_data(conf.head), rem) { | |
blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur)); | |
if (!tb[CONF_NAME] || !tb[CONF_PATH]) | |
continue; | |
if (strcmp(name, blobmsg_get_string(tb[CONF_NAME]))) | |
continue; | |
cfname = blobmsg_name(cur); | |
break; | |
} | |
if (!cfname) | |
return -ENOENT; | |
rsstate = avl_find_element(&runtime, name, rsstate, avl); | |
if (rsstate && rsstate->running) { | |
if (force) { | |
ret = uxc_kill(name, SIGKILL); | |
if (ret) | |
goto errout; | |
} else { | |
ret = -EWOULDBLOCK; | |
goto errout; | |
} | |
} | |
if (rsstate) { | |
ret = ubus_lookup_id(ctx, "container", &id); | |
if (ret) | |
goto errout; | |
blob_buf_init(&req, 0); | |
blobmsg_add_string(&req, "name", rsstate->container_name); | |
blobmsg_add_string(&req, "instance", rsstate->instance_name); | |
if (ubus_invoke(ctx, id, "delete", req.head, NULL, NULL, 3000)) { | |
blob_buf_free(&req); | |
ret = -EIO; | |
goto errout; | |
} | |
} | |
usettings = avl_find_element(&settings, name, usettings, avl); | |
if (usettings) | |
sfname = usettings->fname; | |
if (sfname) { | |
if (stat(sfname, &sb) == -1) { | |
ret = -ENOENT; | |
goto errout; | |
} | |
if (unlink(sfname) == -1) { | |
ret = -errno; | |
goto errout; | |
} | |
} | |
if (stat(cfname, &sb) == -1) { | |
ret = -ENOENT; | |
goto errout; | |
} | |
if (unlink(cfname) == -1) | |
ret = -errno; | |
errout: | |
return ret; | |
} | |
/*
 * Throw away the cached configuration and settings and load them again,
 * repeating the conf_load(false) / conf_load(true) / settings_add()
 * sequence main() runs at start-up. Callers use it after uxc_set() has
 * written a file. The results of the reloads are not checked.
 */
static void reload_conf(void)
{
/* container configuration held in 'conf' */
blob_buf_free(&conf);
conf_load(false);
/* settings: presumably conf_load(true) refills 'settingsbuf' - TODO confirm */
settings_free();
blob_buf_free(&settingsbuf);
conf_load(true);
settings_add();
}
int main(int argc, char **argv) | |
{ | |
enum uxc_cmd cmd = CMD_UNKNOWN; | |
int ret = -EINVAL; | |
char *bundle = NULL; | |
char *pidfile = NULL; | |
char *tmprwsize = NULL; | |
char *writepath = NULL; | |
char *requiredmounts = NULL; | |
char *infra = NULL; | |
signed char autostart = -1; | |
bool force = false; | |
bool console = false; | |
int signal = SIGTERM; | |
int c; | |
if (argc < 2) | |
return usage(); | |
ctx = ubus_connect(NULL); | |
if (!ctx) | |
return -ENODEV; | |
ret = conf_load(false); | |
if (ret < 0) | |
goto out; | |
ret = conf_load(true); | |
if (ret < 0) | |
goto conf_out; | |
ret = settings_add(); | |
if (ret < 0) | |
goto settings_out; | |
ret = runtime_load(); | |
if (ret) | |
goto settings_avl_out; | |
if ( argc > 1 && !strcmp("exec", argv[1])) | |
cmd = CMD_EXEC; | |
while (cmd != CMD_EXEC) { | |
int option_index = 0; | |
c = getopt_long(argc, argv, OPT_ARGS, long_options, &option_index); | |
if (c == -1) | |
break; | |
switch (c) { | |
case 'a': | |
autostart = 1; | |
break; | |
case 'b': | |
bundle = optarg; | |
break; | |
case 'c': | |
console = true; | |
break; | |
case 'f': | |
force = true; | |
break; | |
case 'j': | |
json_output = true; | |
break; | |
case 'p': | |
pidfile = optarg; | |
break; | |
case 't': | |
tmprwsize = optarg; | |
break; | |
case 'v': | |
verbose = true; | |
break; | |
case 'V': | |
printf("uxc %s\n", UXC_VERSION); | |
exit(0); | |
case 'w': | |
writepath = optarg; | |
break; | |
case 'm': | |
requiredmounts = optarg; | |
break; | |
case 'I': | |
infra = optarg; | |
break; | |
} | |
} | |
if (optind == argc && cmd != CMD_EXEC) | |
goto usage_out; | |
if (!strcmp("list", argv[optind])) | |
cmd = CMD_LIST; | |
else if (!strcmp("attach", argv[optind])) | |
cmd = CMD_ATTACH; | |
else if (!strcmp("exec", argv[optind])) | |
cmd = CMD_EXEC; | |
else if (!strcmp("boot", argv[optind])) | |
cmd = CMD_BOOT; | |
else if(!strcmp("start", argv[optind])) | |
cmd = CMD_START; | |
else if(!strcmp("state", argv[optind])) | |
cmd = CMD_STATE; | |
else if(!strcmp("kill", argv[optind]) || !strcmp("stop", argv[optind])) | |
cmd = CMD_KILL; | |
else if(!strcmp("restart", argv[optind])) | |
cmd = CMD_RESTART; | |
else if(!strcmp("enable", argv[optind])) | |
cmd = CMD_ENABLE; | |
else if(!strcmp("disable", argv[optind])) | |
cmd = CMD_DISABLE; | |
else if(!strcmp("delete", argv[optind])) | |
cmd = CMD_DELETE; | |
else if(!strcmp("create", argv[optind])) | |
cmd = CMD_CREATE; | |
switch (cmd) { | |
case CMD_ATTACH: | |
if (optind != argc - 2) | |
goto usage_out; | |
ret = uxc_attach(argv[optind + 1]); | |
break; | |
case CMD_EXEC: | |
if (argc < 3) | |
goto usage_out; | |
int i; | |
char *cmd = argc < 4 ? "/bin/sh" : argv[3]; | |
int cnt = argc < 5 ? 2 : ( argc - 2 ); | |
char **args = (char **)malloc(cnt * sizeof(char*)); | |
if (argc > 3) { | |
for (i = 0; i < cnt - 1; i++ ) | |
args[i] = argv[i + 3]; | |
} else args[0] = cmd; | |
args[cnt - 1] = NULL; | |
ret = uxc_exec(argv[optind + 1], args); | |
break; | |
case CMD_LIST: | |
ret = uxc_list(); | |
break; | |
case CMD_BOOT: | |
ret = uxc_boot(); | |
break; | |
case CMD_START: | |
if (optind != argc - 2) | |
goto usage_out; | |
ret = uxc_start(argv[optind + 1], console); | |
break; | |
case CMD_RESTART: | |
if (optind == (argc - 3)) | |
signal = atoi(argv[optind + 2]); | |
else if (optind != argc - 2) | |
goto usage_out; | |
uxc_kill(argv[optind + 1], signal); | |
runtime_free(); // poll runtime | |
sleep(1); | |
runtime_load(); | |
ret = uxc_exists(argv[optind + 1]); | |
if (ret) | |
goto runtime_out; | |
ret = uxc_set(argv[optind + 1], bundle, autostart, pidfile, tmprwsize, writepath, requiredmounts, infra); | |
if (ret < 0) | |
goto runtime_out; | |
else if (ret > 0) | |
reload_conf(); | |
ret = uxc_create(argv[optind + 1], false); | |
if (ret != 0) | |
goto runtime_out; | |
runtime_free(); // poll runtime again | |
sleep(1); | |
runtime_load(); | |
ret = uxc_start(argv[optind + 1], console); | |
break; | |
case CMD_STATE: | |
if (optind != argc - 2) | |
goto usage_out; | |
ret = uxc_state(argv[optind + 1]); | |
break; | |
case CMD_KILL: | |
if (optind == (argc - 3)) | |
signal = atoi(argv[optind + 2]); | |
else if (optind > argc - 2) | |
goto usage_out; | |
ret = uxc_kill(argv[optind + 1], signal); | |
break; | |
case CMD_ENABLE: | |
if (optind != argc - 2) | |
goto usage_out; | |
ret = uxc_set(argv[optind + 1], NULL, 1, NULL, NULL, NULL, NULL, NULL); | |
break; | |
case CMD_DISABLE: | |
if (optind != argc - 2) | |
goto usage_out; | |
ret = uxc_set(argv[optind + 1], NULL, 0, NULL, NULL, NULL, NULL, NULL); | |
break; | |
case CMD_DELETE: | |
if (optind != argc - 2) | |
goto usage_out; | |
ret = uxc_delete(argv[optind + 1], force); | |
break; | |
case CMD_CREATE: | |
if (optind != argc - 2) | |
goto usage_out; | |
ret = uxc_exists(argv[optind + 1]); | |
if (ret) | |
goto runtime_out; | |
ret = uxc_set(argv[optind + 1], bundle, autostart, pidfile, tmprwsize, writepath, requiredmounts, infra); | |
if (ret < 0) | |
goto runtime_out; | |
if (ret > 0) | |
reload_conf(); | |
ret = uxc_create(argv[optind + 1], false); | |
break; | |
default: | |
goto usage_out; | |
} | |
goto runtime_out; | |
usage_out: | |
ret = usage(); | |
runtime_out: | |
runtime_free(); | |
settings_avl_out: | |
settings_free(); | |
settings_out: | |
blob_buf_free(&settingsbuf); | |
conf_out: | |
blob_buf_free(&conf); | |
out: | |
ubus_free(ctx); | |
if (ret < 0) | |
fprintf(stderr, "uxc error: %s\n", strerror(-ret)); | |
return ret; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment