Skip to content

Instantly share code, notes, and snippets.

@oskarirauta
Created February 23, 2024 17:21
Show Gist options
  • Save oskarirauta/ca0e47ca9c89f920ad281795522e4cdb to your computer and use it in GitHub Desktop.
Save oskarirauta/ca0e47ca9c89f920ad281795522e4cdb to your computer and use it in GitHub Desktop.
uxc infra container support
/*
* Copyright (C) 2013 Felix Fietkau <nbd@openwrt.org>
* Copyright (C) 2013 John Crispin <blogic@openwrt.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
* as published by the Free Software Foundation
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define _GNU_SOURCE
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <grp.h>
#include <net/if.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <pwd.h>
#include <libgen.h>
#include <unistd.h>
#define SYSLOG_NAMES
#include <syslog.h>
#include <libubox/md5.h>
#include "../procd.h"
#include "service.h"
#include "instance.h"
#define UJAIL_BIN_PATH "/sbin/ujail"
#define CGROUP_BASEDIR "/sys/fs/cgroup/services"
/*
 * Indices of the per-instance configuration attributes.
 *
 * Each value is used as the designated index of the matching entry in the
 * instance_attr[] policy table and as the slot in the tb[] array that
 * blobmsg_parse() fills in instance_config_parse().
 * __INSTANCE_ATTR_MAX is the number of attributes, not an attribute itself.
 */
enum {
INSTANCE_ATTR_COMMAND,
INSTANCE_ATTR_ENV,
INSTANCE_ATTR_DATA,
INSTANCE_ATTR_NETDEV,
INSTANCE_ATTR_FILE,
INSTANCE_ATTR_TRIGGER,
INSTANCE_ATTR_RESPAWN,
INSTANCE_ATTR_NICE,
INSTANCE_ATTR_LIMITS,
INSTANCE_ATTR_WATCH,
INSTANCE_ATTR_ERROR,
INSTANCE_ATTR_USER,
INSTANCE_ATTR_GROUP,
INSTANCE_ATTR_STDOUT,
INSTANCE_ATTR_STDERR,
INSTANCE_ATTR_NO_NEW_PRIVS,
INSTANCE_ATTR_JAIL,
INSTANCE_ATTR_TRACE,
INSTANCE_ATTR_SECCOMP,
INSTANCE_ATTR_CAPABILITIES,
INSTANCE_ATTR_PIDFILE,
INSTANCE_ATTR_RELOADSIG,
INSTANCE_ATTR_TERMTIMEOUT,
INSTANCE_ATTR_FACILITY,
INSTANCE_ATTR_EXTROOT,
INSTANCE_ATTR_OVERLAYDIR,
INSTANCE_ATTR_TMPOVERLAYSIZE,
INSTANCE_ATTR_BUNDLE,
INSTANCE_ATTR_INFRA,
INSTANCE_ATTR_WATCHDOG,
__INSTANCE_ATTR_MAX
};
/*
 * blobmsg parsing policy for an instance definition: maps every
 * INSTANCE_ATTR_* index to the attribute name looked up in the message and
 * the blobmsg type it is declared with. Used by blobmsg_parse() in
 * instance_config_parse().
 */
static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = {
[INSTANCE_ATTR_COMMAND] = { "command", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_ENV] = { "env", BLOBMSG_TYPE_TABLE },
[INSTANCE_ATTR_DATA] = { "data", BLOBMSG_TYPE_TABLE },
[INSTANCE_ATTR_NETDEV] = { "netdev", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_FILE] = { "file", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_TRIGGER] = { "triggers", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_RESPAWN] = { "respawn", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_NICE] = { "nice", BLOBMSG_TYPE_INT32 },
[INSTANCE_ATTR_LIMITS] = { "limits", BLOBMSG_TYPE_TABLE },
[INSTANCE_ATTR_WATCH] = { "watch", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_ERROR] = { "error", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_USER] = { "user", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_GROUP] = { "group", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_STDOUT] = { "stdout", BLOBMSG_TYPE_BOOL },
[INSTANCE_ATTR_STDERR] = { "stderr", BLOBMSG_TYPE_BOOL },
[INSTANCE_ATTR_NO_NEW_PRIVS] = { "no_new_privs", BLOBMSG_TYPE_BOOL },
[INSTANCE_ATTR_JAIL] = { "jail", BLOBMSG_TYPE_TABLE },
[INSTANCE_ATTR_TRACE] = { "trace", BLOBMSG_TYPE_BOOL },
[INSTANCE_ATTR_SECCOMP] = { "seccomp", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_CAPABILITIES] = { "capabilities", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 },
[INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 },
[INSTANCE_ATTR_FACILITY] = { "facility", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_OVERLAYDIR] = { "overlaydir", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_TMPOVERLAYSIZE] = { "tmpoverlaysize", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_BUNDLE] = { "bundle", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_INFRA] = { "infra", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_WATCHDOG] = { "watchdog", BLOBMSG_TYPE_ARRAY },
};
/*
 * Indices of the attributes accepted inside an instance's "jail" table.
 * They index both the jail_attr[] policy table and the tb[] array parsed
 * in instance_jail_parse(). __JAIL_ATTR_MAX is the attribute count.
 */
enum {
JAIL_ATTR_NAME,
JAIL_ATTR_HOSTNAME,
JAIL_ATTR_PROCFS,
JAIL_ATTR_SYSFS,
JAIL_ATTR_UBUS,
JAIL_ATTR_LOG,
JAIL_ATTR_RONLY,
JAIL_ATTR_MOUNT,
JAIL_ATTR_NETNS,
JAIL_ATTR_USERNS,
JAIL_ATTR_CGROUPSNS,
JAIL_ATTR_CONSOLE,
JAIL_ATTR_REQUIREJAIL,
JAIL_ATTR_IMMEDIATELY,
JAIL_ATTR_PIDFILE,
JAIL_ATTR_SETNS,
__JAIL_ATTR_MAX,
};
/*
 * blobmsg parsing policy for the "jail" table: attribute name and declared
 * blobmsg type for every JAIL_ATTR_* index. Used by blobmsg_parse() in
 * instance_jail_parse().
 */
static const struct blobmsg_policy jail_attr[__JAIL_ATTR_MAX] = {
[JAIL_ATTR_NAME] = { "name", BLOBMSG_TYPE_STRING },
[JAIL_ATTR_HOSTNAME] = { "hostname", BLOBMSG_TYPE_STRING },
[JAIL_ATTR_PROCFS] = { "procfs", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_SYSFS] = { "sysfs", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_UBUS] = { "ubus", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_LOG] = { "log", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_RONLY] = { "ronly", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_MOUNT] = { "mount", BLOBMSG_TYPE_TABLE },
[JAIL_ATTR_NETNS] = { "netns", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_USERNS] = { "userns", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_CGROUPSNS] = { "cgroupsns", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_CONSOLE] = { "console", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_REQUIREJAIL] = { "requirejail", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_IMMEDIATELY] = { "immediately", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING },
[JAIL_ATTR_SETNS] = { "setns", BLOBMSG_TYPE_ARRAY },
};
/*
 * Indices of the attributes of one entry of the jail "setns" array; they
 * index jail_setns_attr[] and the tb[] array parsed in
 * instance_gen_setns_argstr().
 */
enum {
JAIL_SETNS_ATTR_PID,
JAIL_SETNS_ATTR_NS,
__JAIL_SETNS_ATTR_MAX,
};
/*
 * blobmsg parsing policy for one "setns" entry: a 32-bit "pid" and an
 * array of "namespaces".
 */
static const struct blobmsg_policy jail_setns_attr[__JAIL_SETNS_ATTR_MAX] = {
[JAIL_SETNS_ATTR_PID] = { "pid", BLOBMSG_TYPE_INT32 },
[JAIL_SETNS_ATTR_NS] = { "namespaces", BLOBMSG_TYPE_ARRAY },
};
/*
 * List entry for one network device an instance refers to.
 * ifindex is set by instance_netdev_update() from if_nametoindex() of the
 * node key and is what instance_netdev_cmp() compares.
 */
struct instance_netdev {
struct blobmsg_list_node node;
int ifindex;
};
/*
 * List entry for one file an instance refers to.
 * md5 holds the digest computed by instance_file_update() over the file
 * named by the node key (all zero if the file cannot be opened) and is what
 * instance_file_cmp() compares.
 */
struct instance_file {
struct blobmsg_list_node node;
uint32_t md5[4];
};
/* Maps a textual limit name to the RLIMIT_* resource passed to get/setrlimit(). */
struct rlimit_name {
const char *name;
int resource;
};
/*
 * Table of limit names accepted by instance_limits(), terminated by an
 * entry whose name is NULL. The entries inside "#ifdef linux" are only
 * compiled in when that macro is defined.
 */
static const struct rlimit_name rlimit_names[] = {
{ "as", RLIMIT_AS },
{ "core", RLIMIT_CORE },
{ "cpu", RLIMIT_CPU },
{ "data", RLIMIT_DATA },
{ "fsize", RLIMIT_FSIZE },
{ "memlock", RLIMIT_MEMLOCK },
{ "nofile", RLIMIT_NOFILE },
{ "nproc", RLIMIT_NPROC },
{ "rss", RLIMIT_RSS },
{ "stack", RLIMIT_STACK },
#ifdef linux
{ "nice", RLIMIT_NICE },
{ "rtprio", RLIMIT_RTPRIO },
{ "msgqueue", RLIMIT_MSGQUEUE },
{ "sigpending", RLIMIT_SIGPENDING },
#endif
{ NULL, 0 }
};
/*
 * Close a descriptor unless it is one of stdin/stdout/stderr.
 * Any value <= STDERR_FILENO (including -1) is left untouched.
 */
static void closefd(int fd)
{
	if (fd <= STDERR_FILENO)
		return;

	close(fd);
}
/*
 * Translate a facility name (matched case-insensitively against the
 * facilitynames[] table from <syslog.h>) into its numeric value.
 * When nothing matches, the value stored in the table's terminating entry
 * is returned, which callers treat as -1 / "no match".
 */
static int
syslog_facility_str_to_int(const char *facility)
{
	CODE *entry;

	for (entry = facilitynames; entry->c_name; entry++) {
		if (!strcasecmp(entry->c_name, facility))
			break;
	}

	return entry->c_val;
}
static void
instance_limits(const char *limit, const char *value)
{
int i;
struct rlimit rlim;
unsigned long cur, max;
for (i = 0; rlimit_names[i].name != NULL; i++) {
if (strcmp(rlimit_names[i].name, limit))
continue;
if (!strcmp(value, "unlimited")) {
rlim.rlim_cur = RLIM_INFINITY;
rlim.rlim_max = RLIM_INFINITY;
} else {
if (getrlimit(rlimit_names[i].resource, &rlim))
return;
cur = rlim.rlim_cur;
max = rlim.rlim_max;
if (sscanf(value, "%lu %lu", &cur, &max) < 1)
return;
rlim.rlim_cur = cur;
rlim.rlim_max = max;
}
setrlimit(rlimit_names[i].resource, &rlim);
return;
}
}
/*
 * Build the string "<pid>:<ns>,<ns>,..." from one "setns" entry.
 *
 * Returns a malloc()ed string owned by the caller, or NULL when the "pid"
 * or "namespaces" attribute is missing, a namespace element is not a
 * string, or the allocation fails.
 */
static char *
instance_gen_setns_argstr(struct blob_attr *attr)
{
	struct blob_attr *tb[__JAIL_SETNS_ATTR_MAX];
	struct blob_attr *ns;
	int rem, pos, size;
	char *out;

	blobmsg_parse(jail_setns_attr, __JAIL_SETNS_ATTR_MAX, tb,
		      blobmsg_data(attr), blobmsg_data_len(attr));

	if (!tb[JAIL_SETNS_ATTR_PID] || !tb[JAIL_SETNS_ATTR_NS])
		return NULL;

	/* pass 1: "<pid>:" plus every name followed by one separator byte */
	size = snprintf(NULL, 0, "%d:", blobmsg_get_u32(tb[JAIL_SETNS_ATTR_PID]));
	blobmsg_for_each_attr(ns, tb[JAIL_SETNS_ATTR_NS], rem) {
		const char *name;

		if (blobmsg_type(ns) != BLOBMSG_TYPE_STRING)
			return NULL;

		name = blobmsg_get_string(ns);
		if (!name)
			return NULL;

		size += strlen(name) + 1;
	}

	out = malloc(size);
	if (!out)
		return NULL;

	/* pass 2: emit the pieces, each name followed by ',' */
	pos = snprintf(out, size, "%d:", blobmsg_get_u32(tb[JAIL_SETNS_ATTR_PID]));
	blobmsg_for_each_attr(ns, tb[JAIL_SETNS_ATTR_NS], rem) {
		const char *name = blobmsg_get_string(ns);
		size_t n = strlen(name);

		memcpy(&out[pos], name, n);
		pos += n;
		out[pos++] = ',';
	}

	/* the last byte (trailing ',') becomes the terminator */
	out[size - 1] = '\0';

	return out;
}
/* Append "<opt> <value>" to argv when value is non-NULL; returns the new argc. */
static inline int
jail_arg_opt(char **argv, int argc, char *opt, char *value)
{
	if (!value)
		return argc;

	argv[argc++] = opt;
	argv[argc++] = value;

	return argc;
}

/* Append a bare flag to argv when set is true; returns the new argc. */
static inline int
jail_arg_flag(char **argv, int argc, char *flag, bool set)
{
	if (set)
		argv[argc++] = flag;

	return argc;
}

/*
 * Fill argv with the ujail command line for this instance, up to and
 * including the "--" separator that precedes the service command.
 *
 * @in:   instance whose jail/instance settings are translated to options.
 * @argv: caller-provided array; the caller sizes it from in->jail.argc.
 *
 * Returns the number of argv entries written. Exits the process with
 * ENOMEM if the term-timeout string cannot be allocated.
 */
static inline int
jail_run(struct service_instance *in, char **argv)
{
	char *timeout_arg;
	struct blobmsg_list_node *var;
	struct jail *jail = &in->jail;
	int argc = 0;

	argv[argc++] = UJAIL_BIN_PATH;

	if (asprintf(&timeout_arg, "%d", in->term_timeout) == -1)
		exit(ENOMEM);

	argc = jail_arg_opt(argv, argc, "-t", timeout_arg);
	argc = jail_arg_opt(argv, argc, "-n", jail->name);
	argc = jail_arg_opt(argv, argc, "-h", jail->hostname);
	argc = jail_arg_opt(argv, argc, "-S", in->seccomp);
	argc = jail_arg_opt(argv, argc, "-U", in->user);
	argc = jail_arg_opt(argv, argc, "-G", in->group);
	argc = jail_arg_opt(argv, argc, "-C", in->capabilities);

	argc = jail_arg_flag(argv, argc, "-c", in->no_new_privs);
	argc = jail_arg_flag(argv, argc, "-p", jail->procfs);
	argc = jail_arg_flag(argv, argc, "-s", jail->sysfs);
	argc = jail_arg_flag(argv, argc, "-u", jail->ubus);
	argc = jail_arg_flag(argv, argc, "-l", jail->log);
	argc = jail_arg_flag(argv, argc, "-o", jail->ronly);
	argc = jail_arg_flag(argv, argc, "-N", jail->netns);
	argc = jail_arg_flag(argv, argc, "-f", jail->userns);
	argc = jail_arg_flag(argv, argc, "-F", jail->cgroupsns);
	argc = jail_arg_flag(argv, argc, "-y", jail->console);

	argc = jail_arg_opt(argv, argc, "-R", in->extroot);
	argc = jail_arg_opt(argv, argc, "-O", in->overlaydir);
	argc = jail_arg_opt(argv, argc, "-T", in->tmpoverlaysize);

	argc = jail_arg_flag(argv, argc, "-i", in->immediately);

	argc = jail_arg_opt(argv, argc, "-P", jail->pidfile);
	argc = jail_arg_opt(argv, argc, "-J", in->bundle);
	argc = jail_arg_opt(argv, argc, "-I", in->infra);

	argc = jail_arg_flag(argv, argc, "-E", in->require_jail);

	/* one "-e <name>" per environment entry */
	blobmsg_list_for_each(&in->env, var) {
		argv[argc++] = "-e";
		argv[argc++] = (char *) blobmsg_name(var->data);
	}

	/* mounts: data starting with '1' selects "-w", anything else "-r" */
	blobmsg_list_for_each(&jail->mount, var) {
		const char *type = blobmsg_data(var->data);

		argv[argc++] = (*type == '1') ? "-w" : "-r";
		argv[argc++] = (char *) blobmsg_name(var->data);
	}

	/* setns entries that cannot be rendered are skipped */
	blobmsg_list_for_each(&jail->setns, var)
		argc = jail_arg_opt(argv, argc, "-j",
				    instance_gen_setns_argstr(var->data));

	argv[argc++] = "--";

	return argc;
}
/*
 * Remove the instance's pidfile, if one is configured.
 * Returns 0 on success or when no pidfile is set, 1 if unlink() fails.
 */
static int
instance_removepid(struct service_instance *in) {
	if (in->pidfile && unlink(in->pidfile)) {
		ERROR("Failed to remove pidfile: %s: %m\n", in->pidfile);
		return 1;
	}

	return 0;
}
/*
 * Write in->proc.pid followed by a newline to the configured pidfile.
 *
 * Returns 0 on success or when no pidfile is set, 1 if the file cannot be
 * opened, 2 if writing fails, 3 if closing fails.
 */
static int
instance_writepid(struct service_instance *in)
{
	FILE *fp;

	if (!in->pidfile)
		return 0;

	fp = fopen(in->pidfile, "w");
	if (!fp) {
		ERROR("failed to open pidfile for writing: %s: %m", in->pidfile);
		return 1;
	}

	if (fprintf(fp, "%d\n", in->proc.pid) >= 0) {
		if (!fclose(fp))
			return 0;

		ERROR("failed to close pidfile: %s: %m", in->pidfile);
		return 3;
	}

	/* report the write error before fclose() can touch errno */
	ERROR("failed to write pidfile: %s: %m", in->pidfile);
	fclose(fp);

	return 2;
}
/*
 * Set up the process environment for an instance and exec its command.
 * Called from instance_start() in the forked child.
 *
 * @in:      instance to run.
 * @_stdout: descriptor to become stdout, or -1 to use /dev/null.
 * @_stderr: descriptor to become stderr, or -1 to use /dev/null.
 *
 * Does not return: it ends in execvp(), or exit(127) when exec or one of
 * the privilege-dropping calls fails.
 */
static void
instance_run(struct service_instance *in, int _stdout, int _stderr)
{
struct blobmsg_list_node *var;
struct blob_attr *cur;
char **argv;
int argc = 1; /* NULL terminated */
int rem, _stdin;
/* the seccomp wrapper is only used outside of jail and trace mode */
bool seccomp = !in->trace && !in->has_jail && in->seccomp;
bool setlbf = _stdout >= 0;
if (in->nice)
setpriority(PRIO_PROCESS, 0, in->nice);
/* count the command words to size argv */
blobmsg_for_each_attr(cur, in->command, rem)
argc++;
blobmsg_list_for_each(&in->env, var)
setenv(blobmsg_name(var->data), blobmsg_data(var->data), 1);
if (seccomp)
setenv("SECCOMP_FILE", in->seccomp, 1);
/* when stdout is captured, preload libsetlbf.so into the service */
if (setlbf)
setenv("LD_PRELOAD", "/lib/libsetlbf.so", 1);
blobmsg_list_for_each(&in->limits, var)
instance_limits(blobmsg_name(var->data), blobmsg_data(var->data));
/* one extra slot for the utrace / seccomp-trace wrapper binary */
if (in->trace || seccomp)
argc += 1;
/* room for the command, the wrapper and the precomputed jail arguments */
argv = alloca(sizeof(char *) * (argc + in->jail.argc));
argc = 0;
#ifdef SECCOMP_SUPPORT
if (in->trace)
argv[argc++] = "/sbin/utrace";
else if (seccomp)
argv[argc++] = "/sbin/seccomp-trace";
#else
if (in->trace || seccomp)
ULOG_WARN("Seccomp support for %s::%s not available\n", in->srv->name, in->name);
#endif
if (in->has_jail) {
/* jail_run() fills argv from index 0 and returns how many entries it wrote */
argc = jail_run(in, argv);
if (argc != in->jail.argc)
ULOG_WARN("expected %i jail params, used %i for %s::%s\n",
in->jail.argc, argc, in->srv->name, in->name);
}
blobmsg_for_each_attr(cur, in->command, rem)
argv[argc++] = blobmsg_data(cur);
argv[argc] = NULL;
/* stdin always comes from /dev/null; stdout/stderr fall back to it */
_stdin = open("/dev/null", O_RDONLY);
if (_stdout == -1)
_stdout = open("/dev/null", O_WRONLY);
if (_stderr == -1)
_stderr = open("/dev/null", O_WRONLY);
if (_stdin > -1) {
dup2(_stdin, STDIN_FILENO);
closefd(_stdin);
}
if (_stdout > -1) {
dup2(_stdout, STDOUT_FILENO);
closefd(_stdout);
}
if (_stderr > -1) {
dup2(_stderr, STDERR_FILENO);
closefd(_stderr);
}
/*
 * Without a jail, change credentials here: supplementary groups first,
 * then gid, then uid. With a jail, user and group are handed to ujail on
 * its command line by jail_run() instead.
 */
if (!in->has_jail && in->user && in->pw_gid && initgroups(in->user, in->pw_gid)) {
ERROR("failed to initgroups() for user %s: %m\n", in->user);
exit(127);
}
if (!in->has_jail && in->gr_gid && setgid(in->gr_gid)) {
ERROR("failed to set group id %d: %m\n", in->gr_gid);
exit(127);
}
if (!in->has_jail && in->uid && setuid(in->uid)) {
ERROR("failed to set user id %d: %m\n", in->uid);
exit(127);
}
execvp(argv[0], argv);
/* only reached when execvp() failed */
exit(127);
}
/*
 * Move the calling process into the cgroup
 * CGROUP_BASEDIR/<service>/<instance>, creating the directories as needed.
 *
 * This is best effort: nothing is done when
 * /sys/fs/cgroup/cgroup.subtree_control does not exist, and every failure
 * (including a path that does not fit into the local buffer) makes the
 * function return silently.
 *
 * @service:  service name, used as the first directory level.
 * @instance: instance name, used as the second directory level.
 */
static void
instance_add_cgroup(const char *service, const char *instance)
{
	struct stat sb;
	char cgnamebuf[256];
	int fd, len;

	if (stat("/sys/fs/cgroup/cgroup.subtree_control", &sb))
		return;

	mkdir(CGROUP_BASEDIR, 0700);

	len = snprintf(cgnamebuf, sizeof(cgnamebuf), "%s/%s", CGROUP_BASEDIR, service);
	if (len < 0 || (size_t) len >= sizeof(cgnamebuf))
		return;
	mkdir(cgnamebuf, 0700);

	len = snprintf(cgnamebuf, sizeof(cgnamebuf), "%s/%s/%s", CGROUP_BASEDIR, service, instance);
	if (len < 0 || (size_t) len >= sizeof(cgnamebuf))
		return;
	mkdir(cgnamebuf, 0700);

	/*
	 * Build the full path with a bounded snprintf() instead of appending
	 * to the buffer, so long service/instance names cannot overflow it.
	 */
	len = snprintf(cgnamebuf, sizeof(cgnamebuf), "%s/%s/%s/cgroup.procs",
		       CGROUP_BASEDIR, service, instance);
	if (len < 0 || (size_t) len >= sizeof(cgnamebuf))
		return;

	fd = open(cgnamebuf, O_WRONLY);
	if (fd == -1)
		return;

	dprintf(fd, "%d", getpid());
	close(fd);
}
/*
 * Release the stdout, stderr, console and console-client streams of an
 * instance. For each one whose descriptor is open (> -1) the ustream is
 * freed, the descriptor closed and the stored fd reset to -1.
 */
static void
instance_free_stdio(struct service_instance *in)
{
#define INSTANCE_RELEASE_STREAM(ufd)			\
	do {						\
		if ((ufd).fd.fd > -1) {			\
			ustream_free(&(ufd).stream);	\
			close((ufd).fd.fd);		\
			(ufd).fd.fd = -1;		\
		}					\
	} while (0)

	INSTANCE_RELEASE_STREAM(in->_stdout);
	INSTANCE_RELEASE_STREAM(in->_stderr);
	INSTANCE_RELEASE_STREAM(in->console);
	INSTANCE_RELEASE_STREAM(in->console_client);

#undef INSTANCE_RELEASE_STREAM
}
void
instance_start(struct service_instance *in)
{
int pid;
int opipe[2] = { -1, -1 };
int epipe[2] = { -1, -1 };
if (!avl_is_empty(&in->errors.avl)) {
LOG("Not starting instance %s::%s, an error was indicated\n", in->srv->name, in->name);
return;
}
if (!in->bundle && !in->command) {
LOG("Not starting instance %s::%s, command not set\n", in->srv->name, in->name);
return;
}
if (in->proc.pending) {
if (in->halt)
in->restart = true;
return;
}
instance_free_stdio(in);
if (in->_stdout.fd.fd > -2) {
if (pipe(opipe)) {
ULOG_WARN("pipe() failed: %m\n");
opipe[0] = opipe[1] = -1;
}
}
if (in->_stderr.fd.fd > -2) {
if (pipe(epipe)) {
ULOG_WARN("pipe() failed: %m\n");
epipe[0] = epipe[1] = -1;
}
}
in->restart = false;
in->halt = false;
if (!in->valid)
return;
pid = fork();
if (pid < 0)
return;
if (!pid) {
uloop_done();
closefd(opipe[0]);
closefd(epipe[0]);
instance_add_cgroup(in->srv->name, in->name);
instance_run(in, opipe[1], epipe[1]);
return;
}
P_DEBUG(2, "Started instance %s::%s[%d]\n", in->srv->name, in->name, pid);
in->proc.pid = pid;
instance_writepid(in);
clock_gettime(CLOCK_MONOTONIC, &in->start);
uloop_process_add(&in->proc);
if (opipe[0] > -1) {
ustream_fd_init(&in->_stdout, opipe[0]);
closefd(opipe[1]);
fcntl(opipe[0], F_SETFD, FD_CLOEXEC);
}
if (epipe[0] > -1) {
ustream_fd_init(&in->_stderr, epipe[0]);
closefd(epipe[1]);
fcntl(epipe[0], F_SETFD, FD_CLOEXEC);
}
if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000);
P_DEBUG(2, "Started instance %s::%s watchdog timer : timeout = %d\n", in->srv->name, in->name, in->watchdog.freq);
}
service_event("instance.start", in->srv->name, in->name);
}
/*
 * Forward buffered output of an instance to syslog, one line at a time.
 *
 * @s:    stream to drain.
 * @prio: syslog priority used for every line.
 * @in:   instance the stream belongs to.
 *
 * Messages are tagged "<name>[<pid>]", where <name> is the basename of the
 * first command word. Instances started from a bundle may have no command
 * at all; the instance name is used for them. An incomplete line is only
 * flushed once it fills the whole read buffer. The log identity is reset
 * to "procd" / LOG_DAEMON afterwards.
 */
static void
instance_stdio(struct ustream *s, int prio, struct service_instance *in)
{
	char *newline, *str, ident[32];
	const char *arg0;
	int len;

	/* in->command is NULL for bundle-only instances; do not dereference it */
	if (in->command)
		arg0 = basename(blobmsg_data(blobmsg_data(in->command)));
	else
		arg0 = in->name;

	snprintf(ident, sizeof(ident), "%s[%d]", arg0, in->proc.pid);
	ulog_open(ULOG_SYSLOG, in->syslog_facility, ident);

	do {
		str = ustream_get_read_buf(s, &len);
		if (!str)
			break;

		newline = memchr(str, '\n', len);
		/* partial line: wait for more data unless the buffer is full */
		if (!newline && (s->r.buffer_len != len))
			break;

		if (newline) {
			*newline = 0;
			len = newline + 1 - str;
		}

		/* NOTE(review): without a newline this relies on str being NUL-terminated by ustream - confirm */
		ulog(prio, "%s\n", str);
		ustream_consume(s, len);
	} while (1);

	ulog_open(ULOG_SYSLOG, LOG_DAEMON, "procd");
}
/* Stream callback for an instance's stdout: log its lines at LOG_INFO. */
static void
instance_stdout(struct ustream *s, int bytes)
{
	struct service_instance *in;

	in = container_of(s, struct service_instance, _stdout.stream);
	instance_stdio(s, LOG_INFO, in);
}
static void
instance_console(struct ustream *s, int bytes)
{
struct service_instance *in = container_of(s, struct service_instance, console.stream);
char *buf;
int len;
do {
buf = ustream_get_read_buf(s, &len);
if (!buf)
break;
ulog(LOG_INFO, "out: %s\n", buf);
/* test if console client is attached */
if (in->console_client.fd.fd > -1)
ustream_write(&in->console_client.stream, buf, len, false);
ustream_consume(s, len);
} while (1);
}
static void
instance_console_client(struct ustream *s, int bytes)
{
struct service_instance *in = container_of(s, struct service_instance, console_client.stream);
char *buf;
int len;
do {
buf = ustream_get_read_buf(s, &len);
if (!buf)
break;
ulog(LOG_INFO, "in: %s\n", buf);
ustream_write(&in->console.stream, buf, len, false);
ustream_consume(s, len);
} while (1);
}
/* Stream callback for an instance's stderr: log its lines at LOG_ERR. */
static void
instance_stderr(struct ustream *s, int bytes)
{
	struct service_instance *in;

	in = container_of(s, struct service_instance, _stderr.stream);
	instance_stdio(s, LOG_ERR, in);
}
/*
 * Handler for the per-instance timer.
 * While a halt is in progress the process did not exit in time after
 * SIGTERM and is sent SIGKILL; otherwise the instance is started again if
 * a restart or respawn is pending.
 */
static void
instance_timeout(struct uloop_timeout *t)
{
	struct service_instance *in = container_of(t, struct service_instance, timeout);

	if (!in->halt) {
		if (in->restart || in->respawn)
			instance_start(in);
		return;
	}

	LOG("Instance %s::%s pid %d not stopped on SIGTERM, sending SIGKILL instead\n",
	    in->srv->name, in->name, in->proc.pid);
	kill(in->proc.pid, SIGKILL);
}
/*
 * Remove an instance from its service's instance tree, free it and then
 * notify the service via service_stopped().
 */
static void
instance_delete(struct service_instance *in)
{
	/* remember the owner: 'in' must not be touched after instance_free() */
	struct service *owner = in->srv;

	avl_delete(&owner->instances.avl, &in->node.avl);
	instance_free(in);
	service_stopped(owner);
}
/*
 * Convert a wait status into a single exit code:
 * the exit status for a normal exit, SIGNALLED_OFFSET plus the signal
 * number for a termination by signal, the stop signal for a stopped
 * process, and 1 for anything else.
 */
static int
instance_exit_code(int ret)
{
	int code = 1;

	if (WIFEXITED(ret))
		code = WEXITSTATUS(ret);
	else if (WIFSIGNALED(ret))
		code = SIGNALLED_OFFSET + WTERMSIG(ret);
	else if (WIFSTOPPED(ret))
		code = WSTOPSIG(ret);

	return code;
}
static void
instance_exit(struct uloop_process *p, int ret)
{
struct service_instance *in;
struct timespec tp;
long runtime;
in = container_of(p, struct service_instance, proc);
clock_gettime(CLOCK_MONOTONIC, &tp);
runtime = tp.tv_sec - in->start.tv_sec;
P_DEBUG(2, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime);
in->exit_code = instance_exit_code(ret);
uloop_timeout_cancel(&in->timeout);
uloop_timeout_cancel(&in->watchdog.timeout);
service_event("instance.stop", in->srv->name, in->name);
if (in->halt) {
instance_removepid(in);
if (in->restart)
instance_start(in);
else
instance_delete(in);
} else if (in->restart) {
instance_start(in);
} else if (in->respawn) {
if (runtime < in->respawn_threshold)
in->respawn_count++;
else
in->respawn_count = 0;
if (in->respawn_count > in->respawn_retry && in->respawn_retry > 0 ) {
LOG("Instance %s::%s s in a crash loop %d crashes, %ld seconds since last crash\n",
in->srv->name, in->name, in->respawn_count, runtime);
in->restart = in->respawn = 0;
in->halt = 1;
service_event("instance.fail", in->srv->name, in->name);
} else {
service_event("instance.respawn", in->srv->name, in->name);
uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000);
}
}
}
/*
 * Stop an instance.
 *
 * @in:   instance to stop.
 * @halt: when true the instance is to be removed; if no process is
 *        pending it is deleted right away.
 *
 * A pending process is sent SIGTERM with restart and respawn cleared. For
 * non-jailed instances the instance timer is armed with term_timeout
 * seconds.
 */
void
instance_stop(struct service_instance *in, bool halt)
{
	if (in->proc.pending) {
		in->halt = halt;
		in->restart = in->respawn = false;
		kill(in->proc.pid, SIGTERM);

		if (!in->has_jail)
			uloop_timeout_set(&in->timeout, in->term_timeout * 1000);

		return;
	}

	if (halt)
		instance_delete(in);
}
/*
 * Restart a running instance.
 *
 * Does nothing when no process is pending. If a reload signal is
 * configured, only that signal is sent. Otherwise the instance is flagged
 * halt+restart and sent SIGTERM; for non-jailed instances the instance
 * timer is armed with term_timeout seconds.
 */
static void
instance_restart(struct service_instance *in)
{
	if (!in->proc.pending)
		return;

	if (!in->reload_signal) {
		in->halt = true;
		in->restart = true;
		kill(in->proc.pid, SIGTERM);

		if (!in->has_jail)
			uloop_timeout_set(&in->timeout, in->term_timeout * 1000);

		return;
	}

	kill(in->proc.pid, in->reload_signal);
}
/*
 * Watchdog timer expiry handler: restart the instance when respawn is
 * enabled, otherwise stop it with halt set.
 */
static void
instance_watchdog(struct uloop_timeout *t)
{
	struct service_instance *in = container_of(t, struct service_instance, watchdog.timeout);

	P_DEBUG(3, "instance %s::%s watchdog timer expired\n", in->srv->name, in->name);

	if (!in->respawn) {
		instance_stop(in, true);
		return;
	}

	instance_restart(in);
}
/*
 * Compare two optional strings.
 * Returns false when both are NULL or both are non-NULL and equal,
 * true otherwise.
 */
static bool string_changed(const char *a, const char *b)
{
	if (!a || !b)
		return a != b;

	return strcmp(a, b) != 0;
}
static bool
instance_config_changed(struct service_instance *in, struct service_instance *in_new)
{
if (!in->valid)
return true;
if (!blob_attr_equal(in->command, in_new->command))
return true;
if (string_changed(in->bundle, in_new->bundle))
return true;
if (string_changed(in->infra, in_new->infra))
return true;
if (string_changed(in->extroot, in_new->extroot))
return true;
if (string_changed(in->overlaydir, in_new->overlaydir))
return true;
if (string_changed(in->tmpoverlaysize, in_new->tmpoverlaysize))
return true;
if (!blobmsg_list_equal(&in->env, &in_new->env))
return true;
if (!blobmsg_list_equal(&in->netdev, &in_new->netdev))
return true;
if (!blobmsg_list_equal(&in->file, &in_new->file))
return true;
if (in->nice != in_new->nice)
return true;
if (in->syslog_facility != in_new->syslog_facility)
return true;
if (string_changed(in->user, in_new->user))
return true;
if (string_changed(in->group, in_new->group))
return true;
if (in->uid != in_new->uid)
return true;
if (in->pw_gid != in_new->pw_gid)
return true;
if (in->gr_gid != in_new->gr_gid)
return true;
if (string_changed(in->pidfile, in_new->pidfile))
return true;
if (in->respawn_retry != in_new->respawn_retry)
return true;
if (in->respawn_threshold != in_new->respawn_threshold)
return true;
if (in->respawn_timeout != in_new->respawn_timeout)
return true;
if (in->reload_signal != in_new->reload_signal)
return true;
if (in->term_timeout != in_new->term_timeout)
return true;
if (string_changed(in->seccomp, in_new->seccomp))
return true;
if (string_changed(in->capabilities, in_new->capabilities))
return true;
if (!blobmsg_list_equal(&in->limits, &in_new->limits))
return true;
if (!blobmsg_list_equal(&in->jail.mount, &in_new->jail.mount))
return true;
if (!blobmsg_list_equal(&in->jail.setns, &in_new->jail.setns))
return true;
if (!blobmsg_list_equal(&in->errors, &in_new->errors))
return true;
if (in->has_jail != in_new->has_jail)
return true;
if (in->trace != in_new->trace)
return true;
if (in->require_jail != in_new->require_jail)
return true;
if (in->immediately != in_new->immediately)
return true;
if (in->no_new_privs != in_new->no_new_privs)
return true;
if (string_changed(in->jail.name, in_new->jail.name))
return true;
if (string_changed(in->jail.hostname, in_new->jail.hostname))
return true;
if (string_changed(in->jail.pidfile, in_new->jail.pidfile))
return true;
if (in->jail.procfs != in_new->jail.procfs)
return true;
if (in->jail.sysfs != in_new->jail.sysfs)
return true;
if (in->jail.ubus != in_new->jail.ubus)
return true;
if (in->jail.log != in_new->jail.log)
return true;
if (in->jail.ronly != in_new->jail.ronly)
return true;
if (in->jail.netns != in_new->jail.netns)
return true;
if (in->jail.userns != in_new->jail.userns)
return true;
if (in->jail.cgroupsns != in_new->jail.cgroupsns)
return true;
if (in->jail.console != in_new->jail.console)
return true;
if (in->watchdog.mode != in_new->watchdog.mode)
return true;
if (in->watchdog.freq != in_new->watchdog.freq)
return true;
return false;
}
/* List comparison callback: two netdev entries match when their ifindex is equal. */
static bool
instance_netdev_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2)
{
	struct instance_netdev *lhs, *rhs;

	lhs = container_of(l1, struct instance_netdev, node);
	rhs = container_of(l2, struct instance_netdev, node);

	return lhs->ifindex == rhs->ifindex;
}
/* List update callback: resolve the entry's key (interface name) to its ifindex. */
static void
instance_netdev_update(struct blobmsg_list_node *l)
{
	struct instance_netdev *dev;

	dev = container_of(l, struct instance_netdev, node);
	dev->ifindex = if_nametoindex(dev->node.avl.key);
}
/* List comparison callback: two file entries match when their MD5 digests are equal. */
static bool
instance_file_cmp(struct blobmsg_list_node *l1, struct blobmsg_list_node *l2)
{
	struct instance_file *lhs, *rhs;

	lhs = container_of(l1, struct instance_file, node);
	rhs = container_of(l2, struct instance_file, node);

	return memcmp(lhs->md5, rhs->md5, sizeof(lhs->md5)) == 0;
}
/*
 * List update callback: compute the MD5 digest of the file named by the
 * entry's key and store it in the entry.
 *
 * The digest is zeroed first and stays zero when the file cannot be
 * opened. Reads interrupted by a signal (EINTR) are retried; any other
 * read error ends the loop and the digest of the data read so far is
 * stored.
 */
static void
instance_file_update(struct blobmsg_list_node *l)
{
	struct instance_file *f = container_of(l, struct instance_file, node);
	md5_ctx_t ctx;
	char chunk[256];
	int n, fd;

	memset(f->md5, 0, sizeof(f->md5));

	fd = open(l->avl.key, O_RDONLY);
	if (fd < 0)
		return;

	md5_begin(&ctx);

	for (;;) {
		n = read(fd, chunk, sizeof(chunk));

		if (n > 0) {
			md5_hash(chunk, n, &ctx);
			continue;
		}

		if (n < 0 && errno == EINTR)
			continue;

		/* end of file or unrecoverable error */
		break;
	}

	md5_end(f->md5, &ctx);
	close(fd);
}
/* Fill list l from the contents of attribute cur (non-array mode); a NULL cur is ignored. */
static void
instance_fill_any(struct blobmsg_list *l, struct blob_attr *cur)
{
	if (cur)
		blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), false);
}
/*
 * Fill list l from attribute cur, whose elements must all be strings.
 *
 * @l:     list to fill.
 * @cur:   attribute to read; NULL is accepted and leaves the list alone.
 * @cb:    optional callback invoked for every node of the list afterwards.
 * @array: passed through to blobmsg_list_fill().
 *
 * Returns false when cur contains a non-string element, true otherwise.
 */
static bool
instance_fill_array(struct blobmsg_list *l, struct blob_attr *cur, blobmsg_update_cb cb, bool array)
{
	struct blobmsg_list_node *entry;

	if (!cur)
		return true;

	if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING))
		return false;

	blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), array);

	if (!cb)
		return true;

	blobmsg_list_for_each(l, entry)
		cb(entry);

	return true;
}
static int
instance_jail_parse(struct service_instance *in, struct blob_attr *attr)
{
struct blob_attr *tb[__JAIL_ATTR_MAX];
struct jail *jail = &in->jail;
struct blobmsg_list_node *var;
blobmsg_parse(jail_attr, __JAIL_ATTR_MAX, tb,
blobmsg_data(attr), blobmsg_data_len(attr));
jail->argc = 4;
if (tb[JAIL_ATTR_REQUIREJAIL] && blobmsg_get_bool(tb[JAIL_ATTR_REQUIREJAIL])) {
in->require_jail = true;
jail->argc++;
}
if (tb[JAIL_ATTR_IMMEDIATELY] && blobmsg_get_bool(tb[JAIL_ATTR_IMMEDIATELY])) {
in->immediately = true;
jail->argc++;
}
if (tb[JAIL_ATTR_NAME]) {
jail->name = strdup(blobmsg_get_string(tb[JAIL_ATTR_NAME]));
jail->argc += 2;
}
if (tb[JAIL_ATTR_HOSTNAME]) {
jail->hostname = strdup(blobmsg_get_string(tb[JAIL_ATTR_HOSTNAME]));
jail->argc += 2;
}
if (tb[JAIL_ATTR_PROCFS] && blobmsg_get_bool(tb[JAIL_ATTR_PROCFS])) {
jail->procfs = true;
jail->argc++;
}
if (tb[JAIL_ATTR_SYSFS] && blobmsg_get_bool(tb[JAIL_ATTR_SYSFS])) {
jail->sysfs = true;
jail->argc++;
}
if (tb[JAIL_ATTR_UBUS] && blobmsg_get_bool(tb[JAIL_ATTR_UBUS])) {
jail->ubus = true;
jail->argc++;
}
if (tb[JAIL_ATTR_LOG] && blobmsg_get_bool(tb[JAIL_ATTR_LOG])) {
jail->log = true;
jail->argc++;
}
if (tb[JAIL_ATTR_RONLY] && blobmsg_get_bool(tb[JAIL_ATTR_RONLY])) {
jail->ronly = true;
jail->argc++;
}
if (tb[JAIL_ATTR_NETNS] && blobmsg_get_bool(tb[JAIL_ATTR_NETNS])) {
jail->netns = true;
jail->argc++;
}
if (tb[JAIL_ATTR_USERNS] && blobmsg_get_bool(tb[JAIL_ATTR_USERNS])) {
jail->userns = true;
jail->argc++;
}
if (tb[JAIL_ATTR_CGROUPSNS] && blobmsg_get_bool(tb[JAIL_ATTR_CGROUPSNS])) {
jail->cgroupsns = true;
jail->argc++;
}
if (tb[JAIL_ATTR_CONSOLE] && blobmsg_get_bool(tb[JAIL_ATTR_CONSOLE])) {
jail->console = true;
jail->argc++;
}
if (tb[JAIL_ATTR_PIDFILE]) {
jail->pidfile = strdup(blobmsg_get_string(tb[JAIL_ATTR_PIDFILE]));
jail->argc += 2;
}
if (tb[JAIL_ATTR_SETNS]) {
struct blob_attr *cur;
int rem;
blobmsg_for_each_attr(cur, tb[JAIL_ATTR_SETNS], rem)
jail->argc += 2;
blobmsg_list_fill(&jail->setns, blobmsg_data(tb[JAIL_ATTR_SETNS]),
blobmsg_data_len(tb[JAIL_ATTR_SETNS]), true);
}
if (tb[JAIL_ATTR_MOUNT]) {
struct blob_attr *cur;
int rem;
blobmsg_for_each_attr(cur, tb[JAIL_ATTR_MOUNT], rem)
jail->argc += 2;
instance_fill_array(&jail->mount, tb[JAIL_ATTR_MOUNT], NULL, false);
}
blobmsg_list_for_each(&in->env, var)
jail->argc += 2;
if (in->seccomp)
jail->argc += 2;
if (in->capabilities)
jail->argc += 2;
if (in->user)
jail->argc += 2;
if (in->group)
jail->argc += 2;
if (in->extroot)
jail->argc += 2;
if (in->overlaydir)
jail->argc += 2;
if (in->tmpoverlaysize)
jail->argc += 2;
if (in->no_new_privs)
jail->argc++;
if (in->bundle)
jail->argc += 2;
if (in->infra)
jail->argc += 2;
return true;
}
/*
 * Validate the "command" attribute and store it in in->command.
 *
 * Returns true when no command is given (in->command becomes NULL) or when
 * the command is a non-empty list of strings. Returns false when the list
 * contains a non-string element (in->command is left untouched) or when it
 * is empty (in->command still points at the empty list).
 */
static bool
instance_config_parse_command(struct service_instance *in, struct blob_attr **tb)
{
	struct blob_attr *cmd = tb[INSTANCE_ATTR_COMMAND];
	struct blob_attr *word;
	bool has_words = false;
	int rem;

	if (!cmd) {
		in->command = NULL;
		return true;
	}

	if (!blobmsg_check_attr_list(cmd, BLOBMSG_TYPE_STRING))
		return false;

	/* only the existence of a first element matters */
	blobmsg_for_each_attr(word, cmd, rem) {
		has_words = true;
		break;
	}

	in->command = cmd;

	return has_words;
}
static bool
instance_config_parse(struct service_instance *in)
{
struct blob_attr *tb[__INSTANCE_ATTR_MAX];
struct blob_attr *cur, *cur2;
struct stat s;
int rem, r;
blobmsg_parse(instance_attr, __INSTANCE_ATTR_MAX, tb,
blobmsg_data(in->config), blobmsg_data_len(in->config));
if (!tb[INSTANCE_ATTR_BUNDLE] && !instance_config_parse_command(in, tb))
return false;
if (tb[INSTANCE_ATTR_TERMTIMEOUT])
in->term_timeout = blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]);
if (tb[INSTANCE_ATTR_RESPAWN]) {
int i = 0;
uint32_t vals[3] = { 3600, 5, 5};
blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_RESPAWN], rem) {
if ((i >= 3) && (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING))
continue;
vals[i] = atoi(blobmsg_get_string(cur2));
i++;
}
in->respawn = true;
in->respawn_count = 0;
in->respawn_threshold = vals[0];
in->respawn_timeout = vals[1];
in->respawn_retry = vals[2];
}
if (tb[INSTANCE_ATTR_TRIGGER]) {
in->trigger = tb[INSTANCE_ATTR_TRIGGER];
trigger_add(in->trigger, in);
}
if (tb[INSTANCE_ATTR_WATCH]) {
blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCH], rem) {
if (blobmsg_type(cur2) != BLOBMSG_TYPE_STRING)
continue;
P_DEBUG(3, "watch for %s\n", blobmsg_get_string(cur2));
watch_add(blobmsg_get_string(cur2), in);
}
}
if ((cur = tb[INSTANCE_ATTR_NICE])) {
in->nice = (int8_t) blobmsg_get_u32(cur);
if (in->nice < -20 || in->nice > 20)
return false;
}
if (tb[INSTANCE_ATTR_USER]) {
const char *user = blobmsg_get_string(tb[INSTANCE_ATTR_USER]);
struct passwd *p = getpwnam(user);
if (p) {
in->user = strdup(user);
in->uid = p->pw_uid;
in->gr_gid = in->pw_gid = p->pw_gid;
}
}
if (tb[INSTANCE_ATTR_GROUP]) {
const char *group = blobmsg_get_string(tb[INSTANCE_ATTR_GROUP]);
struct group *p = getgrnam(group);
if (p) {
in->group = strdup(group);
in->gr_gid = p->gr_gid;
}
}
if (tb[INSTANCE_ATTR_TRACE])
in->trace = blobmsg_get_bool(tb[INSTANCE_ATTR_TRACE]);
if (tb[INSTANCE_ATTR_NO_NEW_PRIVS])
in->no_new_privs = blobmsg_get_bool(tb[INSTANCE_ATTR_NO_NEW_PRIVS]);
if (!in->trace && tb[INSTANCE_ATTR_SECCOMP])
in->seccomp = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_SECCOMP]));
if (tb[INSTANCE_ATTR_CAPABILITIES])
in->capabilities = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_CAPABILITIES]));
if (tb[INSTANCE_ATTR_EXTROOT])
in->extroot = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_EXTROOT]));
if (tb[INSTANCE_ATTR_OVERLAYDIR])
in->overlaydir = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_OVERLAYDIR]));
if (tb[INSTANCE_ATTR_TMPOVERLAYSIZE])
in->tmpoverlaysize = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_TMPOVERLAYSIZE]));
if (tb[INSTANCE_ATTR_BUNDLE])
in->bundle = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_BUNDLE]));
if (tb[INSTANCE_ATTR_INFRA])
in->infra = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_INFRA]));
if (tb[INSTANCE_ATTR_PIDFILE]) {
char *pidfile = blobmsg_get_string(tb[INSTANCE_ATTR_PIDFILE]);
if (pidfile)
in->pidfile = strdup(pidfile);
}
if (tb[INSTANCE_ATTR_RELOADSIG])
in->reload_signal = blobmsg_get_u32(tb[INSTANCE_ATTR_RELOADSIG]);
if (tb[INSTANCE_ATTR_STDOUT] && blobmsg_get_bool(tb[INSTANCE_ATTR_STDOUT]))
in->_stdout.fd.fd = -1;
if (tb[INSTANCE_ATTR_STDERR] && blobmsg_get_bool(tb[INSTANCE_ATTR_STDERR]))
in->_stderr.fd.fd = -1;
instance_fill_any(&in->data, tb[INSTANCE_ATTR_DATA]);
if (!instance_fill_array(&in->env, tb[INSTANCE_ATTR_ENV], NULL, false))
return false;
if (!instance_fill_array(&in->netdev, tb[INSTANCE_ATTR_NETDEV], instance_netdev_update, true))
return false;
if (!instance_fill_array(&in->file, tb[INSTANCE_ATTR_FILE], instance_file_update, true))
return false;
if (!instance_fill_array(&in->limits, tb[INSTANCE_ATTR_LIMITS], NULL, false))
return false;
if (!instance_fill_array(&in->errors, tb[INSTANCE_ATTR_ERROR], NULL, true))
return false;
if (tb[INSTANCE_ATTR_FACILITY]) {
int facility = syslog_facility_str_to_int(blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
if (facility != -1) {
in->syslog_facility = facility;
P_DEBUG(3, "setting facility '%s'\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
} else
P_DEBUG(3, "unknown syslog facility '%s' given, using default (LOG_DAEMON)\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
}
if (tb[INSTANCE_ATTR_WATCHDOG]) {
int i = 0;
uint32_t vals[2] = { 0, 30 };
blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCHDOG], rem) {
if (i >= 2)
break;
vals[i] = atoi(blobmsg_get_string(cur2));
i++;
}
if (vals[0] >= 0 && vals[0] < __INSTANCE_WATCHDOG_MODE_MAX) {
in->watchdog.mode = vals[0];
P_DEBUG(3, "setting watchdog mode (%d)\n", vals[0]);
} else {
in->watchdog.mode = 0;
P_DEBUG(3, "unknown watchdog mode (%d) given, using default (0)\n", vals[0]);
}
if (vals[1] > 0) {
in->watchdog.freq = vals[1];
P_DEBUG(3, "setting watchdog timeout (%d)\n", vals[0]);
} else {
in->watchdog.freq = 30;
P_DEBUG(3, "invalid watchdog timeout (%d) given, using default (30)\n", vals[1]);
}
}
if (!in->trace && tb[INSTANCE_ATTR_JAIL])
in->has_jail = instance_jail_parse(in, tb[INSTANCE_ATTR_JAIL]);
if (in->has_jail) {
r = stat(UJAIL_BIN_PATH, &s);
if (r < 0) {
if (in->require_jail) {
ERROR("Cannot jail service %s::%s. %s: %m (%d)\n",
in->srv->name, in->name, UJAIL_BIN_PATH, r);
return false;
}
P_DEBUG(2, "unable to find %s: %m (%d)\n", UJAIL_BIN_PATH, r);
in->has_jail = false;
}
}
return true;
}
/*
 * Free every blobmsg list held by an instance: the six configuration
 * lists of the instance itself plus the two lists kept in its jail
 * settings.
 */
static void
instance_config_cleanup(struct service_instance *in)
{
	struct blobmsg_list *lists[] = {
		&in->env,
		&in->data,
		&in->netdev,
		&in->file,
		&in->limits,
		&in->errors,
		&in->jail.mount,
		&in->jail.setns,
	};
	size_t idx;

	for (idx = 0; idx < sizeof(lists) / sizeof(lists[0]); idx++)
		blobmsg_list_free(lists[idx]);
}
/*
 * Replace the string owned by *dst with a private copy of src.
 *
 * Whatever *dst held before is freed. When src is NULL, *dst ends up
 * NULL; otherwise it receives the result of strdup(src).
 */
static void
instance_config_move_strdup(char **dst, char *src)
{
	char *previous = *dst;

	*dst = NULL;
	free(previous);	/* free(NULL) is a no-op */

	if (src)
		*dst = strdup(src);
}
/*
 * Take over the parsed configuration of in_src into in.
 *
 * - the blobmsg lists of 'in' are freed and then filled through
 *   blobmsg_list_move() from in_src
 * - scalar settings and plain pointers are copied member by member
 * - strings are duplicated, so each instance keeps freeing its own copy
 * - the raw config blob changes owner: in_src->config is set to NULL
 */
static void
instance_config_move(struct service_instance *in, struct service_instance *in_src)
{
	/* drop the lists currently held by 'in' before taking the new ones */
	instance_config_cleanup(in);
	blobmsg_list_move(&in->env, &in_src->env);
	blobmsg_list_move(&in->data, &in_src->data);
	blobmsg_list_move(&in->netdev, &in_src->netdev);
	blobmsg_list_move(&in->file, &in_src->file);
	blobmsg_list_move(&in->limits, &in_src->limits);
	blobmsg_list_move(&in->errors, &in_src->errors);
	blobmsg_list_move(&in->jail.mount, &in_src->jail.mount);
	blobmsg_list_move(&in->jail.setns, &in_src->jail.setns);

	/*
	 * Pointer members are copied as-is; presumably they point into
	 * in_src->config, whose ownership is transferred at the end of this
	 * function -- TODO confirm against instance_config_parse().
	 */
	in->trigger = in_src->trigger;
	in->command = in_src->command;
	in->respawn = in_src->respawn;
	in->respawn_retry = in_src->respawn_retry;
	in->respawn_threshold = in_src->respawn_threshold;
	in->respawn_timeout = in_src->respawn_timeout;
	in->reload_signal = in_src->reload_signal;
	in->term_timeout = in_src->term_timeout;
	in->watchdog.mode = in_src->watchdog.mode;
	in->watchdog.freq = in_src->watchdog.freq;
	/*
	 * NOTE(review): this copies the whole uloop_timeout by value; confirm
	 * neither side can be pending at this point.
	 */
	in->watchdog.timeout = in_src->watchdog.timeout;
	in->name = in_src->name;
	in->nice = in_src->nice;
	in->trace = in_src->trace;
	in->node.avl.key = in_src->node.avl.key;
	in->syslog_facility = in_src->syslog_facility;
	in->require_jail = in_src->require_jail;
	in->no_new_privs = in_src->no_new_privs;
	in->immediately = in_src->immediately;
	in->uid = in_src->uid;
	in->pw_gid = in_src->pw_gid;
	in->gr_gid = in_src->gr_gid;

	in->has_jail = in_src->has_jail;
	in->jail.procfs = in_src->jail.procfs;
	in->jail.sysfs = in_src->jail.sysfs;
	in->jail.ubus = in_src->jail.ubus;
	in->jail.log = in_src->jail.log;
	in->jail.ronly = in_src->jail.ronly;
	in->jail.netns = in_src->jail.netns;
	/* userns was previously left out, so an update kept the stale value */
	in->jail.userns = in_src->jail.userns;
	in->jail.cgroupsns = in_src->jail.cgroupsns;
	in->jail.console = in_src->jail.console;
	in->jail.argc = in_src->jail.argc;

	/* strings: free the old value, keep a private copy of the new one */
	instance_config_move_strdup(&in->pidfile, in_src->pidfile);
	instance_config_move_strdup(&in->seccomp, in_src->seccomp);
	instance_config_move_strdup(&in->capabilities, in_src->capabilities);
	instance_config_move_strdup(&in->bundle, in_src->bundle);
	instance_config_move_strdup(&in->infra, in_src->infra);
	instance_config_move_strdup(&in->extroot, in_src->extroot);
	instance_config_move_strdup(&in->overlaydir, in_src->overlaydir);
	instance_config_move_strdup(&in->tmpoverlaysize, in_src->tmpoverlaysize);
	instance_config_move_strdup(&in->user, in_src->user);
	instance_config_move_strdup(&in->group, in_src->group);
	instance_config_move_strdup(&in->jail.name, in_src->jail.name);
	instance_config_move_strdup(&in->jail.hostname, in_src->jail.hostname);
	instance_config_move_strdup(&in->jail.pidfile, in_src->jail.pidfile);

	/* hand the config blob over; in_src must not free it afterwards */
	free(in->config);
	in->config = in_src->config;
	in_src->config = NULL;
}
/*
 * Apply the freshly parsed configuration in_new to the existing
 * instance in.
 *
 * An instance whose process is pending and which is not halting gets
 * instance_restart() when its configuration changed and then takes over
 * the new configuration. In every other case the configuration is taken
 * over and the instance is started.
 */
void
instance_update(struct service_instance *in, struct service_instance *in_new)
{
	const bool cfg_changed = instance_config_changed(in, in_new);
	const bool active = in->proc.pending && !in->halt;

	if (active) {
		if (cfg_changed)
			instance_restart(in);
		instance_config_move(in, in_new);
		/* restart happens in the child callback handler */
		return;
	}

	instance_config_move(in, in_new);
	instance_start(in);
}
/*
 * Tear down an instance and release everything it owns: stdio streams,
 * the uloop process and timers, triggers and watches, the blobmsg
 * lists, the config blob, all heap strings, and the instance itself.
 */
void
instance_free(struct service_instance *in)
{
	size_t idx;

	instance_free_stdio(in);
	uloop_process_delete(&in->proc);
	uloop_timeout_cancel(&in->timeout);
	uloop_timeout_cancel(&in->watchdog.timeout);
	trigger_del(in);
	watch_del(in);
	instance_config_cleanup(in);
	free(in->config);

	{
		/* every heap string owned by the instance */
		char *owned[] = {
			in->user,
			in->group,
			in->extroot,
			in->overlaydir,
			in->tmpoverlaysize,
			in->bundle,
			in->infra,
			in->jail.name,
			in->jail.hostname,
			in->jail.pidfile,
			in->seccomp,
			in->capabilities,
			in->pidfile,
		};

		for (idx = 0; idx < sizeof(owned) / sizeof(owned[0]); idx++)
			free(owned[idx]);
	}

	free(in);
}
/*
 * Initialise a service instance from its configuration blob.
 *
 * The blob is duplicated with blob_memdup(); the copy is stored in
 * in->config and in->name points at the name inside that copy. Defaults
 * are set, callbacks are wired up, the blobmsg lists are initialised,
 * and finally the configuration is parsed; the result of the parse is
 * stored in in->valid.
 */
void
instance_init(struct service_instance *in, struct service *s, struct blob_attr *config)
{
/* work on a private copy; instance_free() releases it via in->config */
config = blob_memdup(config);
in->srv = s;
in->name = blobmsg_name(config);
in->config = config;
in->timeout.cb = instance_timeout;
in->proc.cb = instance_exit;
/* defaults, possibly overridden by instance_config_parse() below */
in->term_timeout = 5;
in->syslog_facility = LOG_DAEMON;
in->exit_code = 0;
in->require_jail = false;
in->immediately = false;
/*
 * All stream fds start out as -2; instance_config_parse() switches
 * _stdout/_stderr to -1 when the respective attribute is set.
 */
in->_stdout.fd.fd = -2;
in->_stdout.stream.string_data = true;
in->_stdout.stream.notify_read = instance_stdout;
in->_stderr.fd.fd = -2;
in->_stderr.stream.string_data = true;
in->_stderr.stream.notify_read = instance_stderr;
in->console.fd.fd = -2;
in->console.stream.string_data = true;
in->console.stream.notify_read = instance_console;
in->console_client.fd.fd = -2;
in->console_client.stream.string_data = true;
in->console_client.stream.notify_read = instance_console_client;
/* netdev and file lists use their own node type and compare callback */
blobmsg_list_init(&in->netdev, struct instance_netdev, node, instance_netdev_cmp);
blobmsg_list_init(&in->file, struct instance_file, node, instance_file_cmp);
blobmsg_list_simple_init(&in->env);
blobmsg_list_simple_init(&in->data);
blobmsg_list_simple_init(&in->limits);
blobmsg_list_simple_init(&in->errors);
blobmsg_list_simple_init(&in->jail.mount);
blobmsg_list_simple_init(&in->jail.setns);
in->watchdog.timeout.cb = instance_watchdog;
/* parse last: it relies on everything initialised above */
in->valid = instance_config_parse(in);
}
/*
 * Serialise the state and configuration of an instance into blob buffer b
 * as a table named after the instance.
 *
 * Nothing is emitted for an instance whose configuration did not parse
 * (in->valid is false). The trigger blob is only included when verbose is
 * non-zero.
 */
void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose)
{
	void *i;

	if (!in->valid)
		return;

	i = blobmsg_open_table(b, in->name);
	blobmsg_add_u8(b, "running", in->proc.pending);
	if (in->proc.pending)
		blobmsg_add_u32(b, "pid", in->proc.pid);
	if (in->command)
		blobmsg_add_blob(b, in->command);
	if (in->bundle)
		blobmsg_add_string(b, "bundle", in->bundle);
	if (in->infra)
		blobmsg_add_string(b, "infra", in->infra);
	blobmsg_add_u32(b, "term_timeout", in->term_timeout);
	if (!in->proc.pending)
		blobmsg_add_u32(b, "exit_code", in->exit_code);

	if (!avl_is_empty(&in->errors.avl)) {
		struct blobmsg_list_node *var;
		void *e = blobmsg_open_array(b, "errors");
		blobmsg_list_for_each(&in->errors, var)
			blobmsg_add_string(b, NULL, blobmsg_data(var->data));
		/* opened as an array, so close it as one (was blobmsg_close_table) */
		blobmsg_close_array(b, e);
	}

	if (!avl_is_empty(&in->env.avl)) {
		struct blobmsg_list_node *var;
		void *e = blobmsg_open_table(b, "env");
		blobmsg_list_for_each(&in->env, var)
			blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data));
		blobmsg_close_table(b, e);
	}

	if (!avl_is_empty(&in->data.avl)) {
		struct blobmsg_list_node *var;
		void *e = blobmsg_open_table(b, "data");
		blobmsg_list_for_each(&in->data, var)
			blobmsg_add_blob(b, var->data);
		blobmsg_close_table(b, e);
	}

	if (!avl_is_empty(&in->limits.avl)) {
		struct blobmsg_list_node *var;
		void *e = blobmsg_open_table(b, "limits");
		blobmsg_list_for_each(&in->limits, var)
			blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data));
		blobmsg_close_table(b, e);
	}

	if (!avl_is_empty(&in->netdev.avl)) {
		struct blobmsg_list_node *var;
		void *n = blobmsg_open_array(b, "netdev");
		blobmsg_list_for_each(&in->netdev, var)
			blobmsg_add_string(b, NULL, blobmsg_data(var->data));
		blobmsg_close_array(b, n);
	}

	if (in->reload_signal)
		blobmsg_add_u32(b, "reload_signal", in->reload_signal);

	if (in->respawn) {
		void *r = blobmsg_open_table(b, "respawn");
		blobmsg_add_u32(b, "threshold", in->respawn_threshold);
		blobmsg_add_u32(b, "timeout", in->respawn_timeout);
		blobmsg_add_u32(b, "retry", in->respawn_retry);
		blobmsg_close_table(b, r);
	}

	if (in->trace)
		blobmsg_add_u8(b, "trace", true);
	if (in->no_new_privs)
		blobmsg_add_u8(b, "no_new_privs", true);
	if (in->seccomp)
		blobmsg_add_string(b, "seccomp", in->seccomp);
	if (in->capabilities)
		blobmsg_add_string(b, "capabilities", in->capabilities);
	if (in->pidfile)
		blobmsg_add_string(b, "pidfile", in->pidfile);
	if (in->user)
		blobmsg_add_string(b, "user", in->user);
	if (in->group)
		blobmsg_add_string(b, "group", in->group);

	if (in->has_jail) {
		void *r = blobmsg_open_table(b, "jail");
		if (in->jail.name)
			blobmsg_add_string(b, "name", in->jail.name);
		if (!in->bundle) {
			/* no bundle configured: report the individual jail flags */
			if (in->jail.hostname)
				blobmsg_add_string(b, "hostname", in->jail.hostname);
			blobmsg_add_u8(b, "procfs", in->jail.procfs);
			blobmsg_add_u8(b, "sysfs", in->jail.sysfs);
			blobmsg_add_u8(b, "ubus", in->jail.ubus);
			blobmsg_add_u8(b, "log", in->jail.log);
			blobmsg_add_u8(b, "ronly", in->jail.ronly);
			blobmsg_add_u8(b, "netns", in->jail.netns);
			blobmsg_add_u8(b, "userns", in->jail.userns);
			blobmsg_add_u8(b, "cgroupsns", in->jail.cgroupsns);
		} else {
			/* bundle configured: only pidfile and "immediately" are reported */
			if (in->jail.pidfile)
				blobmsg_add_string(b, "pidfile", in->jail.pidfile);
			blobmsg_add_u8(b, "immediately", in->immediately);
		}
		blobmsg_add_u8(b, "console", (in->console.fd.fd > -1));
		blobmsg_close_table(b, r);

		/* "mount" and "setns" are emitted next to the "jail" table, not inside it */
		if (!avl_is_empty(&in->jail.mount.avl)) {
			struct blobmsg_list_node *var;
			void *e = blobmsg_open_table(b, "mount");
			blobmsg_list_for_each(&in->jail.mount, var)
				blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data));
			blobmsg_close_table(b, e);
		}

		if (!avl_is_empty(&in->jail.setns.avl)) {
			struct blobmsg_list_node *var;
			void *s = blobmsg_open_array(b, "setns");
			blobmsg_list_for_each(&in->jail.setns, var)
				blobmsg_add_blob(b, var->data);
			blobmsg_close_array(b, s);
		}
	}

	if (in->extroot)
		blobmsg_add_string(b, "extroot", in->extroot);
	if (in->overlaydir)
		blobmsg_add_string(b, "overlaydir", in->overlaydir);
	if (in->tmpoverlaysize)
		blobmsg_add_string(b, "tmpoverlaysize", in->tmpoverlaysize);

	if (verbose && in->trigger)
		blobmsg_add_blob(b, in->trigger);

	if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
		void *r = blobmsg_open_table(b, "watchdog");
		blobmsg_add_u32(b, "mode", in->watchdog.mode);
		/* the configured frequency is exposed under the key "timeout" */
		blobmsg_add_u32(b, "timeout", in->watchdog.freq);
		blobmsg_close_table(b, r);
	}

	blobmsg_close_table(b, i);
}
/*
* Copyright (C) 2013 Felix Fietkau <nbd@openwrt.org>
* Copyright (C) 2013 John Crispin <blogic@openwrt.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
* as published by the Free Software Foundation
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef __PROCD_INSTANCE_H
#define __PROCD_INSTANCE_H
#include <libubox/vlist.h>
#include <libubox/uloop.h>
#include <libubox/ustream.h>
#include "../utils/utils.h"
#define RESPAWN_ERROR (5 * 60)
#define SIGNALLED_OFFSET 128
/*
 * Jail settings of a service instance (embedded as
 * struct service_instance.jail).
 */
struct jail {
/* flags reported by instance_dump() when no bundle is configured */
bool procfs;
bool sysfs;
bool ubus;
bool log;
bool ronly;
bool netns;
bool userns;
bool cgroupsns;
bool console;
/* heap strings, released in instance_free() */
char *name;
char *hostname;
char *pidfile;
/* lists released in instance_config_cleanup() */
struct blobmsg_list mount;
struct blobmsg_list setns;
int argc;
};
/*
 * Watchdog modes of an instance. DISABLED is the value that makes
 * instance_dump() omit the "watchdog" table.
 */
typedef enum instance_watchdog {
INSTANCE_WATCHDOG_MODE_DISABLED,
INSTANCE_WATCHDOG_MODE_PASSIVE,
INSTANCE_WATCHDOG_MODE_ACTIVE,
/* sentinel: exclusive upper bound used when validating a configured mode */
__INSTANCE_WATCHDOG_MODE_MAX,
} instance_watchdog_mode_t;
/* Watchdog state of an instance. */
struct watchdog {
instance_watchdog_mode_t mode;
/* configured value (default 30); dumped by instance_dump() as "timeout" */
uint32_t freq;
/* uloop timer; its callback is set to instance_watchdog in instance_init() */
struct uloop_timeout timeout;
};
/*
 * One instance of a service: its parsed configuration, runtime state and
 * the uloop/ustream objects used to supervise its process.
 */
struct service_instance {
struct vlist_node node;
struct service *srv;
/* points at the name inside the config blob (see instance_init()) */
const char *name;
int8_t nice;
/* result of instance_config_parse(); invalid instances are not dumped */
bool valid;
char *user;
uid_t uid;
gid_t pw_gid;
char *group;
gid_t gr_gid;
bool halt;
bool restart;
bool respawn;
int respawn_count;
int reload_signal;
struct timespec start;
bool trace;
bool has_jail;
bool require_jail;
bool immediately;
bool no_new_privs;
struct jail jail;
/* heap strings, released in instance_free() */
char *seccomp;
char *capabilities;
char *pidfile;
char *extroot;
char *overlaydir;
char *tmpoverlaysize;
char *bundle;
char *infra;
int syslog_facility;
int exit_code;
uint32_t term_timeout;
uint32_t respawn_timeout;
uint32_t respawn_threshold;
uint32_t respawn_retry;
/* private copy of the configuration blob, owned by the instance */
struct blob_attr *config;
struct uloop_process proc;
struct uloop_timeout timeout;
struct ustream_fd _stdout;
struct ustream_fd _stderr;
struct ustream_fd console;
struct ustream_fd console_client;
struct blob_attr *command;
struct blob_attr *trigger;
/* lists released in instance_config_cleanup() */
struct blobmsg_list env;
struct blobmsg_list data;
struct blobmsg_list netdev;
struct blobmsg_list file;
struct blobmsg_list limits;
struct blobmsg_list errors;
struct watchdog watchdog;
};
/* Start an instance; instance_update() calls this after taking over a new config. */
void instance_start(struct service_instance *in);
/* Stop an instance (defined in instance.c; see there for the meaning of halt). */
void instance_stop(struct service_instance *in, bool halt);
/* Apply the configuration of in_new to the existing instance in. */
void instance_update(struct service_instance *in, struct service_instance *in_new);
/* Initialise in for service s from a configuration blob (the blob is copied). */
void instance_init(struct service_instance *in, struct service *s, struct blob_attr *config);
/* Release an instance and everything it owns, including in itself. */
void instance_free(struct service_instance *in);
/* Serialise an instance into b; the trigger blob is only added when verbose is non-zero. */
void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose);
#endif
/*
* Copyright (C) 2015 John Crispin <blogic@openwrt.org>
* Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
* as published by the Free Software Foundation
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define _GNU_SOURCE
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
/* musl only defines 15 limit types; make sure all 16 are supported */
#ifndef RLIMIT_RTTIME
#define RLIMIT_RTTIME 15
#undef RLIMIT_NLIMITS
#define RLIMIT_NLIMITS 16
#undef RLIM_NLIMITS
#define RLIM_NLIMITS 16
#endif
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <pwd.h>
#include <grp.h>
#include <string.h>
#include <fcntl.h>
#include <sched.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/nsfs.h>
#include <linux/securebits.h>
#include <signal.h>
#include <inttypes.h>
#include "capabilities.h"
#include "elf.h"
#include "fs.h"
#include "jail.h"
#include "log.h"
#include "seccomp-oci.h"
#include "cgroups.h"
#include "netifd.h"
#include <libubox/blobmsg.h>
#include <libubox/blobmsg_json.h>
#include <libubox/list.h>
#include <libubox/vlist.h>
#include <libubox/uloop.h>
#include <libubox/utils.h>
#include <libubus.h>
#ifndef CLONE_NEWCGROUP
#define CLONE_NEWCGROUP 0x02000000
#endif
#define STACK_SIZE (1024 * 1024)
#define OPT_ARGS "cC:d:e:EfFG:h:ij:J:ln:NoO:pP:r:R:sS:uU:w:t:T:y"
#define OCI_VERSION_STRING "1.0.2"
/* One hook program, executed by run_hooklist() through execve(). */
struct hook_execvpe {
char *file;
char **argv;
char **envp;
/* when > 0, multiplied by 1000 and used as the uloop timeout for the hook */
int timeout;
};
/*
 * One sysctl setting: apply_sysctl() writes value into the file
 * "<jail_root>/proc/sys/<entry>".
 */
struct sysctl_val {
char *entry;
char *value;
};
/*
 * Description of one device node: path, mode and dev are passed to
 * mknod(); uid and gid are passed to chown() when either is non-zero.
 */
struct mknod_args {
char *path;
mode_t mode;
dev_t dev;
uid_t uid;
gid_t gid;
};
/*
 * Process-wide jail options. How the members get filled is not visible
 * in this part of the file; free_opts() releases the heap-allocated ones.
 */
static struct {
char *name;
char *hostname;
char **jail_argv;
char *cwd;
char *seccomp;
struct sock_fprog *ociseccomp;
char *capabilities;
struct jail_capset capset;
char *user;
char *group;
char *extroot;
char *overlaydir;
char *tmpoverlaysize;
char **envp;
char *uidmap;
char *gidmap;
char *pidfile;
/* NULL-terminated array of pointers, walked by apply_sysctl() */
struct sysctl_val **sysctl;
int no_new_privs;
/* bit mask tested against CLONE_NEW* flags (e.g. CLONE_NEWNET in build_jail_fs()) */
int namespace;
/* has_namespaces() treats any member different from -1 as "in use" */
struct {
int pid;
int net;
int ns;
int ipc;
int uts;
int user;
int cgroup;
#ifdef CLONE_NEWTIME
int time;
#endif
} setns;
int procfs;
int ronly;
int sysfs;
int console;
int pw_uid;
int pw_gid;
int gr_gid;
int root_map_uid;
gid_t *additional_gids;
size_t num_additional_gids;
mode_t umask;
bool set_umask;
int require_jail;
/* NULL-terminated hook lists, released by free_hooklist() */
struct {
struct hook_execvpe **createRuntime;
struct hook_execvpe **createContainer;
struct hook_execvpe **startContainer;
struct hook_execvpe **poststart;
struct hook_execvpe **poststop;
} hooks;
struct rlimit *rlimits[RLIM_NLIMITS];
int oom_score_adj;
bool set_oom_score_adj;
/* NULL-terminated array of pointers, walked by create_devices() */
struct mknod_args **devices;
char *ocibundle;
char *infra;
bool immediately;
struct blob_attr *annotations;
int term_timeout;
} opts;
static struct blob_buf ocibuf;
/* declared here by hand; called from enter_jail_fs() */
extern int pivot_root(const char *new_root, const char *put_old);
int debug = 0;
/* presumably the stack handed to clone() for the jailed child -- TODO confirm */
static char child_stack[STACK_SIZE];
/* freed in free_and_exit() unless exiting from the child */
static struct ubus_context *parent_ctx;
/* PTY master opened by create_dev_console() */
int console_fd;
/*
 * Tell whether any namespace handling is configured: either one of the
 * opts.setns descriptors differs from -1, or opts.namespace is non-zero.
 */
static inline bool has_namespaces(void)
{
	if (opts.setns.pid != -1 || opts.setns.net != -1 || opts.setns.ns != -1)
		return true;

	if (opts.setns.ipc != -1 || opts.setns.uts != -1)
		return true;

	if (opts.setns.user != -1 || opts.setns.cgroup != -1)
		return true;

#ifdef CLONE_NEWTIME
	if (opts.setns.time != -1)
		return true;
#endif

	return opts.namespace != 0;
}
/*
 * Free a NULL-terminated vector of heap strings together with the
 * vector itself. A NULL vector is accepted and ignored.
 */
static void free_oci_envp(char **p)
{
	char **entry;

	if (!p)
		return;

	for (entry = p; *entry; entry++)
		free(*entry);

	free(p);
}
/*
 * Free a NULL-terminated array of hook pointers: each hook's argv, envp
 * and file, the hook itself, and finally the array. A NULL list is
 * ignored.
 *
 * The array must be walked slot by slot. The previous code incremented
 * the struct pointer taken from the first slot, which never reaches NULL
 * and frees addresses that were never allocated.
 */
static void free_hooklist(struct hook_execvpe **hooklist)
{
	struct hook_execvpe **cur;

	if (!hooklist)
		return;

	for (cur = hooklist; *cur; ++cur) {
		free_oci_envp((*cur)->argv);
		free_oci_envp((*cur)->envp);
		free((*cur)->file);
		free(*cur);
	}

	free(hooklist);
}
/*
 * Free opts.sysctl: every entry/value pair, each element, and the
 * NULL-terminated pointer array itself.
 *
 * The array is walked slot by slot, the same way apply_sysctl() reads
 * it. The previous code incremented the struct pointer taken from the
 * first slot, which never reaches NULL and frees invalid addresses.
 */
static void free_sysctl(void) {
	struct sysctl_val **cur;

	if (!opts.sysctl)
		return;

	for (cur = opts.sysctl; *cur; ++cur) {
		free((*cur)->entry);
		free((*cur)->value);
		free(*cur);
	}

	free(opts.sysctl);
}
/*
 * Free opts.devices: the path of every device description, each
 * description, and the NULL-terminated pointer array itself.
 */
static void free_devices(void) {
	struct mknod_args **slot;

	if (!opts.devices)
		return;

	for (slot = opts.devices; *slot; slot++) {
		struct mknod_args *dev = *slot;

		free(dev->path);
		free(dev);
	}

	free(opts.devices);
}
/* Free every entry of opts.rlimits; unset (NULL) entries are harmless. */
static void free_rlimits(void) {
	int idx = 0;

	while (idx < RLIM_NLIMITS) {
		free(opts.rlimits[idx]);
		idx++;
	}
}
/*
 * Release the resources referenced by opts.
 *
 * parent: when true, the OCI seccomp filter, jail_argv and envp are
 *         freed as well; they are left alone otherwise because the child
 *         still needs them.
 */
static void free_opts(bool parent) {
free_library_search();
mount_free();
cgroups_free();
/* we need to keep argv, envp and seccomp filter in child */
if (parent) { /* parent-only */
if (opts.ociseccomp) {
free(opts.ociseccomp->filter);
free(opts.ociseccomp);
}
free_oci_envp(opts.jail_argv);
free_oci_envp(opts.envp);
}
free_rlimits();
free_sysctl();
free_devices();
free(opts.hostname);
free(opts.cwd);
free(opts.uidmap);
free(opts.gidmap);
free(opts.annotations);
free(opts.extroot);
free(opts.overlaydir);
free(opts.infra);
free_hooklist(opts.hooks.createRuntime);
free_hooklist(opts.hooks.createContainer);
free_hooklist(opts.hooks.startContainer);
free_hooklist(opts.hooks.poststart);
free_hooklist(opts.hooks.poststop);
}
/*
 * Mount an overlay filesystem on top of jail_root, using
 * "<overlaydir>/upper" as upper directory and "<overlaydir>/work" as
 * work directory (both are created if needed).
 *
 * Returns 0 when the overlay was mounted, -1 on any failure. The labels
 * at the end free the strings in reverse order of allocation; each goto
 * targets the label that frees only what has been allocated so far.
 */
static int mount_overlay(char *jail_root, char *overlaydir) {
char *upperdir, *workdir, *optsstr, *upperetc, *upperresolvconf;
const char mountoptsformat[] = "lowerdir=%s,upperdir=%s,workdir=%s";
int ret = -1, fd;
if (asprintf(&upperdir, "%s%s", overlaydir, "/upper") < 0)
goto out;
if (asprintf(&workdir, "%s%s", overlaydir, "/work") < 0)
goto upper_printf;
if (asprintf(&optsstr, mountoptsformat, jail_root, upperdir, workdir) < 0)
goto work_printf;
if (mkdir_p(upperdir, 0755) || mkdir_p(workdir, 0755))
goto opts_printf;
/*
* make sure /etc/resolv.conf exists in overlay and is owned by jail userns root
* this is to work-around a bug in overlayfs described in the overlayfs-userns
* patch:
* 3. modification of a file 'hithere' which is in l but not yet
* in u, and which is not owned by T, is not allowed, even if
* writes to u are allowed. This may be a bug in overlayfs,
* but it is safe behavior.
*/
if (asprintf(&upperetc, "%s/etc", upperdir) < 0)
goto opts_printf;
if (mkdir_p(upperetc, 0755))
goto upper_etc_printf;
if (asprintf(&upperresolvconf, "%s/resolv.conf", upperetc) < 0)
goto upper_etc_printf;
/* a failing creat() is only logged; the mount is attempted regardless */
fd = creat(upperresolvconf, 0644);
if (fd < 0) {
if (errno != EEXIST)
ERROR("creat(%s) failed: %m\n", upperresolvconf);
} else {
close(fd);
}
DEBUG("mount -t overlay %s %s (%s)\n", jail_root, jail_root, optsstr);
if (mount(jail_root, jail_root, "overlay", MS_NOATIME, optsstr))
goto upper_resolvconf_printf;
ret = 0;
upper_resolvconf_printf:
free(upperresolvconf);
upper_etc_printf:
free(upperetc);
opts_printf:
free(optsstr);
work_printf:
free(workdir);
upper_printf:
free(upperdir);
out:
return ret;
}
/*
 * Hand console_fd to the ubus object "container" by invoking its
 * "console_set" method with the jail name.
 *
 * The descriptor is closed here only when both the lookup and the
 * invocation succeeded; otherwise a message is logged and the
 * descriptor stays open. Nothing happens when no ubus connection can be
 * established.
 */
static void pass_console(int console_fd)
{
	static struct blob_buf req;
	struct ubus_context *child_ctx;
	uint32_t id;
	bool delivered;

	child_ctx = ubus_connect(NULL);
	if (!child_ctx)
		return;

	blob_buf_init(&req, 0);
	blobmsg_add_string(&req, "name", opts.name);

	delivered = !ubus_lookup_id(child_ctx, "container", &id) &&
		    !ubus_invoke_fd(child_ctx, id, "console_set", req.head, NULL, NULL, 3000, console_fd);

	if (delivered)
		close(console_fd);
	else
		INFO("ubus request failed\n");

	blob_buf_free(&req);
	ubus_free(child_ctx);
}
/*
 * Create a PTY for the jail: pass the master side on through
 * pass_console(), bind-mount the slave onto "<jail_root>/dev/console"
 * and make the slave stdin, stdout and stderr of this process.
 *
 * Returns 0 on success, -1 when no PTY master could be opened, and 1 on
 * any later failure.
 */
static int create_dev_console(const char *jail_root)
{
	char *console_fname;
	char dev_console_path[PATH_MAX];
	int slave_console_fd, dev_console_dummy;

	/* Open UNIX/98 virtual console */
	console_fd = posix_openpt(O_RDWR | O_NOCTTY);
	if (console_fd < 0)
		return -1;

	console_fname = ptsname(console_fd);
	/* check before logging: a NULL pointer must not reach a "%s" conversion */
	if (!console_fname)
		goto no_console;

	DEBUG("got console fd %d and PTS client name %s\n", console_fd, console_fname);

	grantpt(console_fd);
	unlockpt(console_fd);

	/* pass PTY master to procd */
	pass_console(console_fd);

	/* mount-bind PTY slave to /dev/console in jail */
	snprintf(dev_console_path, sizeof(dev_console_path), "%s/dev/console", jail_root);
	dev_console_dummy = creat(dev_console_path, 0620);
	if (dev_console_dummy < 0)
		goto no_console;

	close(dev_console_dummy);

	if (mount(console_fname, dev_console_path, "bind", MS_BIND, NULL))
		goto no_console;

	/* use PTY slave for stdio */
	slave_console_fd = open(console_fname, O_RDWR); /* | O_NOCTTY */
	if (slave_console_fd < 0)
		goto no_console;

	dup2(slave_console_fd, 0);
	dup2(slave_console_fd, 1);
	dup2(slave_console_fd, 2);
	close(slave_console_fd);

	INFO("using guest console %s\n", console_fname);

	return 0;

no_console:
	/*
	 * NOTE(review): pass_console() may already have closed console_fd on
	 * success, in which case this is a second close of the same number.
	 */
	close(console_fd);
	return 1;
}
/* set to 1 by run_hooklist() before forking, cleared in hook_process_handler() */
static int hook_running = 0;
/* exit status or terminating signal of the hook that finished last */
static int hook_return_code = 0;
/* cursor into the NULL-terminated hook list currently being run */
static struct hook_execvpe **current_hook = NULL;
typedef void (*hook_return_handler)(void);
/* called by run_hooklist() once the cursor reaches the terminating NULL */
static hook_return_handler hook_return_cb = NULL;
static void hook_process_timeout_cb(struct uloop_timeout *t);
static struct uloop_timeout hook_process_timeout = {
.cb = hook_process_timeout_cb,
};
static void run_hooklist(void);
/*
 * Completion handler for a hook process.
 *
 * Cancels the hook timeout, records the exit status (or the terminating
 * signal) in hook_return_code, logs it, and moves on to the next hook
 * in the list.
 */
static void hook_process_handler(struct uloop_process *c, int ret)
{
	uloop_timeout_cancel(&hook_process_timeout);

	if (!WIFEXITED(ret)) {
		hook_return_code = WTERMSIG(ret);
		ERROR("hook (%d) exited with signal: %d\n", c->pid, hook_return_code);
	} else {
		hook_return_code = WEXITSTATUS(ret);
		if (hook_return_code == 0) {
			DEBUG("hook (%d) exited with exit: %d\n", c->pid, hook_return_code);
		} else {
			ERROR("hook (%d) exited with exit: %d\n", c->pid, hook_return_code);
		}
	}

	hook_running = 0;
	++current_hook;
	run_hooklist();
}
/* uloop process slot for the hook being run; its pid is set by run_hooklist() */
static struct uloop_process hook_process = {
.cb = hook_process_handler,
};
/* Timeout callback for a hook: log and send SIGKILL to the hook process. */
static void hook_process_timeout_cb(struct uloop_timeout *t)
{
	const pid_t hook_pid = hook_process.pid;

	DEBUG("hook process failed to stop, sending SIGKILL\n");
	kill(hook_pid, SIGKILL);
}
/*
 * Run the hook at the current list position.
 *
 * When the cursor is at the terminating NULL, hook_return_cb() is called
 * and the function returns. Otherwise the hook is forked and executed,
 * and uloop is run until hook_process_handler() advances to the next
 * hook.
 *
 * A hook that cannot be stat()ed, is not executable, or cannot be forked
 * is reported to hook_process_handler() and then skipped. The previous
 * code fell through after reporting: it inspected an uninitialised
 * struct stat, forked the hook anyway, and after a failed fork() ended up
 * registering and signalling pid -1.
 */
static void run_hooklist(void)
{
	struct hook_execvpe *hook = *current_hook;
	struct stat s;

	if (!hook) {
		hook_return_cb();
		return;
	}

	DEBUG("executing hook %s\n", hook->file);

	if (stat(hook->file, &s)) {
		hook_process_handler(&hook_process, ENOENT);
		return;
	}

	if (!((unsigned long)s.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
		hook_process_handler(&hook_process, EPERM);
		return;
	}

	hook_running = 1;
	hook_process.pid = fork();
	if (hook_process.pid == 0) {
		int err;

		/* child */
		execve(hook->file, hook->argv, hook->envp);
		err = errno;	/* keep the execve error; logging may change errno */
		ERROR("execve error %m\n");
		_exit(err);
	} else if (hook_process.pid < 0) {
		int err = errno;

		/* fork error */
		ERROR("hook fork error\n");
		hook_running = 0;
		hook_process_handler(&hook_process, err);
		return;
	}

	/* parent */
	uloop_process_add(&hook_process);

	if (hook->timeout > 0)
		uloop_timeout_set(&hook_process_timeout, 1000 * hook->timeout);

	uloop_run();
	if (hook_running) {
		DEBUG("uloop interrupted, killing jail process\n");
		kill(hook_process.pid, SIGTERM);
		uloop_timeout_set(&hook_process_timeout, 1000);
		uloop_run();
	}
}
/*
 * Run all hooks of hooklist one after another and call return_cb once
 * the end of the list is reached.
 *
 * Without a list, return_cb is called directly. The function then
 * returns; previously it went on to install the NULL list and
 * run_hooklist() dereferenced it as soon as return_cb came back.
 */
static void run_hooks(struct hook_execvpe **hooklist, hook_return_handler return_cb)
{
	if (!hooklist) {
		return_cb();
		return;
	}

	current_hook = hooklist;
	hook_return_cb = return_cb;

	run_hooklist();
}
/*
 * Apply the sysctl settings from opts.sysctl.
 *
 * procfs is mounted temporarily at "<jail_root>/proc", every value is
 * written to "<jail_root>/proc/sys/<entry>", and the mount is removed
 * again.
 *
 * Returns 0 on success (or when nothing is configured), ENOMEM when a
 * path cannot be allocated, EPERM when procfs cannot be mounted, or the
 * errno of the failing open()/write().
 *
 * Unlike before, error paths no longer leak the procdir string or leave
 * procfs mounted, and errno is captured before logging can overwrite it.
 */
static int apply_sysctl(const char *jail_root)
{
	struct sysctl_val **cur;
	char *procdir, *fname;
	int f, ret = 0;

	if (!opts.sysctl)
		return 0;

	if (asprintf(&procdir, "%s/proc", jail_root) < 0)
		return ENOMEM;

	mkdir(procdir, 0700);
	if (mount("proc", procdir, "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0)) {
		ret = EPERM;
		goto out_free;
	}

	for (cur = opts.sysctl; *cur; ++cur) {
		if (asprintf(&fname, "%s/sys/%s", procdir, (*cur)->entry) < 0) {
			ret = ENOMEM;
			break;
		}

		DEBUG("sysctl: writing '%s' to %s\n", (*cur)->value, fname);

		f = open(fname, O_WRONLY);
		if (f < 0) {
			ret = errno;
			ERROR("sysctl: can't open %s\n", fname);
			free(fname);
			break;
		}

		if (write(f, (*cur)->value, strlen((*cur)->value)) < 0) {
			ret = errno;
			ERROR("sysctl: write to %s\n", fname);
			free(fname);
			close(f);
			break;
		}

		free(fname);
		close(f);
	}

	/* the temporary procfs mount is removed on success and on error alike */
	umount(procdir);
	rmdir(procdir);
out_free:
	free(procdir);

	return ret;
}
/*
 * glibc defines makedev as a call to a function. Replace it with a pure
 * macro so it can be used in the static initialiser of default_devices.
 */
#if defined(__GLIBC__)
#undef makedev
/* from musl's sys/sysmacros.h */
#define makedev(x,y) ( \
(((x)&0xfffff000ULL) << 32) | \
(((x)&0x00000fffULL) << 8) | \
(((y)&0xffffff00ULL) << 12) | \
(((y)&0x000000ffULL)) )
#endif
/*
 * Device nodes create_devices() always tries to create. The table is
 * terminated by an entry whose path is NULL. All entries are character
 * devices with mode rw-rw-rw-; /dev/tty additionally gets gid 5.
 */
static struct mknod_args default_devices[] = {
{ .path = "/dev/null", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 3) },
{ .path = "/dev/zero", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 5) },
{ .path = "/dev/full", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 7) },
{ .path = "/dev/random", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 8) },
{ .path = "/dev/urandom", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 9) },
{ .path = "/dev/tty", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(5, 0), .gid = 5 },
{ 0 },
};
/*
 * Create the device nodes of the jail: first the ones configured in
 * opts.devices, then the built-in default_devices, and finally the
 * /dev symlinks.
 *
 * Returns 0 on success, EPERM for a configured device outside of /dev,
 * EINVAL for a path without '/', or the errno of a failing
 * mknod()/chown(). A default device that cannot be created is skipped,
 * and failing symlinks only produce a warning.
 */
static int create_devices(void)
{
	struct mknod_args **cur, *curdef;
	char *path, *tmp;
	int ret;

	if (!opts.devices)
		goto only_default_devices;

	cur = opts.devices;
	while (*cur) {
		path = (*cur)->path;
		/*
		 * don't allow devices outside of /dev; compare including the
		 * trailing slash so that e.g. "/devices/x" is rejected as well
		 */
		if (strncmp(path, "/dev/", 5))
			return EPERM;

		/* make sure parent folder exists */
		tmp = strrchr(path, '/');
		if (!tmp)
			return EINVAL;

		/* cut the path at its last component for the duration of mkdir_p() */
		*tmp = '\0';
		if (strcmp(path, "/dev")) {
			DEBUG("creating directory %s\n", path);
			mkdir_p(path, 0755);
		}
		*tmp = '/';

		DEBUG("creating %s (mode=%08o)\n", path, (*cur)->mode);

		/* create device */
		if (mknod(path, (*cur)->mode, (*cur)->dev))
			return errno;

		/* change owner, if needed */
		if (((*cur)->uid || (*cur)->gid) &&
		    chown(path, (*cur)->uid, (*cur)->gid))
			return errno;

		++cur;
	}

only_default_devices:
	curdef = default_devices;
	while(curdef->path) {
		DEBUG("creating %s (mode=%08o)\n", curdef->path, curdef->mode);
		if (mknod(curdef->path, curdef->mode, curdef->dev)) {
			++curdef;
			continue; /* may already exist, eg. due to a bind-mount */
		}
		if ((curdef->uid || curdef->gid) &&
		    chown(curdef->path, curdef->uid, curdef->gid))
			return errno;

		++curdef;
	}

	/* Dev symbolic links as defined in OCI spec */
	ret = symlink("/dev/pts/ptmx", "/dev/ptmx");
	if (ret < 0)
		WARNING("symlink() failed to create link to /dev/pts/ptmx");

	ret = symlink("/proc/self/fd", "/dev/fd");
	if (ret < 0)
		WARNING("symlink() failed to create link to /proc/self/fd");

	ret = symlink("/proc/self/fd/0", "/dev/stdin");
	if (ret < 0)
		WARNING("symlink() failed to create link to /proc/self/fd/0");

	ret = symlink("/proc/self/fd/1", "/dev/stdout");
	if (ret < 0)
		WARNING("symlink() failed to create link to /proc/self/fd/1");

	ret = symlink("/proc/self/fd/2", "/dev/stderr");
	if (ret < 0)
		WARNING("symlink() failed to create link to /proc/self/fd/2");

	return 0;
}
/* mkdtemp() templates; build_jail_fs() replaces the XXXXXX part in place */
static char jail_root[] = "/tmp/ujail-XXXXXX";
static char tmpovdir[] = "/tmp/ujail-overlay-XXXXXX";
/* umask saved by build_jail_fs() and restored by enter_jail_fs() */
static mode_t old_umask;
static void enter_jail_fs(void);
/*
 * Assemble the root filesystem of the jail below a fresh temporary
 * directory: apply sysctls, make "/" private, mount either the extroot
 * or a tmpfs as root, optionally put an overlay on top, perform the
 * configured mounts, set up the console and resolv.conf, and finally run
 * the createContainer hooks with enter_jail_fs() as continuation.
 *
 * Returns 0 on success, -1 (or the non-zero result of mount_overlay())
 * on failure.
 */
static int build_jail_fs(void)
{
	char *overlaydir = NULL;
	int ret;

	/* cleared here; enter_jail_fs() restores the saved umask */
	old_umask = umask(0);

	if (mkdtemp(jail_root) == NULL) {
		ERROR("mkdtemp(%s) failed: %m\n", jail_root);
		return -1;
	}

	if (apply_sysctl(jail_root)) {
		ERROR("failed to apply sysctl values\n");
		return -1;
	}

	/* oldroot can't be MS_SHARED else pivot_root() fails */
	if (mount("none", "/", "none", MS_REC|MS_PRIVATE, NULL)) {
		ERROR("private mount failed %m\n");
		return -1;
	}

	if (opts.extroot) {
		if (mount(opts.extroot, jail_root, "bind", MS_BIND, NULL)) {
			ERROR("extroot mount failed %m\n");
			return -1;
		}
	} else {
		if (mount("tmpfs", jail_root, "tmpfs", MS_NOATIME, "mode=0755")) {
			ERROR("tmpfs mount failed %m\n");
			return -1;
		}
	}

	if (opts.tmpoverlaysize) {
		char mountoptsstr[64];
		int len;

		/*
		 * The buffer used to hold only 8 characters of size, so longer
		 * values were silently cut and a wrong size was mounted.
		 */
		len = snprintf(mountoptsstr, sizeof(mountoptsstr),
			       "mode=0755,size=%s", opts.tmpoverlaysize);
		if (len < 0 || (size_t)len >= sizeof(mountoptsstr)) {
			ERROR("tmpoverlaysize '%s' is too long\n", opts.tmpoverlaysize);
			return -1;
		}

		if (mkdtemp(tmpovdir) == NULL) {
			/* report the template that failed (was jail_root) */
			ERROR("mkdtemp(%s) failed: %m\n", tmpovdir);
			return -1;
		}
		if (mount("tmpfs", tmpovdir, "tmpfs", MS_NOATIME,
			  mountoptsstr)) {
			ERROR("failed to mount tmpfs for overlay (size=%s)\n", opts.tmpoverlaysize);
			return -1;
		}
		overlaydir = tmpovdir;
	}

	/* an explicit overlay directory takes precedence over the tmpfs one */
	if (opts.overlaydir)
		overlaydir = opts.overlaydir;

	if (overlaydir) {
		ret = mount_overlay(jail_root, overlaydir);
		if (ret)
			return ret;
	}

	if (chdir(jail_root)) {
		ERROR("chdir(%s) (jail_root) failed: %m\n", jail_root);
		return -1;
	}

	if (mount_all(jail_root)) {
		ERROR("mount_all() failed\n");
		return -1;
	}

	if (opts.console)
		create_dev_console(jail_root);

	/* make sure /etc/resolv.conf exists if in new network namespace */
	if (opts.namespace & CLONE_NEWNET) {
		char jailetc[PATH_MAX], jaillink[PATH_MAX];

		snprintf(jailetc, PATH_MAX, "%s/etc", jail_root);
		mkdir_p(jailetc, 0755);
		snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root);
		if (overlaydir)
			unlink(jaillink);

		ret = symlink("../dev/resolv.conf.d/resolv.conf.auto", jaillink);
		if (ret < 0)
			WARNING("symlink() failed to create link to ../dev/resolv.conf.d/resolv.conf.auto");
	}

	run_hooks(opts.hooks.createContainer, enter_jail_fs);

	return 0;
}
/* consulted by free_and_exit() to tell child-side exits from parent-side ones */
static bool exit_from_child;

/*
 * Release global resources and terminate the process with status ret.
 *
 * Unless exit_from_child is set, cgroups (when an OCI bundle is in use)
 * and the parent ubus context are released first, and free_opts() is
 * told that it runs in the parent.
 */
static void free_and_exit(int ret)
{
	const bool in_parent = !exit_from_child;

	if (in_parent) {
		if (opts.ocibundle)
			cgroups_free();

		if (parent_ctx)
			ubus_free(parent_ctx);
	}

	free_opts(in_parent);
	exit(ret);
}
static void post_jail_fs(void);
/*
 * Switch the root filesystem to jail_root and finish the setup inside
 * it: pivot_root() with "<jail_root>/old" as the place for the old root,
 * detach the leftovers of the old root, create the device nodes,
 * optionally remount "/" read-only, restore the umask saved by
 * build_jail_fs() and continue with post_jail_fs().
 *
 * Failures of pivot_root(), chdir() or create_devices() terminate the
 * process through free_and_exit(-1).
 */
static void enter_jail_fs(void)
{
/* sized for jail_root plus the 4 extra characters of "/old" */
char dirbuf[sizeof(jail_root) + 4];
snprintf(dirbuf, sizeof(dirbuf), "%s/old", jail_root);
mkdir(dirbuf, 0755);
if (pivot_root(jail_root, dirbuf) == -1) {
ERROR("pivot_root(%s, %s) failed: %m\n", jail_root, dirbuf);
free_and_exit(-1);
}
if (chdir("/")) {
ERROR("chdir(/) (after pivot_root) failed: %m\n");
free_and_exit(-1);
}
/* the former jail_root directory is now visible below /old */
snprintf(dirbuf, sizeof(dirbuf), "/old%s", jail_root);
umount2(dirbuf, MNT_DETACH);
rmdir(dirbuf);
if (opts.tmpoverlaysize) {
char tmpdirbuf[sizeof(tmpovdir) + 4];
snprintf(tmpdirbuf, sizeof(tmpdirbuf), "/old%s", tmpovdir);
umount2(tmpdirbuf, MNT_DETACH);
rmdir(tmpdirbuf);
}
/* finally drop the old root itself */
umount2("/old", MNT_DETACH);
rmdir("/old");
if (create_devices()) {
ERROR("create_devices() failed\n");
free_and_exit(-1);
}
if (opts.ronly)
mount(NULL, "/", "bind", MS_REMOUNT | MS_BIND | MS_RDONLY, 0);
umask(old_umask);
post_jail_fs();
}
/*
 * Write mapstr to /proc/<child_pid>/uid_map, or to gid_map when gidmap
 * is true.
 *
 * Returns 0 on success and -1 when the path cannot be formatted, the
 * file cannot be opened, or the write fails.
 *
 * dprintf() returns the number of bytes written, so only a negative
 * result is an error. The previous check treated every non-zero result,
 * i.e. every successful write, as a failure.
 */
static int write_uid_gid_map(pid_t child_pid, bool gidmap, char *mapstr)
{
	int map_file;
	char map_path[64];

	if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s",
		child_pid, gidmap?"gid_map":"uid_map") < 0)
		return -1;

	if ((map_file = open(map_path, O_WRONLY)) < 0)
		return -1;

	if (dprintf(map_file, "%s", mapstr) < 0) {
		close(map_file);
		return -1;
	}

	close(map_file);
	return 0;
}
/*
 * Write a one-line mapping "0 <id> 1" to the uid_map (or gid_map when
 * @gidmap is true) of process @child_pid.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int write_single_uid_gid_map(pid_t child_pid, bool gidmap, int id)
{
	char path[64];
	int fd;
	int rc;

	rc = snprintf(path, sizeof(path), "/proc/%d/%s",
		      child_pid, gidmap ? "gid_map" : "uid_map");
	if (rc < 0)
		return -1;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;

	rc = dprintf(fd, "%d %d %d\n", 0, id, 1);
	close(fd);

	return (rc < 0) ? -1 : 0;
}
/*
 * Write "allow" or "deny" (depending on @allow) to /proc/<child_pid>/setgroups.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int write_setgroups(pid_t child_pid, bool allow)
{
	char path[64];
	const char *verdict = allow ? "allow" : "deny";
	int fd;
	int rc;

	if (snprintf(path, sizeof(path), "/proc/%d/setgroups", child_pid) < 0)
		return -1;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;

	rc = dprintf(fd, "%s", verdict);
	close(fd);

	return (rc == -1) ? -1 : 0;
}
/*
 * Resolve the user and group names given in opts.user / opts.group.
 *
 * @user:     receives the uid of opts.user, or -1 if no user was requested
 * @user_gid: receives the primary gid of opts.user, or -1 if no user was requested
 * @gr_gid:   receives the gid of opts.group, or -1 if no group was requested
 *
 * A name which cannot be resolved is fatal: the process exits through
 * free_and_exit(EXIT_FAILURE).
 */
static void get_jail_user(int *user, int *user_gid, int *gr_gid)
{
	struct passwd *p = NULL;
	struct group *g = NULL;

	if (opts.user) {
		/* a missing entry may leave errno untouched; clear it so the log is not stale */
		errno = 0;
		p = getpwnam(opts.user);
		if (!p) {
			ERROR("failed to get uid/gid for user %s: %d (%s)\n",
			      opts.user, errno, strerror(errno));
			free_and_exit(EXIT_FAILURE);
		}
		*user = p->pw_uid;
		*user_gid = p->pw_gid;
	} else {
		*user = -1;
		*user_gid = -1;
	}

	if (opts.group) {
		errno = 0;
		g = getgrnam(opts.group);
		if (!g) {
			ERROR("failed to get gid for group %s: %m\n", opts.group);
			free_and_exit(EXIT_FAILURE);
		}
		*gr_gid = g->gr_gid;
	} else {
		*gr_gid = -1;
	}
}
/*
 * Switch the calling process to the given identity.
 *
 * Order: supplementary groups of opts.user (only when a user is configured
 * and @user_gid is valid), then real/effective gid, then real/effective uid.
 * An id of -1 means "leave unchanged". Any failure is fatal.
 */
static void set_jail_user(int pw_uid, int user_gid, int gr_gid)
{
	if (opts.user && user_gid != -1) {
		if (initgroups(opts.user, user_gid)) {
			ERROR("failed to initgroups() for user %s: %m\n", opts.user);
			free_and_exit(EXIT_FAILURE);
		}
	}

	if (gr_gid != -1) {
		if (setregid(gr_gid, gr_gid)) {
			ERROR("failed to set group id %d: %m\n", gr_gid);
			free_and_exit(EXIT_FAILURE);
		}
	}

	if (pw_uid != -1) {
		if (setreuid(pw_uid, pw_uid)) {
			ERROR("failed to set user id %d: %m\n", pw_uid);
			free_and_exit(EXIT_FAILURE);
		}
	}
}
/*
 * Apply every resource limit stored in opts.rlimits[].
 *
 * Unset (NULL) slots are skipped. Returns 0 on success or the errno of the
 * first setrlimit() call that failed.
 */
static int apply_rlimits(void)
{
	int resource = 0;

	while (resource < RLIM_NLIMITS) {
		if (opts.rlimits[resource]) {
			DEBUG("applying limits to resource %u\n", resource);
			if (setrlimit(resource, opts.rlimits[resource]))
				return errno;
		}
		++resource;
	}

	return 0;
}
#define MAX_ENVP 64
/*
 * Build the environment vector handed to the jailed process.
 *
 * @seccomp: path of the seccomp filter file, or NULL when not used
 * @ocienvp: NULL-terminated list of additional "NAME=value" strings, may be NULL
 *
 * The result lives in static storage (it is overwritten by the next call)
 * and is always NULL-terminated. At most MAX_ENVP - 1 entries are stored;
 * surplus entries from @ocienvp are dropped with an error message.
 *
 * Returns the vector, or NULL if @seccomp is set but the preload library
 * cannot be found.
 */
static char** build_envp(const char *seccomp, char **ocienvp)
{
	static char *envp[MAX_ENVP];
	static char preload_var[PATH_MAX];
	static char seccomp_var[PATH_MAX];
	static char seccomp_debug_var[20];
	static char debug_var[] = "LD_DEBUG=all";
	static char container_var[] = "container=ujail";
	const char *preload_lib = find_lib("libpreload-seccomp.so");
	char **addenv;
	int count = 0;

	if (seccomp && !preload_lib) {
		ERROR("failed to add preload-lib to env\n");
		return NULL;
	}

	if (seccomp) {
		snprintf(seccomp_var, sizeof(seccomp_var), "SECCOMP_FILE=%s", seccomp);
		envp[count++] = seccomp_var;
		snprintf(seccomp_debug_var, sizeof(seccomp_debug_var), "SECCOMP_DEBUG=%2d", debug);
		envp[count++] = seccomp_debug_var;
		snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib);
		envp[count++] = preload_var;
	}

	envp[count++] = container_var;

	if (debug > 1)
		envp[count++] = debug_var;

	for (addenv = ocienvp; addenv && *addenv; ++addenv) {
		/* the last slot is reserved for the terminating NULL */
		if (count >= MAX_ENVP - 1) {
			ERROR("environment limited to %d extra records, truncating\n", MAX_ENVP);
			break;
		}
		envp[count++] = *addenv;
	}

	/*
	 * Terminate explicitly: the array is static, so it may be completely
	 * filled or still hold entries from a previous call.
	 */
	envp[count] = NULL;

	return envp;
}
/* Print the command line help and the security warning to stderr. */
static void usage(void)
{
	fprintf(stderr, "ujail <options> -- <binary> <params ...>\n");
	fprintf(stderr, " -d <num>\tshow debug log (increase num to increase verbosity)\n");
	fprintf(stderr, " -S <file>\tseccomp filter config\n");
	fprintf(stderr, " -C <file>\tcapabilities drop config\n");
	fprintf(stderr, " -c\t\tset PR_SET_NO_NEW_PRIVS\n");
	fprintf(stderr, " -n <name>\tthe name of the jail\n");
	fprintf(stderr, " -e <var>\timport environment variable\n");
	fprintf(stderr, "namespace jail options:\n");
	fprintf(stderr, " -h <hostname>\tchange the hostname of the jail\n");
	fprintf(stderr, " -N\t\tjail has network namespace\n");
	fprintf(stderr, " -f\t\tjail has user namespace\n");
	fprintf(stderr, " -F\t\tjail has cgroups namespace\n");
	fprintf(stderr, " -r <file>\treadonly files that should be staged\n");
	fprintf(stderr, " -w <file>\twriteable files that should be staged\n");
	fprintf(stderr, " -p\t\tjail has /proc\n");
	fprintf(stderr, " -s\t\tjail has /sys\n");
	fprintf(stderr, " -l\t\tjail has /dev/log\n");
	fprintf(stderr, " -u\t\tjail has a ubus socket\n");
	fprintf(stderr, " -U <name>\tuser to run jailed process\n");
	fprintf(stderr, " -G <name>\tgroup to run jailed process\n");
	fprintf(stderr, " -o\t\tremount jail root (/) read only\n");
	fprintf(stderr, " -R <dir>\texternal jail rootfs (system container)\n");
	fprintf(stderr, " -O <dir>\tdirectory for r/w overlayfs\n");
	fprintf(stderr, " -T <size>\tuse tmpfs r/w overlayfs with <size>\n");
	fprintf(stderr, " -E\t\tfail if jail cannot be setup\n");
	fprintf(stderr, " -y\t\tprovide jail console\n");
	fprintf(stderr, " -J <dir>\tcreate container from OCI bundle\n");
	fprintf(stderr, " -I <infra>\tshare namespace with another container\n");
	fprintf(stderr, " -i\t\tstart container immediately\n");
	fprintf(stderr, " -P <pidfile>\tcreate <pidfile>\n");
	/* adjacent literals instead of backslash continuations, so source indentation cannot leak into the text */
	fprintf(stderr, "\nWarning: by default root inside the jail is the same\n"
			"and he has the same powers as root outside the jail,\n"
			"thus he can escape the jail and/or break stuff.\n"
			"Please use seccomp/capabilities (-S/-C) to restrict his powers\n\n"
			"If you use none of the namespace jail options,\n"
			"ujail will not use namespace/build a jail,\n"
			"and will only drop capabilities/apply seccomp filter.\n\n");
}
/*
 * Map a CLONE_NEW* namespace flag to the matching file descriptor slot in
 * opts.setns.
 *
 * Returns a pointer to the slot, or NULL for an unknown namespace type.
 */
static int* get_namespace_fd(const unsigned int nstype)
{
	int *slot = NULL;

	switch (nstype) {
	case CLONE_NEWPID:
		slot = &opts.setns.pid;
		break;
	case CLONE_NEWNET:
		slot = &opts.setns.net;
		break;
	case CLONE_NEWNS:
		slot = &opts.setns.ns;
		break;
	case CLONE_NEWIPC:
		slot = &opts.setns.ipc;
		break;
	case CLONE_NEWUTS:
		slot = &opts.setns.uts;
		break;
	case CLONE_NEWUSER:
		slot = &opts.setns.user;
		break;
	case CLONE_NEWCGROUP:
		slot = &opts.setns.cgroup;
		break;
#ifdef CLONE_NEWTIME
	case CLONE_NEWTIME:
		slot = &opts.setns.time;
		break;
#endif
	default:
		break;
	}

	return slot;
}
/*
 * Join the namespace of type @nstype if a file descriptor for it has been
 * stored in opts.setns, then close that descriptor.
 *
 * Returns 0 if no descriptor is stored or the namespace was joined, and the
 * errno of setns() otherwise.
 *
 * The stored descriptor value is deliberately left untouched after close():
 * exec_jail() still tests opts.setns.user != -1 after calling this.
 */
static int setns_open(unsigned long nstype)
{
	int *fd = get_namespace_fd(nstype);
	int ret = 0;

	assert(fd != NULL);

	if (*fd < 0)
		return 0;

	/* capture errno right away, close() below may overwrite it */
	if (setns(*fd, nstype) == -1)
		ret = errno;

	close(*fd);

	return ret;
}
/* non-zero while the jailed process is alive; cleared by jail_process_handler() */
static int jail_running = 0;
/* exit status or terminating signal number of the jailed process */
static int jail_return_code = 0;

static void jail_process_timeout_cb(struct uloop_timeout *t);

/* timer armed when SIGTERM is forwarded to the jail, see jail_handle_signal() */
static struct uloop_timeout jail_process_timeout = {
	.cb = jail_process_timeout_cb,
};

static void poststop(void);

/*
 * uloop callback invoked when the jailed process has terminated.
 *
 * Cancels the pending kill timer, records how the process ended, marks the
 * jail as stopped and continues with poststop().
 */
static void jail_process_handler(struct uloop_process *c, int ret)
{
	const bool exited = WIFEXITED(ret);

	uloop_timeout_cancel(&jail_process_timeout);

	jail_return_code = exited ? WEXITSTATUS(ret) : WTERMSIG(ret);
	if (exited)
		INFO("jail (%d) exited with exit: %d\n", c->pid, jail_return_code);
	else
		INFO("jail (%d) exited with signal: %d\n", c->pid, jail_return_code);

	jail_running = 0;
	poststop();
}
/* uloop process handle of the jailed process */
static struct uloop_process jail_process = {
	.cb = jail_process_handler,
};

/*
 * Fires when the jailed process did not terminate within the timeout which
 * was armed on SIGTERM: kill it with SIGKILL.
 */
static void jail_process_timeout_cb(struct uloop_timeout *t)
{
	const pid_t victim = jail_process.pid;

	DEBUG("jail process failed to stop, sending SIGKILL\n");
	kill(victim, SIGKILL);
}
/*
 * Signal handler: forward @signo to a running hook process and/or to the
 * running jailed process. For SIGTERM a timer of opts.term_timeout seconds
 * is armed for each of them; the jail's timer sends SIGKILL if it is still
 * running when the timer expires.
 */
static void jail_handle_signal(int signo)
{
	const bool arm_kill_timer = (signo == SIGTERM);

	if (hook_running) {
		DEBUG("forwarding signal %d to the hook process\n", signo);
		kill(hook_process.pid, signo);
		/* fall back to SIGKILL if the hook ignores SIGTERM */
		if (arm_kill_timer)
			uloop_timeout_set(&hook_process_timeout, opts.term_timeout * 1000);
	}

	if (jail_running) {
		DEBUG("forwarding signal %d to the jailed process\n", signo);
		kill(jail_process.pid, signo);
		/* fall back to SIGKILL if the jail ignores SIGTERM */
		if (arm_kill_timer)
			uloop_timeout_set(&jail_process_timeout, opts.term_timeout * 1000);
	}
}
/*
 * Install jail_handle_signal() for every signal number below _NSIG, except
 * SIGCHLD, SIGPIPE, SIGSEGV, SIGSTOP and SIGKILL and except numbers which
 * sigismember() reports as not contained in a full signal set.
 */
static void signals_init(void)
{
	sigset_t all_signals;
	int signo;

	sigfillset(&all_signals);

	for (signo = 0; signo < _NSIG; ++signo) {
		struct sigaction action = { 0 };

		if (!sigismember(&all_signals, signo))
			continue;

		switch (signo) {
		case SIGCHLD:
		case SIGPIPE:
		case SIGSEGV:
		case SIGSTOP:
		case SIGKILL:
			/* leave these at their current disposition */
			continue;
		default:
			break;
		}

		action.sa_handler = jail_handle_signal;
		sigaction(signo, &action, NULL);
	}
}
static void pre_exec_jail(struct uloop_timeout *t);
/* timer used to continue jail setup (pre_exec_jail) from within the child's uloop */
static struct uloop_timeout pre_exec_timeout = {
.cb = pre_exec_jail,
};
/*
 * Synchronisation pipes shared with the parent. In the child, pipes[1] is
 * written and pipes[2] is read, while pipes[0] and pipes[3] are closed
 * (presumably the parent's ends of the two pipes — confirm at the call site).
 */
int pipes[4];
/*
 * Entry point of the jailed child process (presumably passed to clone() —
 * confirm at the call site). @arg is unused.
 *
 * Sequence:
 *  - join pre-opened user, net, mount, ipc and uts namespaces
 *  - send 'i' to the parent, then wait for the parent to answer with 'O'
 *  - unshare and/or join the cgroup namespace
 *  - when a user namespace is in use, become uid/gid 0 and drop supplementary groups
 *  - set the hostname if requested
 *  - schedule pre_exec_jail() and run the uloop
 *
 * Returns EXIT_FAILURE if the handshake with the parent fails; other errors
 * and a returning uloop_run() end in free_and_exit().
 */
static int exec_jail(void *arg)
{
char buf[1];
/* from now on free_and_exit() must skip parent-only cleanup */
exit_from_child = true;
prctl(PR_SET_SECUREBITS, 0);
uloop_init();
signals_init();
close(pipes[0]);
close(pipes[3]);
/* NOTE(review): return values of setns_open() are ignored here */
setns_open(CLONE_NEWUSER);
setns_open(CLONE_NEWNET);
setns_open(CLONE_NEWNS);
setns_open(CLONE_NEWIPC);
setns_open(CLONE_NEWUTS);
/* tell the parent we got this far ... */
buf[0] = 'i';
if (write(pipes[1], buf, 1) < 1) {
ERROR("can't write to parent\n");
return EXIT_FAILURE;
}
close(pipes[1]);
/* ... and wait for its go-ahead ('O'); anything else means the parent failed */
if (read(pipes[2], buf, 1) < 1) {
ERROR("can't read from parent\n");
return EXIT_FAILURE;
}
if (buf[0] != 'O') {
ERROR("parent had an error, child exiting\n");
return EXIT_FAILURE;
}
/* NOTE(review): the result of unshare() is not checked */
if (opts.namespace & CLONE_NEWCGROUP)
unshare(CLONE_NEWCGROUP);
setns_open(CLONE_NEWCGROUP);
/* new or joined user namespace: switch to root inside it and clear supplementary groups */
if ((opts.namespace & CLONE_NEWUSER) || (opts.setns.user != -1)) {
if (setregid(0, 0) < 0) {
ERROR("setgid\n");
free_and_exit(EXIT_FAILURE);
}
if (setreuid(0, 0) < 0) {
ERROR("setuid\n");
free_and_exit(EXIT_FAILURE);
}
if (setgroups(0, NULL) < 0) {
ERROR("setgroups\n");
free_and_exit(EXIT_FAILURE);
}
}
/* the hostname is only applied when at least one new namespace was requested */
if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
&& sethostname(opts.hostname, strlen(opts.hostname))) {
ERROR("sethostname(%s) failed: %m\n", opts.hostname);
free_and_exit(EXIT_FAILURE);
}
uloop_timeout_add(&pre_exec_timeout);
uloop_run();
/* only reached when uloop_run() returns */
free_and_exit(-1);
return -1;
}
/*
 * Timer callback run inside the child's uloop: build the jail filesystem
 * when a mount namespace was requested.
 *
 * If that fails the process exits; in every other case (no mount namespace,
 * or build_jail_fs() returned 0) the createContainer hooks are run with
 * post_jail_fs() as continuation.
 */
static void pre_exec_jail(struct uloop_timeout *t)
{
	const bool want_mount_ns = (opts.namespace & CLONE_NEWNS) != 0;

	if (want_mount_ns && build_jail_fs()) {
		ERROR("failed to build jail fs\n");
		free_and_exit(EXIT_FAILURE);
		return;
	}

	run_hooks(opts.hooks.createContainer, post_jail_fs);
}
static void post_start_hook(void);

/*
 * Continue once the jail filesystem is in place: wait for the parent to send
 * '!' on pipes[2], close that pipe and run the startContainer hooks with
 * post_start_hook() as continuation. A failed read or any other byte is fatal.
 */
static void post_jail_fs(void)
{
	char token;

	if (read(pipes[2], &token, 1) < 1) {
		ERROR("can't read from parent\n");
		free_and_exit(EXIT_FAILURE);
	}

	if (token != '!') {
		ERROR("parent had an error, child exiting\n");
		free_and_exit(EXIT_FAILURE);
	}

	close(pipes[2]);
	run_hooks(opts.hooks.startContainer, post_start_hook);
}
/*
 * Final stage in the child before exec: drop privileges and execute the
 * jailed program.
 *
 * Steps, in order: keep capabilities across the uid/gid change (only when a
 * capability set is to be applied), reduce capabilities to what is still
 * needed, switch user/group and supplementary groups, set the umask, restore
 * (and outside a user namespace lock) the securebits, apply the final
 * capability sets, set no_new_privs, build the environment, change the
 * working directory, apply the OCI seccomp filter and exec.
 *
 * Every failure ends the process; this function does not return.
 */
static void post_start_hook(void)
{
int pw_uid, pw_gid, gr_gid;
/*
* make sure setuid/setgid won't drop capabilities in case capabilities
* have been specified explicitly.
*/
if (opts.capset.apply) {
if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP)) {
ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n");
free_and_exit(EXIT_FAILURE);
}
}
/* drop capabilities, retain those still needed to further setup jail */
if (applyOCIcapabilities(opts.capset, (1LLU << CAP_SETGID) | (1LLU << CAP_SETUID) | (1LLU << CAP_SETPCAP)))
free_and_exit(EXIT_FAILURE);
/* use either cmdline-supplied user/group or uid/gid from OCI spec */
get_jail_user(&pw_uid, &pw_gid, &gr_gid);
/*
 * NOTE(review): "?:" falls back to the cmdline value whenever the opts.*
 * value is 0, so an id of 0 stored in opts cannot be told apart from
 * "not set" here.
 */
set_jail_user(opts.pw_uid?:pw_uid, opts.pw_gid?:pw_gid, opts.gr_gid?:gr_gid);
if (opts.additional_gids &&
(setgroups(opts.num_additional_gids, opts.additional_gids) < 0)) {
ERROR("setgroups failed: %m\n");
free_and_exit(EXIT_FAILURE);
}
if (opts.set_umask)
umask(opts.umask);
/* restore securebits back to normal (and lock them if not in userns) */
if (opts.capset.apply) {
if (prctl(PR_SET_SECUREBITS, (opts.namespace & CLONE_NEWUSER)?0:
SECBIT_KEEP_CAPS_LOCKED|SECBIT_NO_SETUID_FIXUP_LOCKED|SECBIT_NOROOT_LOCKED)) {
ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n");
free_and_exit(EXIT_FAILURE);
}
}
/* drop remaining capabilities to end up with specified sets */
if (applyOCIcapabilities(opts.capset, 0))
free_and_exit(EXIT_FAILURE);
if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
free_and_exit(EXIT_FAILURE);
}
char **envp = build_envp(opts.seccomp, opts.envp);
if (!envp)
free_and_exit(EXIT_FAILURE);
if (opts.cwd && chdir(opts.cwd))
free_and_exit(EXIT_FAILURE);
if (opts.ociseccomp && applyOCIlinuxseccomp(opts.ociseccomp))
free_and_exit(EXIT_FAILURE);
uloop_end();
/*
 * NOTE(review): opts.jail_argv and opts.envp are still used below; confirm
 * that free_opts(false) leaves them intact.
 */
free_opts(false);
INFO("exec-ing %s\n", *opts.jail_argv);
if (opts.envp) /* respect PATH if potentially set in ENV */
execvpe(*opts.jail_argv, opts.jail_argv, envp);
else
execve(*opts.jail_argv, opts.jail_argv, envp);
/* we get there only if execve fails */
ERROR("failed to execve %s: %m\n", *opts.jail_argv);
exit(EXIT_FAILURE);
}
/*
 * Open the namespace file /proc/<target_ns>/ns/<nstype> read-only.
 *
 * @nstype:    name of the entry below /proc/<pid>/ns/ (e.g. "net")
 * @target_ns: PID of the process owning the namespace
 *
 * Returns the open file descriptor, or -1 with errno set by open().
 */
int ns_open_pid(const char *nstype, const pid_t target_ns)
{
	char pid_pid_path[PATH_MAX];

	/* pid_t is signed: print it as int, like jail_join_ns() does */
	snprintf(pid_pid_path, sizeof(pid_pid_path), "/proc/%d/ns/%s", (int)target_ns, nstype);

	return open(pid_pid_path, O_RDONLY);
}
static int parseOCIenvarray(struct blob_attr *msg, char ***envp)
{
struct blob_attr *cur;
int sz = 0, rem;
blobmsg_for_each_attr(cur, msg, rem)
++sz;
if (sz > 0) {
*envp = calloc(1 + sz, sizeof(char*));
if (!(*envp))
return ENOMEM;
} else {
*envp = NULL;
return 0;
}
sz = 0;
blobmsg_for_each_attr(cur, msg, rem)
(*envp)[sz++] = strdup(blobmsg_get_string(cur));
if (sz)
(*envp)[sz] = NULL;
return 0;
}
enum {
	OCI_ROOT_PATH,
	OCI_ROOT_READONLY,
	__OCI_ROOT_MAX,
};

/* attributes read from the OCI "root" object */
static const struct blobmsg_policy oci_root_policy[] = {
	[OCI_ROOT_PATH] = { "path", BLOBMSG_TYPE_STRING },
	[OCI_ROOT_READONLY] = { "readonly", BLOBMSG_TYPE_BOOL },
};

/*
 * Parse the OCI "root" object.
 *
 * @jsonfile: path of the JSON file being parsed; relative root paths are
 *            resolved against the directory containing it
 * @msg:      blobmsg table holding the "root" object
 *
 * Stores the canonical root path (realpath) in opts.extroot and, if present,
 * the read-only flag in opts.ronly.
 *
 * Returns 0 on success, ENODATA if "path" is missing, ENOTDIR if a relative
 * path is given but @jsonfile contains no '/', ENAMETOOLONG if the combined
 * path does not fit into PATH_MAX, or the errno of realpath().
 */
static int parseOCIroot(const char *jsonfile, struct blob_attr *msg)
{
	char extroot[PATH_MAX];
	struct blob_attr *tb[__OCI_ROOT_MAX];
	const char *root_path;
	const char *slash;
	size_t dirlen = 0;
	int len;

	blobmsg_parse(oci_root_policy, __OCI_ROOT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));

	if (!tb[OCI_ROOT_PATH])
		return ENODATA;

	root_path = blobmsg_get_string(tb[OCI_ROOT_PATH]);

	/* prepend bundle directory in case of relative paths */
	if (root_path[0] != '/') {
		slash = strrchr(jsonfile, '/');
		if (!slash)
			return ENOTDIR;

		/* keep everything up to and including the last '/' */
		dirlen = (size_t)(slash - jsonfile) + 1;
		if (dirlen >= sizeof(extroot))
			return ENAMETOOLONG;
	}

	/* fail instead of resolving a silently truncated path */
	len = snprintf(extroot, sizeof(extroot), "%.*s%s", (int)dirlen, jsonfile, root_path);
	if (len < 0 || (size_t)len >= sizeof(extroot))
		return ENAMETOOLONG;

	/* follow symbolic link(s) */
	opts.extroot = realpath(extroot, NULL);
	if (!opts.extroot)
		return errno;

	if (tb[OCI_ROOT_READONLY])
		opts.ronly = blobmsg_get_bool(tb[OCI_ROOT_READONLY]);

	return 0;
}
enum {
	OCI_HOOK_PATH,
	OCI_HOOK_ARGS,
	OCI_HOOK_ENV,
	OCI_HOOK_TIMEOUT,
	__OCI_HOOK_MAX,
};

/* attributes of a single OCI hook object */
static const struct blobmsg_policy oci_hook_policy[] = {
	[OCI_HOOK_PATH] = { "path", BLOBMSG_TYPE_STRING },
	[OCI_HOOK_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
	[OCI_HOOK_ENV] = { "env", BLOBMSG_TYPE_ARRAY },
	[OCI_HOOK_TIMEOUT] = { "timeout", BLOBMSG_TYPE_INT32 },
};

/*
 * Parse an array of OCI hook objects into a NULL-terminated list.
 *
 * @hooklist: receives the newly allocated list; left untouched for an empty
 *            array and set to NULL on failure
 * @msg:      blobmsg array of hook objects
 *
 * Without "args" the hook's argv consists of its path only.
 *
 * Returns 0 on success, EINVAL if a hook has no "path", ENOMEM on allocation
 * failure, or the error of parseOCIenvarray().
 */
static int parseOCIhook(struct hook_execvpe ***hooklist, struct blob_attr *msg)
{
	struct blob_attr *tb[__OCI_HOOK_MAX];
	struct blob_attr *cur;
	struct hook_execvpe *hook;
	int rem, ret = 0;
	int idx = 0;

	blobmsg_for_each_attr(cur, msg, rem)
		++idx;

	if (!idx)
		return 0;

	*hooklist = calloc(idx + 1, sizeof(struct hook_execvpe *));
	idx = 0;

	if (!(*hooklist))
		return ENOMEM;

	blobmsg_for_each_attr(cur, msg, rem) {
		blobmsg_parse(oci_hook_policy, __OCI_HOOK_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));

		if (!tb[OCI_HOOK_PATH]) {
			ret = EINVAL;
			goto errout;
		}

		hook = calloc(1, sizeof(struct hook_execvpe));
		if (!hook) {
			ret = ENOMEM;
			goto errout;
		}
		/* link it immediately so the error path frees it together with the list */
		(*hooklist)[idx] = hook;

		if (tb[OCI_HOOK_ARGS]) {
			ret = parseOCIenvarray(tb[OCI_HOOK_ARGS], &hook->argv);
			if (ret)
				goto errout;
		} else {
			hook->argv = calloc(2, sizeof(char *));
			if (!hook->argv) {
				ret = ENOMEM;
				goto errout;
			}

			hook->argv[0] = strdup(blobmsg_get_string(tb[OCI_HOOK_PATH]));
			hook->argv[1] = NULL;
			if (!hook->argv[0]) {
				ret = ENOMEM;
				goto errout;
			}
		}

		if (tb[OCI_HOOK_ENV]) {
			ret = parseOCIenvarray(tb[OCI_HOOK_ENV], &hook->envp);
			if (ret)
				goto errout;
		}

		if (tb[OCI_HOOK_TIMEOUT])
			hook->timeout = blobmsg_get_u32(tb[OCI_HOOK_TIMEOUT]);

		hook->file = strdup(blobmsg_get_string(tb[OCI_HOOK_PATH]));
		if (!hook->file) {
			ret = ENOMEM;
			goto errout;
		}

		++idx;
	}

	(*hooklist)[idx] = NULL;

	DEBUG("added %d hooks\n", idx);

	return 0;

errout:
	free_hooklist(*hooklist);
	*hooklist = NULL;

	return ret;
}
enum {
	OCI_HOOKS_PRESTART,
	OCI_HOOKS_CREATERUNTIME,
	OCI_HOOKS_CREATECONTAINER,
	OCI_HOOKS_STARTCONTAINER,
	OCI_HOOKS_POSTSTART,
	OCI_HOOKS_POSTSTOP,
	__OCI_HOOKS_MAX,
};

/* hook lists of the OCI "hooks" object */
static const struct blobmsg_policy oci_hooks_policy[] = {
	[OCI_HOOKS_PRESTART] = { "prestart", BLOBMSG_TYPE_ARRAY },
	[OCI_HOOKS_CREATERUNTIME] = { "createRuntime", BLOBMSG_TYPE_ARRAY },
	[OCI_HOOKS_CREATECONTAINER] = { "createContainer", BLOBMSG_TYPE_ARRAY },
	[OCI_HOOKS_STARTCONTAINER] = { "startContainer", BLOBMSG_TYPE_ARRAY },
	[OCI_HOOKS_POSTSTART] = { "poststart", BLOBMSG_TYPE_ARRAY },
	[OCI_HOOKS_POSTSTOP] = { "poststop", BLOBMSG_TYPE_ARRAY },
};

/*
 * Parse the OCI "hooks" object into opts.hooks.
 *
 * The deprecated "prestart" list is ignored with a warning. If parsing any
 * list fails, the lists parsed before it are freed again and their pointers
 * in opts.hooks are reset to NULL, so nothing is left dangling.
 *
 * Returns 0 on success or the error reported by parseOCIhook().
 */
static int parseOCIhooks(struct blob_attr *msg)
{
	struct blob_attr *tb[__OCI_HOOKS_MAX];
	int ret;

	blobmsg_parse(oci_hooks_policy, __OCI_HOOKS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));

	if (tb[OCI_HOOKS_PRESTART])
		INFO("warning: ignoring deprecated prestart hook\n");

	if (tb[OCI_HOOKS_CREATERUNTIME]) {
		ret = parseOCIhook(&opts.hooks.createRuntime, tb[OCI_HOOKS_CREATERUNTIME]);
		if (ret)
			return ret;
	}

	if (tb[OCI_HOOKS_CREATECONTAINER]) {
		ret = parseOCIhook(&opts.hooks.createContainer, tb[OCI_HOOKS_CREATECONTAINER]);
		if (ret)
			goto out_createruntime;
	}

	if (tb[OCI_HOOKS_STARTCONTAINER]) {
		ret = parseOCIhook(&opts.hooks.startContainer, tb[OCI_HOOKS_STARTCONTAINER]);
		if (ret)
			goto out_createcontainer;
	}

	if (tb[OCI_HOOKS_POSTSTART]) {
		ret = parseOCIhook(&opts.hooks.poststart, tb[OCI_HOOKS_POSTSTART]);
		if (ret)
			goto out_startcontainer;
	}

	if (tb[OCI_HOOKS_POSTSTOP]) {
		ret = parseOCIhook(&opts.hooks.poststop, tb[OCI_HOOKS_POSTSTOP]);
		if (ret)
			goto out_poststart;
	}

	return 0;

	/* same convention as parseOCIhook(): reset each pointer after freeing */
out_poststart:
	free_hooklist(opts.hooks.poststart);
	opts.hooks.poststart = NULL;
out_startcontainer:
	free_hooklist(opts.hooks.startContainer);
	opts.hooks.startContainer = NULL;
out_createcontainer:
	free_hooklist(opts.hooks.createContainer);
	opts.hooks.createContainer = NULL;
out_createruntime:
	free_hooklist(opts.hooks.createRuntime);
	opts.hooks.createRuntime = NULL;

	return ret;
}
enum {
OCI_PROCESS_USER_UID,
OCI_PROCESS_USER_GID,
OCI_PROCESS_USER_UMASK,
OCI_PROCESS_USER_ADDITIONALGIDS,
__OCI_PROCESS_USER_MAX,
};
static const struct blobmsg_policy oci_process_user_policy[] = {
[OCI_PROCESS_USER_UID] = { "uid", BLOBMSG_TYPE_INT32 },
[OCI_PROCESS_USER_GID] = { "gid", BLOBMSG_TYPE_INT32 },
[OCI_PROCESS_USER_UMASK] = { "umask", BLOBMSG_TYPE_INT32 },
[OCI_PROCESS_USER_ADDITIONALGIDS] = { "additionalGids", BLOBMSG_TYPE_ARRAY },
};
static int parseOCIprocessuser(struct blob_attr *msg) {
struct blob_attr *tb[__OCI_PROCESS_USER_MAX];
struct blob_attr *cur;
int rem;
int has_gid = 0;
blobmsg_parse(oci_process_user_policy, __OCI_PROCESS_USER_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
if (tb[OCI_PROCESS_USER_UID])
opts.pw_uid = blobmsg_get_u32(tb[OCI_PROCESS_USER_UID]);
if (tb[OCI_PROCESS_USER_GID]) {
opts.pw_gid = blobmsg_get_u32(tb[OCI_PROCESS_USER_GID]);
opts.gr_gid = blobmsg_get_u32(tb[OCI_PROCESS_USER_GID]);
has_gid = 1;
}
if (tb[OCI_PROCESS_USER_ADDITIONALGIDS]) {
size_t gidcnt = 0;
blobmsg_for_each_attr(cur, tb[OCI_PROCESS_USER_ADDITIONALGIDS], rem) {
++gidcnt;
if (has_gid && (blobmsg_get_u32(cur) == opts.gr_gid))
continue;
}
if (gidcnt) {
opts.additional_gids = calloc(gidcnt + has_gid, sizeof(gid_t));
gidcnt = 0;
/* always add primary GID to set of GIDs if set */
if (has_gid)
opts.additional_gids[gidcnt++] = opts.gr_gid;
blobmsg_for_each_attr(cur, tb[OCI_PROCESS_USER_ADDITIONALGIDS], rem) {
if (has_gid && (blobmsg_get_u32(cur) == opts.gr_gid))
continue;
opts.additional_gids[gidcnt++] = blobmsg_get_u32(cur);
}
opts.num_additional_gids = gidcnt;
}
DEBUG("read %zu additional groups\n", gidcnt);
}
if (tb[OCI_PROCESS_USER_UMASK]) {
opts.umask = blobmsg_get_u32(tb[OCI_PROCESS_USER_UMASK]);
opts.set_umask = true;
}
return 0;
}
/* indices into the attribute table used when parsing one OCI rlimit object */
enum {
OCI_PROCESS_RLIMIT_TYPE,
OCI_PROCESS_RLIMIT_SOFT,
OCI_PROCESS_RLIMIT_HARD,
__OCI_PROCESS_RLIMIT_MAX,
};
/*
 * Policy for one entry of "process.rlimits": "type" is a string (matched
 * against "RLIMIT_<name>" by resolve_rlimit()), "soft" and "hard" are read
 * through the 64-bit cast helpers.
 */
static const struct blobmsg_policy oci_process_rlimit_policy[] = {
[OCI_PROCESS_RLIMIT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
[OCI_PROCESS_RLIMIT_SOFT] = { "soft", BLOBMSG_CAST_INT64 },
[OCI_PROCESS_RLIMIT_HARD] = { "hard", BLOBMSG_CAST_INT64 },
};
/* resource names as listed in GETRLIMIT(2), indexed by RLIMIT_* constant */
static const char* const rlimit_names[RLIM_NLIMITS] = {
	[RLIMIT_AS] = "AS",
	[RLIMIT_CORE] = "CORE",
	[RLIMIT_CPU] = "CPU",
	[RLIMIT_DATA] = "DATA",
	[RLIMIT_FSIZE] = "FSIZE",
	[RLIMIT_LOCKS] = "LOCKS",
	[RLIMIT_MEMLOCK] = "MEMLOCK",
	[RLIMIT_MSGQUEUE] = "MSGQUEUE",
	[RLIMIT_NICE] = "NICE",
	[RLIMIT_NOFILE] = "NOFILE",
	[RLIMIT_NPROC] = "NPROC",
	[RLIMIT_RSS] = "RSS",
	[RLIMIT_RTPRIO] = "RTPRIO",
	[RLIMIT_RTTIME] = "RTTIME",
	[RLIMIT_SIGPENDING] = "SIGPENDING",
	[RLIMIT_STACK] = "STACK",
};

/*
 * Translate a name of the form "RLIMIT_<NAME>" into the matching RLIMIT_*
 * resource number.
 *
 * Returns the resource number, or -1 if the prefix is missing or the name is
 * not in rlimit_names[].
 */
static int resolve_rlimit(char *type) {
	static const char prefix[] = "RLIMIT_";
	const size_t prefix_len = sizeof(prefix) - 1;
	const char *suffix;
	unsigned int idx;

	if (strncmp(prefix, type, prefix_len) != 0)
		return -1;

	suffix = type + prefix_len;

	for (idx = 0; idx < RLIM_NLIMITS; ++idx) {
		if (!rlimit_names[idx])
			continue;
		if (strcmp(rlimit_names[idx], suffix) == 0)
			return idx;
	}

	return -1;
}
/*
 * Parse one entry of the OCI "process.rlimits" array and store it in
 * opts.rlimits[].
 *
 * Returns 0 on success, ENODATA if "type", "soft" or "hard" is missing,
 * EINVAL for an unknown type, ENOTUNIQ if a limit for that resource was
 * already set, or ENOMEM.
 */
static int parseOCIrlimit(struct blob_attr *msg)
{
	struct blob_attr *tb[__OCI_PROCESS_RLIMIT_MAX];
	int limtype = -1;
	struct rlimit *curlim;

	blobmsg_parse(oci_process_rlimit_policy, __OCI_PROCESS_RLIMIT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));

	if (!tb[OCI_PROCESS_RLIMIT_TYPE] ||
	    !tb[OCI_PROCESS_RLIMIT_SOFT] ||
	    !tb[OCI_PROCESS_RLIMIT_HARD])
		return ENODATA;

	limtype = resolve_rlimit(blobmsg_get_string(tb[OCI_PROCESS_RLIMIT_TYPE]));

	if (limtype < 0)
		return EINVAL;

	if (opts.rlimits[limtype])
		return ENOTUNIQ;

	curlim = malloc(sizeof(struct rlimit));
	if (!curlim)
		return ENOMEM;

	curlim->rlim_cur = blobmsg_cast_u64(tb[OCI_PROCESS_RLIMIT_SOFT]);
	curlim->rlim_max = blobmsg_cast_u64(tb[OCI_PROCESS_RLIMIT_HARD]);

	opts.rlimits[limtype] = curlim;

	return 0;
}
enum {
	OCI_PROCESS_ARGS,
	OCI_PROCESS_CAPABILITIES,
	OCI_PROCESS_CWD,
	OCI_PROCESS_ENV,
	OCI_PROCESS_OOMSCOREADJ,
	OCI_PROCESS_NONEWPRIVILEGES,
	OCI_PROCESS_RLIMITS,
	OCI_PROCESS_TERMINAL,
	OCI_PROCESS_USER,
	__OCI_PROCESS_MAX,
};

/* attributes read from the OCI "process" object */
static const struct blobmsg_policy oci_process_policy[] = {
	[OCI_PROCESS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
	[OCI_PROCESS_CAPABILITIES] = { "capabilities", BLOBMSG_TYPE_TABLE },
	[OCI_PROCESS_CWD] = { "cwd", BLOBMSG_TYPE_STRING },
	[OCI_PROCESS_ENV] = { "env", BLOBMSG_TYPE_ARRAY },
	[OCI_PROCESS_OOMSCOREADJ] = { "oomScoreAdj", BLOBMSG_TYPE_INT32 },
	[OCI_PROCESS_NONEWPRIVILEGES] = { "noNewPrivileges", BLOBMSG_TYPE_BOOL },
	[OCI_PROCESS_RLIMITS] = { "rlimits", BLOBMSG_TYPE_ARRAY },
	[OCI_PROCESS_TERMINAL] = { "terminal", BLOBMSG_TYPE_BOOL },
	[OCI_PROCESS_USER] = { "user", BLOBMSG_TYPE_TABLE },
};

/*
 * Parse the OCI "process" object into opts: argv (mandatory), console flag,
 * no_new_privs, working directory, environment, user, capabilities, rlimits
 * and OOM score adjustment.
 *
 * Returns 0 on success, ENOENT if "args" is missing, or the error of the
 * first sub-parser that failed.
 */
static int parseOCIprocess(struct blob_attr *msg)
{
	struct blob_attr *tb[__OCI_PROCESS_MAX];
	struct blob_attr *limit;
	int rem;
	int res;

	blobmsg_parse(oci_process_policy, __OCI_PROCESS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));

	if (!tb[OCI_PROCESS_ARGS])
		return ENOENT;

	res = parseOCIenvarray(tb[OCI_PROCESS_ARGS], &opts.jail_argv);
	if (res)
		return res;

	/* simple scalar settings */
	if (tb[OCI_PROCESS_TERMINAL])
		opts.console = blobmsg_get_bool(tb[OCI_PROCESS_TERMINAL]);

	if (tb[OCI_PROCESS_NONEWPRIVILEGES])
		opts.no_new_privs = blobmsg_get_bool(tb[OCI_PROCESS_NONEWPRIVILEGES]);

	if (tb[OCI_PROCESS_CWD])
		opts.cwd = strdup(blobmsg_get_string(tb[OCI_PROCESS_CWD]));

	/* nested objects, each handled by its own parser */
	if (tb[OCI_PROCESS_ENV]) {
		res = parseOCIenvarray(tb[OCI_PROCESS_ENV], &opts.envp);
		if (res)
			return res;
	}

	if (tb[OCI_PROCESS_USER]) {
		res = parseOCIprocessuser(tb[OCI_PROCESS_USER]);
		if (res)
			return res;
	}

	if (tb[OCI_PROCESS_CAPABILITIES]) {
		res = parseOCIcapabilities(&opts.capset, tb[OCI_PROCESS_CAPABILITIES]);
		if (res)
			return res;
	}

	if (tb[OCI_PROCESS_RLIMITS]) {
		blobmsg_for_each_attr(limit, tb[OCI_PROCESS_RLIMITS], rem) {
			res = parseOCIrlimit(limit);
			if (res)
				return res;
		}
	}

	if (tb[OCI_PROCESS_OOMSCOREADJ]) {
		opts.oom_score_adj = blobmsg_get_u32(tb[OCI_PROCESS_OOMSCOREADJ]);
		opts.set_oom_score_adj = true;
	}

	return 0;
}
/* indices into the attribute table used when parsing one OCI namespace object */
enum {
OCI_LINUX_NAMESPACE_TYPE,
OCI_LINUX_NAMESPACE_PATH,
__OCI_LINUX_NAMESPACE_MAX,
};
/*
 * Policy for one entry of "linux.namespaces": "type" names the namespace,
 * "path" (optional) points to an existing namespace to join instead of
 * creating a new one (see parseOCIlinuxns()).
 */
static const struct blobmsg_policy oci_linux_namespace_policy[] = {
[OCI_LINUX_NAMESPACE_TYPE] = { "type", BLOBMSG_TYPE_STRING },
[OCI_LINUX_NAMESPACE_PATH] = { "path", BLOBMSG_TYPE_STRING },
};
/*
 * Translate a namespace name ("pid", "network"/"net", "mount", "ipc", "uts",
 * "user", "cgroup" and, where available, "time") into its CLONE_NEW* flag.
 *
 * Returns the flag, or 0 for an unknown name.
 */
static int resolve_nstype(char *type) {
	static const struct {
		const char *name;
		int flag;
	} known[] = {
		{ "pid", CLONE_NEWPID },
		{ "network", CLONE_NEWNET },
		{ "net", CLONE_NEWNET },
		{ "mount", CLONE_NEWNS },
		{ "ipc", CLONE_NEWIPC },
		{ "uts", CLONE_NEWUTS },
		{ "user", CLONE_NEWUSER },
		{ "cgroup", CLONE_NEWCGROUP },
#ifdef CLONE_NEWTIME
		{ "time", CLONE_NEWTIME },
#endif
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); ++i) {
		if (strcmp(known[i].name, type) == 0)
			return known[i].flag;
	}

	return 0;
}
/*
 * Parse one entry of the OCI "linux.namespaces" array.
 *
 * Without "path" the namespace type is added to opts.namespace (a new
 * namespace is requested). With "path" the file is opened, checked to be a
 * namespace of the stated type and its descriptor stored in opts.setns.
 *
 * Returns 0 on success, EINVAL for a missing/unknown type or a type
 * mismatch of the opened file, ENOTUNIQ if the type was already configured,
 * EFAULT if no descriptor slot exists for the type, or the errno of open()
 * (ESTALE if errno is 0).
 */
static int parseOCIlinuxns(struct blob_attr *msg)
{
	struct blob_attr *tb[__OCI_LINUX_NAMESPACE_MAX];
	int *slot;
	int nstype;
	int fd;

	blobmsg_parse(oci_linux_namespace_policy, __OCI_LINUX_NAMESPACE_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));

	if (!tb[OCI_LINUX_NAMESPACE_TYPE])
		return EINVAL;

	nstype = resolve_nstype(blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]));
	if (!nstype)
		return EINVAL;

	/* each namespace type may be given only once, as "new" or as "join" */
	if (opts.namespace & nstype)
		return ENOTUNIQ;

	slot = get_namespace_fd(nstype);
	if (!slot)
		return EFAULT;

	if (*slot != -1)
		return ENOTUNIQ;

	if (!tb[OCI_LINUX_NAMESPACE_PATH]) {
		opts.namespace |= nstype;
		return 0;
	}

	DEBUG("opening existing %s namespace from path %s\n",
	      blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]),
	      blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_PATH]));

	fd = open(blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_PATH]), O_RDONLY);
	if (fd < 0)
		return errno?:ESTALE;

	/* make sure the file really is a namespace of the announced type */
	if (ioctl(fd, NS_GET_NSTYPE) != nstype) {
		close(fd);
		return EINVAL;
	}

	DEBUG("opened existing %s namespace got filehandler %u\n",
	      blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]),
	      fd);

	*slot = fd;

	return 0;
}
/*
 * Join namespaces of an existing process.
 *
 * @arg has the form "<pid>:<ns>[,<ns>...]": the reference PID, a ':' and a
 * ','-separated list of namespace names to join. The string is modified in
 * place while it is split.
 *
 * For every listed namespace /proc/<pid>/ns/<ns> is opened and the
 * descriptor stored in opts.setns.
 *
 * Returns 0 on success, EINVAL for a malformed argument or unknown
 * namespace name, ENOTUNIQ if a namespace was already configured, EFAULT if
 * no descriptor slot exists for it, ENOMEM, or the errno of open() (ESTALE
 * if errno is 0).
 */
static int jail_join_ns(char *arg)
{
	char *name, *comma, *nspath;
	int *slot;
	int nstype;
	int fd;
	pid_t pid;

	name = strchr(arg, ':');
	if (!name)
		return EINVAL;

	/* cut the PID off and step onto the first namespace name */
	*name++ = '\0';
	pid = atoi(arg);

	while (name) {
		comma = strchr(name, ',');
		if (comma)
			*comma = '\0';

		nstype = resolve_nstype(name);
		if (!nstype)
			return EINVAL;

		if (opts.namespace & nstype)
			return ENOTUNIQ;

		slot = get_namespace_fd(nstype);
		if (!slot)
			return EFAULT;

		if (*slot != -1)
			return ENOTUNIQ;

		if (asprintf(&nspath, "/proc/%d/ns/%s", pid, name) < 0)
			return ENOMEM;

		fd = open(nspath, O_RDONLY);
		free(nspath);
		if (fd < 0)
			return errno?:ESTALE;

		*slot = fd;

		name = comma ? comma + 1 : NULL;
	}

	return 0;
}
/*
 * Remember which host uid container uid 0 is mapped to.
 *
 * Only uid mappings (@is_gidmap false) which start at container id 0 and
 * cover at least one id are considered; the host id is stored in
 * opts.root_map_uid.
 */
static void get_jail_root_user(bool is_gidmap, uint32_t container_id, uint32_t host_id, uint32_t size)
{
	if (is_gidmap)
		return;

	if (container_id != 0 || size < 1)
		return;

	opts.root_map_uid = host_id;
}
enum {
	OCI_LINUX_UIDGIDMAP_CONTAINERID,
	OCI_LINUX_UIDGIDMAP_HOSTID,
	OCI_LINUX_UIDGIDMAP_SIZE,
	__OCI_LINUX_UIDGIDMAP_MAX,
};

/* attributes of one entry of "linux.uidMappings" / "linux.gidMappings" */
static const struct blobmsg_policy oci_linux_uidgidmap_policy[] = {
	[OCI_LINUX_UIDGIDMAP_CONTAINERID] = { "containerID", BLOBMSG_TYPE_INT32 },
	[OCI_LINUX_UIDGIDMAP_HOSTID] = { "hostID", BLOBMSG_TYPE_INT32 },
	[OCI_LINUX_UIDGIDMAP_SIZE] = { "size", BLOBMSG_TYPE_INT32 },
};

/*
 * Convert an OCI uid or gid mapping array into the text form
 * "<containerID> <hostID> <size>\n" (one line per entry) and store the
 * newly allocated string in opts.uidmap or opts.gidmap.
 *
 * @msg:       blobmsg array of mapping objects
 * @is_gidmap: true for gid mappings, false for uid mappings
 *
 * IDs are 32-bit unsigned values and are printed as such. An empty array
 * yields an empty string.
 *
 * Returns 0 on success, EINVAL if an entry lacks one of the three fields,
 * or ENOMEM.
 */
static int parseOCIuidgidmappings(struct blob_attr *msg, bool is_gidmap)
{
	struct blob_attr *tb[__OCI_LINUX_UIDGIDMAP_MAX];
	struct blob_attr *cur;
	int rem;
	char *map;
	size_t len, pos, totallen = 0;

	blobmsg_for_each_attr(cur, msg, rem) {
		blobmsg_parse(oci_linux_uidgidmap_policy, __OCI_LINUX_UIDGIDMAP_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));

		if (!tb[OCI_LINUX_UIDGIDMAP_CONTAINERID] ||
		    !tb[OCI_LINUX_UIDGIDMAP_HOSTID] ||
		    !tb[OCI_LINUX_UIDGIDMAP_SIZE])
			return EINVAL;

		/* count length */
		totallen += snprintf(NULL, 0, "%u %u %u\n",
			(unsigned int)blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]),
			(unsigned int)blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]),
			(unsigned int)blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE]));
	}

	/* allocate combined mapping string */
	map = malloc(totallen + 1);
	if (!map)
		return ENOMEM;

	/* keep the result a valid string even if the array has no entries */
	map[0] = '\0';

	pos = 0;
	blobmsg_for_each_attr(cur, msg, rem) {
		blobmsg_parse(oci_linux_uidgidmap_policy, __OCI_LINUX_UIDGIDMAP_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));

		get_jail_root_user(is_gidmap, blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]),
			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]),
			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE]));

		/* write mapping line into pre-allocated string */
		len = snprintf(&map[pos], totallen + 1, "%u %u %u\n",
			(unsigned int)blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]),
			(unsigned int)blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]),
			(unsigned int)blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE]));
		pos += len;
		totallen -= len;
	}

	/* both passes must agree on the total length */
	assert(totallen == 0);

	if (is_gidmap)
		opts.gidmap = map;
	else
		opts.uidmap = map;

	return 0;
}
/* indices into the attribute table used when parsing one OCI device object */
enum {
	OCI_DEVICES_TYPE,
	OCI_DEVICES_PATH,
	OCI_DEVICES_MAJOR,
	OCI_DEVICES_MINOR,
	OCI_DEVICES_FILEMODE,
	OCI_DEVICES_UID,
	OCI_DEVICES_GID,
	__OCI_DEVICES_MAX,
};

/*
 * Policy for one entry of "linux.devices". The owner and the group are
 * separate keys: "uid" and "gid".
 */
static const struct blobmsg_policy oci_devices_policy[] = {
	[OCI_DEVICES_TYPE] = { "type", BLOBMSG_TYPE_STRING },
	[OCI_DEVICES_PATH] = { "path", BLOBMSG_TYPE_STRING },
	[OCI_DEVICES_MAJOR] = { "major", BLOBMSG_TYPE_INT32 },
	[OCI_DEVICES_MINOR] = { "minor", BLOBMSG_TYPE_INT32 },
	[OCI_DEVICES_FILEMODE] = { "fileMode", BLOBMSG_TYPE_INT32 },
	[OCI_DEVICES_UID] = { "uid", BLOBMSG_TYPE_INT32 },
	[OCI_DEVICES_GID] = { "gid", BLOBMSG_TYPE_INT32 },
};
/*
 * Translate an OCI device type string into the matching file type bits:
 * "c" and "u" are character devices, "b" is a block device, "p" is a FIFO.
 *
 * Returns S_IFCHR, S_IFBLK or S_IFIFO, or 0 for anything else.
 */
static mode_t resolve_devtype(char *tstr)
{
	/* every valid type is exactly one character long */
	if (tstr[0] == '\0' || tstr[1] != '\0')
		return 0;

	switch (tstr[0]) {
	case 'c':
	case 'u':
		return S_IFCHR;
	case 'b':
		return S_IFBLK;
	case 'p':
		return S_IFIFO;
	default:
		return 0;
	}
}
/*
 * Parse the OCI "linux.devices" array into the NULL-terminated list
 * opts.devices.
 *
 * Each entry needs "type" and "path"; character and block devices also need
 * "major" and "minor". "fileMode" may only contain permission bits and
 * defaults to 0600; a missing "uid" / "gid" is stored as -1.
 *
 * Returns 0 on success, ENODATA for a missing mandatory field, EINVAL for an
 * unknown type or invalid file mode, or ENOMEM. Entries stored before an
 * error remain in opts.devices.
 */
static int parseOCIdevices(struct blob_attr *msg)
{
	struct blob_attr *tb[__OCI_DEVICES_MAX];
	struct blob_attr *cur;
	int rem;
	size_t cnt = 0;
	struct mknod_args *tmp;

	blobmsg_for_each_attr(cur, msg, rem)
		++cnt;

	/* zeroed, so the list is NULL-terminated at every point in time */
	opts.devices = calloc(cnt + 1, sizeof(struct mknod_args *));
	if (!opts.devices)
		return ENOMEM;

	cnt = 0;
	blobmsg_for_each_attr(cur, msg, rem) {
		blobmsg_parse(oci_devices_policy, __OCI_DEVICES_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
		if (!tb[OCI_DEVICES_TYPE] ||
		    !tb[OCI_DEVICES_PATH])
			return ENODATA;

		tmp = calloc(1, sizeof(struct mknod_args));
		if (!tmp)
			return ENOMEM;

		tmp->mode = resolve_devtype(blobmsg_get_string(tb[OCI_DEVICES_TYPE]));
		if (!tmp->mode) {
			free(tmp);
			return EINVAL;
		}

		/* FIFOs have no device number */
		if (tmp->mode != S_IFIFO) {
			if (!tb[OCI_DEVICES_MAJOR] || !tb[OCI_DEVICES_MINOR]) {
				free(tmp);
				return ENODATA;
			}

			tmp->dev = makedev(blobmsg_get_u32(tb[OCI_DEVICES_MAJOR]),
					   blobmsg_get_u32(tb[OCI_DEVICES_MINOR]));
		}

		if (tb[OCI_DEVICES_FILEMODE]) {
			/* reject anything but permission bits */
			if (~(S_IRWXU|S_IRWXG|S_IRWXO) & blobmsg_get_u32(tb[OCI_DEVICES_FILEMODE])) {
				free(tmp);
				return EINVAL;
			}

			tmp->mode |= blobmsg_get_u32(tb[OCI_DEVICES_FILEMODE]);
		} else {
			tmp->mode |= (S_IRUSR|S_IWUSR); /* 0600 */
		}

		tmp->path = strdup(blobmsg_get_string(tb[OCI_DEVICES_PATH]));
		if (!tmp->path) {
			free(tmp);
			return ENOMEM;
		}

		if (tb[OCI_DEVICES_UID])
			tmp->uid = blobmsg_get_u32(tb[OCI_DEVICES_UID]);
		else
			tmp->uid = -1;

		if (tb[OCI_DEVICES_GID])
			tmp->gid = blobmsg_get_u32(tb[OCI_DEVICES_GID]);
		else
			tmp->gid = -1;

		DEBUG("read device %s (%s)\n", blobmsg_get_string(tb[OCI_DEVICES_PATH]), blobmsg_get_string(tb[OCI_DEVICES_TYPE]));
		opts.devices[cnt++] = tmp;
	}

	opts.devices[cnt] = NULL;

	return 0;
}
/*
 * Parse the OCI "linux.sysctl" table into the NULL-terminated list
 * opts.sysctl.
 *
 * Every '.' in an entry name is replaced by '/'. Entry names and values are
 * copied.
 *
 * Returns 0 on success (also for an empty table, which leaves opts.sysctl
 * untouched), EINVAL if an attribute has no name or value, or ENOMEM.
 */
static int parseOCIsysctl(struct blob_attr *msg)
{
	struct blob_attr *cur;
	int rem;
	char *tmp, *tc;
	size_t cnt = 0;

	blobmsg_for_each_attr(cur, msg, rem) {
		if (!blobmsg_name(cur) || !blobmsg_get_string(cur))
			return EINVAL;

		++cnt;
	}

	if (!cnt)
		return 0;

	/* zeroed, so the list is NULL-terminated at every point in time */
	opts.sysctl = calloc(cnt + 1, sizeof(struct sysctl_val *));
	if (!opts.sysctl)
		return ENOMEM;

	cnt = 0;
	blobmsg_for_each_attr(cur, msg, rem) {
		opts.sysctl[cnt] = malloc(sizeof(struct sysctl_val));
		if (!opts.sysctl[cnt])
			return ENOMEM;

		/* replace '.' with '/' in entry name */
		tc = tmp = strdup(blobmsg_name(cur));
		if (!tmp)
			goto nomem;

		while ((tc = strchr(tc, '.')))
			*tc = '/';

		opts.sysctl[cnt]->value = strdup(blobmsg_get_string(cur));
		if (!opts.sysctl[cnt]->value) {
			free(tmp);
			goto nomem;
		}

		opts.sysctl[cnt]->entry = tmp;

		++cnt;
	}

	opts.sysctl[cnt] = NULL;

	return 0;

nomem:
	/* drop the half-initialised element, the list stays terminated before it */
	free(opts.sysctl[cnt]);
	opts.sysctl[cnt] = NULL;

	return ENOMEM;
}
/* indices into the attribute table used when parsing the OCI "linux" object */
enum {
OCI_LINUX_CGROUPSPATH,
OCI_LINUX_RESOURCES,
OCI_LINUX_SECCOMP,
OCI_LINUX_SYSCTL,
OCI_LINUX_NAMESPACES,
OCI_LINUX_DEVICES,
OCI_LINUX_UIDMAPPINGS,
OCI_LINUX_GIDMAPPINGS,
OCI_LINUX_MASKEDPATHS,
OCI_LINUX_READONLYPATHS,
OCI_LINUX_ROOTFSPROPAGATION,
__OCI_LINUX_MAX,
};
/* key names and expected blobmsg types of the members of the OCI "linux" object */
static const struct blobmsg_policy oci_linux_policy[] = {
[OCI_LINUX_CGROUPSPATH] = { "cgroupsPath", BLOBMSG_TYPE_STRING },
[OCI_LINUX_RESOURCES] = { "resources", BLOBMSG_TYPE_TABLE },
[OCI_LINUX_SECCOMP] = { "seccomp", BLOBMSG_TYPE_TABLE },
[OCI_LINUX_SYSCTL] = { "sysctl", BLOBMSG_TYPE_TABLE },
[OCI_LINUX_NAMESPACES] = { "namespaces", BLOBMSG_TYPE_ARRAY },
[OCI_LINUX_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
[OCI_LINUX_UIDMAPPINGS] = { "uidMappings", BLOBMSG_TYPE_ARRAY },
[OCI_LINUX_GIDMAPPINGS] = { "gidMappings", BLOBMSG_TYPE_ARRAY },
[OCI_LINUX_MASKEDPATHS] = { "maskedPaths", BLOBMSG_TYPE_ARRAY },
[OCI_LINUX_READONLYPATHS] = { "readonlyPaths", BLOBMSG_TYPE_ARRAY },
[OCI_LINUX_ROOTFSPROPAGATION] = { "rootfsPropagation", BLOBMSG_TYPE_STRING },
};
/*
 * Parse the OCI "linux" object.
 *
 * Handles, in this order: namespaces, uid/gid mappings, read-only paths,
 * masked paths, sysctl, seccomp, devices, the cgroup path and finally the
 * cgroup resources. The cgroup path is always computed and handed to
 * cgroups_init(), also when "cgroupsPath" is absent.
 *
 * Returns 0 on success or a positive errno-style code: whatever the
 * sub-parser returned, EINVAL if the seccomp section could not be parsed,
 * or E2BIG if the resulting cgroup path does not fit into the local buffer.
 */
static int parseOCIlinux(struct blob_attr *msg)
{
	struct blob_attr *tb[__OCI_LINUX_MAX];
	struct blob_attr *cur;
	int rem;
	int res = 0;
	char *cgpath;
	char cgfullpath[256] = "/sys/fs/cgroup";

	blobmsg_parse(oci_linux_policy, __OCI_LINUX_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));

	if (tb[OCI_LINUX_NAMESPACES]) {
		blobmsg_for_each_attr(cur, tb[OCI_LINUX_NAMESPACES], rem) {
			res = parseOCIlinuxns(cur);
			if (res)
				return res;
		}
	}

	if (tb[OCI_LINUX_UIDMAPPINGS]) {
		/*
		 * Pass the uid mappings themselves; passing the gid attribute
		 * here would parse the wrong table, or NULL when only
		 * uidMappings is present.
		 */
		res = parseOCIuidgidmappings(tb[OCI_LINUX_UIDMAPPINGS], 0);
		if (res)
			return res;
	}

	if (tb[OCI_LINUX_GIDMAPPINGS]) {
		res = parseOCIuidgidmappings(tb[OCI_LINUX_GIDMAPPINGS], 1);
		if (res)
			return res;
	}

	if (tb[OCI_LINUX_READONLYPATHS]) {
		blobmsg_for_each_attr(cur, tb[OCI_LINUX_READONLYPATHS], rem) {
			res = add_mount(NULL, blobmsg_get_string(cur), NULL, MS_BIND | MS_REC | MS_RDONLY, 0, NULL, 0);
			if (res)
				return res;
		}
	}

	if (tb[OCI_LINUX_MASKEDPATHS]) {
		blobmsg_for_each_attr(cur, tb[OCI_LINUX_MASKEDPATHS], rem) {
			/* source (void *)(-1) is the marker used for masked paths */
			res = add_mount((void *)(-1), blobmsg_get_string(cur), NULL, 0, 0, NULL, 0);
			if (res)
				return res;
		}
	}

	if (tb[OCI_LINUX_SYSCTL]) {
		res = parseOCIsysctl(tb[OCI_LINUX_SYSCTL]);
		if (res)
			return res;
	}

	if (tb[OCI_LINUX_SECCOMP]) {
		opts.ociseccomp = parseOCIlinuxseccomp(tb[OCI_LINUX_SECCOMP]);
		if (!opts.ociseccomp)
			return EINVAL;
	}

	if (tb[OCI_LINUX_DEVICES]) {
		res = parseOCIdevices(tb[OCI_LINUX_DEVICES]);
		if (res)
			return res;
	}

	if (tb[OCI_LINUX_CGROUPSPATH]) {
		cgpath = blobmsg_get_string(tb[OCI_LINUX_CGROUPSPATH]);
		if (cgpath[0] == '/') {
			/* absolute: append directly below /sys/fs/cgroup */
			if (strlen(cgpath) + 1 >= (sizeof(cgfullpath) - strlen(cgfullpath)))
				return E2BIG;

			strcat(cgfullpath, cgpath);
		} else {
			/* relative: /sys/fs/cgroup/containers/<name>/<cgpath> */
			strcat(cgfullpath, "/containers/");
			if (strlen(opts.name) + strlen(cgpath) + 2 >= (sizeof(cgfullpath) - strlen(cgfullpath)))
				return E2BIG;

			strcat(cgfullpath, opts.name); /* should be container name rather than jail name */
			strcat(cgfullpath, "/");
			strcat(cgfullpath, cgpath);
		}
	} else {
		/* default: /sys/fs/cgroup/containers/<name>/<name> */
		strcat(cgfullpath, "/containers/");
		if (2 * strlen(opts.name) + 2 >= (sizeof(cgfullpath) - strlen(cgfullpath)))
			return E2BIG;

		strcat(cgfullpath, opts.name); /* should be container name rather than jail name */
		strcat(cgfullpath, "/");
		strcat(cgfullpath, opts.name); /* should be container instance name rather than jail name */
	}

	cgroups_init(cgfullpath);

	if (tb[OCI_LINUX_RESOURCES]) {
		res = parseOCIlinuxcgroups(tb[OCI_LINUX_RESOURCES]);
		if (res)
			return res;
	}

	return 0;
}
/*
 * Indices into the attribute table filled by blobmsg_parse() for the
 * top-level object of an OCI config.json; one per key in oci_policy.
 */
enum {
OCI_VERSION,
OCI_HOSTNAME,
OCI_PROCESS,
OCI_ROOT,
OCI_MOUNTS,
OCI_HOOKS,
OCI_LINUX,
OCI_ANNOTATIONS,
__OCI_MAX,
};
/* Key name and expected blobmsg type of each top-level config.json member read by parseOCI(). */
static const struct blobmsg_policy oci_policy[] = {
[OCI_VERSION] = { "ociVersion", BLOBMSG_TYPE_STRING },
[OCI_HOSTNAME] = { "hostname", BLOBMSG_TYPE_STRING },
[OCI_PROCESS] = { "process", BLOBMSG_TYPE_TABLE },
[OCI_ROOT] = { "root", BLOBMSG_TYPE_TABLE },
[OCI_MOUNTS] = { "mounts", BLOBMSG_TYPE_ARRAY },
[OCI_HOOKS] = { "hooks", BLOBMSG_TYPE_TABLE },
[OCI_LINUX] = { "linux", BLOBMSG_TYPE_TABLE },
[OCI_ANNOTATIONS] = { "annotations", BLOBMSG_TYPE_TABLE },
};
/*
 * Load an OCI runtime configuration from jsonfile and apply it to opts.
 *
 * "ociVersion" (must start with "1.0"), "process", "root" and "mounts" are
 * mandatory; "hostname", "linux", "hooks" and "annotations" are optional.
 * The temporary blob buffer is released before returning, so everything
 * that is kept is copied (hostname via strdup, annotations via blob_memdup).
 *
 * Returns 0 on success or a positive errno-style code: ENOENT if the file
 * cannot be loaded as JSON, ENOMSG without ociVersion, ENOTSUP for an
 * unsupported version, ENODATA for a missing mandatory section, ENOMEM if
 * a copy cannot be allocated, or the code returned by a sub-parser.
 */
static int parseOCI(const char *jsonfile)
{
	struct blob_attr *tb[__OCI_MAX];
	struct blob_attr *cur;
	int rem;
	int res = 0;

	blob_buf_init(&ocibuf, 0);

	if (!blobmsg_add_json_from_file(&ocibuf, jsonfile)) {
		res = ENOENT;
		goto errout;
	}

	blobmsg_parse(oci_policy, __OCI_MAX, tb, blob_data(ocibuf.head), blob_len(ocibuf.head));

	if (!tb[OCI_VERSION]) {
		res = ENOMSG;
		goto errout;
	}

	if (strncmp("1.0", blobmsg_get_string(tb[OCI_VERSION]), 3)) {
		ERROR("unsupported ociVersion %s\n", blobmsg_get_string(tb[OCI_VERSION]));
		res = ENOTSUP;
		goto errout;
	}

	if (tb[OCI_HOSTNAME]) {
		opts.hostname = strdup(blobmsg_get_string(tb[OCI_HOSTNAME]));
		if (!opts.hostname) {
			res = ENOMEM;
			goto errout;
		}
	}

	if (!tb[OCI_PROCESS]) {
		res = ENODATA;
		goto errout;
	}

	if ((res = parseOCIprocess(tb[OCI_PROCESS])))
		goto errout;

	if (!tb[OCI_ROOT]) {
		res = ENODATA;
		goto errout;
	}

	if ((res = parseOCIroot(jsonfile, tb[OCI_ROOT])))
		goto errout;

	if (!tb[OCI_MOUNTS]) {
		res = ENODATA;
		goto errout;
	}

	blobmsg_for_each_attr(cur, tb[OCI_MOUNTS], rem)
		if ((res = parseOCImount(cur)))
			goto errout;

	if (tb[OCI_LINUX] && (res = parseOCIlinux(tb[OCI_LINUX])))
		goto errout;

	if (tb[OCI_HOOKS] && (res = parseOCIhooks(tb[OCI_HOOKS])))
		goto errout;

	if (tb[OCI_ANNOTATIONS]) {
		/* copy: the attribute lives in ocibuf, which is freed below */
		opts.annotations = blob_memdup(tb[OCI_ANNOTATIONS]);
		if (!opts.annotations)
			res = ENOMEM;
	}

errout:
	blob_buf_free(&ocibuf);

	return res;
}
/*
 * Write opts.oom_score_adj to /proc/<jail pid>/oom_score_adj.
 *
 * Does nothing unless opts.set_oom_score_adj is set.
 * Returns 0 on success, otherwise the errno of the failing open or write.
 */
static int set_oom_score_adj(void)
{
	int f;
	int ret = 0;
	char fname[32];

	if (!opts.set_oom_score_adj)
		return 0;

	snprintf(fname, sizeof(fname), "/proc/%u/oom_score_adj", (unsigned int)jail_process.pid);

	f = open(fname, O_WRONLY | O_TRUNC);
	if (f < 0)
		return errno;

	/* report a rejected value instead of silently ignoring it */
	if (dprintf(f, "%d", opts.oom_score_adj) < 0)
		ret = errno ? errno : EIO;

	close(f);

	return ret;
}
/* Container life-cycle states; handle_state() reports them as "creating", "created", "running" and "stopped". */
enum {
OCI_STATE_CREATING,
OCI_STATE_CREATED,
OCI_STATE_RUNNING,
OCI_STATE_STOPPED,
};
/*
 * Current life-cycle state of the jail.
 * NOTE(review): starts out as CREATED although a CREATING state exists and
 * post_create_runtime() assigns CREATED again later - confirm this initial
 * value is intended.
 */
static int jail_oci_state = OCI_STATE_CREATED;
static void pipe_send_start_container(struct uloop_timeout *t);
/* Timeout armed by handle_start(); its callback tells the child to start the container. */
static struct uloop_timeout start_container_timeout = {
.cb = pipe_send_start_container,
};
/*
 * ubus "start" method: schedule the start of a container which is in
 * state "created". Any other state yields UBUS_STATUS_INVALID_ARGUMENT.
 */
static int handle_start(struct ubus_context *ctx, struct ubus_object *obj,
			struct ubus_request_data *req, const char *method,
			struct blob_attr *msg)
{
	int status = UBUS_STATUS_INVALID_ARGUMENT;

	if (jail_oci_state == OCI_STATE_CREATED) {
		/* the actual start happens in the timeout callback */
		uloop_timeout_add(&start_container_timeout);
		status = UBUS_STATUS_OK;
	}

	return status;
}
static struct blob_buf bb;
static int handle_state(struct ubus_context *ctx, struct ubus_object *obj,
struct ubus_request_data *req, const char *method,
struct blob_attr *msg)
{
char *statusstr;
switch (jail_oci_state) {
case OCI_STATE_CREATING:
statusstr = "creating";
break;
case OCI_STATE_CREATED:
statusstr = "created";
break;
case OCI_STATE_RUNNING:
statusstr = "running";
break;
case OCI_STATE_STOPPED:
statusstr = "stopped";
break;
default:
statusstr = "unknown";
}
blob_buf_init(&bb, 0);
blobmsg_add_string(&bb, "ociVersion", OCI_VERSION_STRING);
blobmsg_add_string(&bb, "id", opts.name);
blobmsg_add_string(&bb, "status", statusstr);
if (jail_oci_state == OCI_STATE_CREATED ||
jail_oci_state == OCI_STATE_RUNNING)
blobmsg_add_u32(&bb, "pid", jail_process.pid);
blobmsg_add_string(&bb, "bundle", opts.ocibundle);
if (opts.infra)
blobmsg_add_string(&bb, "infra", opts.infra);
if (opts.annotations)
blobmsg_add_blob(&bb, opts.annotations);
ubus_send_reply(ctx, req, bb.head);
return UBUS_STATUS_OK;
}
/* Attribute indices for the arguments of the ubus "kill" method. */
enum {
CONTAINER_KILL_ATTR_SIGNAL,
__CONTAINER_KILL_ATTR_MAX,
};
/* "kill" takes one optional 32-bit integer argument named "signal". */
static const struct blobmsg_policy container_kill_attrs[__CONTAINER_KILL_ATTR_MAX] = {
[CONTAINER_KILL_ATTR_SIGNAL] = { "signal", BLOBMSG_TYPE_INT32 },
};
/*
 * ubus "kill" method: send a signal (SIGTERM unless "signal" is given) to
 * the jail process. Replies NOT_FOUND while the container is still being
 * created; otherwise maps the errno of kill() onto a ubus status code.
 */
static int
container_handle_kill(struct ubus_context *ctx, struct ubus_object *obj,
		      struct ubus_request_data *req, const char *method,
		      struct blob_attr *msg)
{
	struct blob_attr *tb[__CONTAINER_KILL_ATTR_MAX];
	int sig = SIGTERM;

	blobmsg_parse(container_kill_attrs, __CONTAINER_KILL_ATTR_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));

	if (tb[CONTAINER_KILL_ATTR_SIGNAL])
		sig = blobmsg_get_u32(tb[CONTAINER_KILL_ATTR_SIGNAL]);

	if (jail_oci_state == OCI_STATE_CREATING)
		return UBUS_STATUS_NOT_FOUND;

	if (!kill(jail_process.pid, sig))
		return 0;

	if (errno == EINVAL)
		return UBUS_STATUS_INVALID_ARGUMENT;

	if (errno == EPERM)
		return UBUS_STATUS_PERMISSION_DENIED;

	if (errno == ESRCH)
		return UBUS_STATUS_NOT_FOUND;

	return UBUS_STATUS_UNKNOWN_ERROR;
}
/*
 * Write pid followed by a newline to opts.pidfile.
 *
 * Does nothing when no pidfile was requested.
 * Returns 0 on success, otherwise the errno of the first failing
 * fopen/fprintf/fclose call.
 */
static int
jail_writepid(pid_t pid)
{
	FILE *_pidfile;
	int err = 0;

	if (!opts.pidfile)
		return 0;

	_pidfile = fopen(opts.pidfile, "w");
	if (_pidfile == NULL)
		return errno;

	/* remember errno now: fclose() below may overwrite it */
	if (fprintf(_pidfile, "%d\n", pid) < 0)
		err = errno ? errno : EIO;

	if (fclose(_pidfile) && !err)
		err = errno;

	return err;
}
/*
 * Check that path can be opened as a directory.
 * Returns 0 if so, -1 (after logging the error) otherwise.
 */
static int checkpath(const char *path)
{
	const int fd = open(path, O_RDONLY | O_DIRECTORY | O_CLOEXEC);

	if (fd >= 0) {
		close(fd);
		return 0;
	}

	ERROR("path %s open failed %m\n", path);

	return -1;
}
/* Methods of the per-container ubus object: "start", "state" and "kill". */
static struct ubus_method container_methods[] = {
UBUS_METHOD_NOARG("start", handle_start),
UBUS_METHOD_NOARG("state", handle_state),
UBUS_METHOD("kill", container_handle_kill, container_kill_attrs),
};
static struct ubus_object_type container_object_type =
UBUS_OBJECT_TYPE("container", container_methods);
/* The object itself; main() sets .name to "container.<jail name>" before registering it. */
static struct ubus_object container_object = {
.type = &container_object_type,
.methods = container_methods,
.n_methods = ARRAY_SIZE(container_methods),
};
static void post_main(struct uloop_timeout *t);
/* Armed at the end of main() so that post_main() runs from within the uloop. */
static struct uloop_timeout post_main_timeout = {
.cb = post_main,
};
/*
 * Network namespace fd used and closed by poststop().
 * NOTE(review): no assignment is visible in this part of the file - confirm
 * it is set before poststop() runs.
 */
static int netns_fd;
/* Own PID namespace saved by post_main() before joining another one; -1 if unused. */
static int pidns_fd;
#ifdef CLONE_NEWTIME
/* Same as pidns_fd, for the time namespace. */
static int timens_fd;
#endif
static void post_create_runtime(void);
/* List node holding the name of one environment variable given with -e. */
struct env_e {
struct list_head list;
char *envarg;
};
/*
 * ujail entry point.
 *
 * Parses the command line into opts, optionally loads an OCI bundle,
 * validates the resulting configuration, connects to ubus (registering
 * "container.<name>" for OCI bundles), prepares the argv and the library
 * dependencies of the jailed program and finally enters the uloop, where
 * post_main() continues the setup.
 *
 * Must be run as root. Every failure path releases the parsed options
 * through the errout label.
 */
int main(int argc, char **argv)
{
	uid_t uid = getuid();
	const char log[] = "/dev/log";
	const char ubus[] = "/var/run/ubus/ubus.sock";
	int ret = EXIT_FAILURE;
	int ch;
	char *tmp;
	struct list_head envl = LIST_HEAD_INIT(envl);
	struct env_e *enve, *tmpenve;
	unsigned short int envn = 0, envc = 0;

	if (uid) {
		ERROR("not root, aborting: %m\n");
		return EXIT_FAILURE;
	}

	/* those are filehandlers, so -1 indicates unused */
	opts.setns.pid = -1;
	opts.setns.net = -1;
	opts.setns.ns = -1;
	opts.setns.ipc = -1;
	opts.setns.uts = -1;
	opts.setns.user = -1;
	opts.setns.cgroup = -1;
#ifdef CLONE_NEWTIME
	opts.setns.time = -1;
#endif

	/* default 5 seconds timeout after SIGTERM before SIGKILL is sent */
	opts.term_timeout = 5;

	umask(022);
	mount_list_init();
	init_library_search();
	cgroups_prepare();
	exit_from_child = false;

	while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) {
		switch (ch) {
		case 'd':
			debug = atoi(optarg);
			break;
		case 'e':
			/* only remember the name; values are looked up after parsing */
			enve = calloc(1, sizeof(*enve));
			if (!enve) {
				ret = -ENOMEM;
				goto errout;
			}
			enve->envarg = optarg;
			list_add_tail(&enve->list, &envl);
			break;
		case 'p':
			opts.namespace |= CLONE_NEWNS;
			opts.procfs = 1;
			break;
		case 'o':
			opts.namespace |= CLONE_NEWNS;
			opts.ronly = 1;
			break;
		case 'f':
			opts.namespace |= CLONE_NEWUSER;
			break;
		case 'F':
			opts.namespace |= CLONE_NEWCGROUP;
			break;
		case 'R':
			opts.extroot = realpath(optarg, NULL);
			break;
		case 's':
			opts.namespace |= CLONE_NEWNS;
			opts.sysfs = 1;
			break;
		case 'S':
			opts.seccomp = optarg;
			add_mount_bind(optarg, 1, -1);
			break;
		case 'C':
			opts.capabilities = optarg;
			break;
		case 'c':
			opts.no_new_privs = 1;
			break;
		case 'n':
			opts.name = optarg;
			break;
		case 'N':
			opts.namespace |= CLONE_NEWNET;
			break;
		case 'h':
			opts.namespace |= CLONE_NEWUTS;
			opts.hostname = strdup(optarg);
			break;
		case 'j':
			jail_join_ns(optarg);
			break;
		case 'r':
			/* "<src>[:<dst>]", added with third argument 1 */
			opts.namespace |= CLONE_NEWNS;
			tmp = strchr(optarg, ':');
			if (tmp) {
				*(tmp++) = '\0';
				add_2paths_and_deps(optarg, tmp, 1, 0, 0);
			} else {
				add_path_and_deps(optarg, 1, 0, 0);
			}
			break;
		case 'w':
			/* "<src>[:<dst>]", added with third argument 0 */
			opts.namespace |= CLONE_NEWNS;
			tmp = strchr(optarg, ':');
			if (tmp) {
				*(tmp++) = '\0';
				add_2paths_and_deps(optarg, tmp, 0, 0, 0);
			} else {
				add_path_and_deps(optarg, 0, 0, 0);
			}
			break;
		case 'u':
			opts.namespace |= CLONE_NEWNS;
			add_mount_bind(ubus, 0, -1);
			break;
		case 'l':
			opts.namespace |= CLONE_NEWNS;
			add_mount_bind(log, 0, -1);
			break;
		case 'U':
			opts.user = optarg;
			break;
		case 'G':
			opts.group = optarg;
			break;
		case 'O':
			opts.overlaydir = realpath(optarg, NULL);
			break;
		case 't':
			opts.term_timeout = atoi(optarg);
			break;
		case 'T':
			opts.tmpoverlaysize = optarg;
			break;
		case 'E':
			opts.require_jail = 1;
			break;
		case 'y':
			opts.console = 1;
			break;
		case 'J':
			opts.ocibundle = optarg;
			break;
		case 'I':
			opts.infra = optarg;
			break;
		case 'i':
			opts.immediately = true;
			break;
		case 'P':
			opts.pidfile = optarg;
			break;
		}
	}

	if (opts.namespace && !opts.ocibundle)
		opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID;

	/*
	 * env import from cmdline is not available for OCI containers
	 */
	if (opts.ocibundle && !list_empty(&envl)) {
		ret = -ENOTSUP;
		goto errout;
	}

	/*
	 * prepare list of env variables to import for slim containers
	 */
	if (!list_empty(&envl)) {
		list_for_each_entry(enve, &envl, list)
			++envn;

		opts.envp = calloc(1 + envn, sizeof(char*));
		if (!opts.envp) {
			ret = -ENOMEM;
			goto errout;
		}

		list_for_each_entry_safe(enve, tmpenve, &envl, list) {
			tmp = getenv(enve->envarg);
			if (tmp) {
				char *kv;

				/*
				 * format into a local first so that opts.envp never
				 * holds an undefined pointer after a failure
				 */
				if (asprintf(&kv, "%s=%s", enve->envarg, tmp) < 0) {
					ERROR("filed to handle envargs %s\n", tmp);
					free(enve);
					ret = -1;
					goto errout;
				}
				opts.envp[envc++] = kv;
			}
			list_del(&enve->list);
			free(enve);
		}

		opts.envp[envc] = NULL;
	}

	/*
	 * uid in parent user namespace representing root user in new
	 * user namespace, defaults to nobody unless specified in uidMappings
	 */
	opts.root_map_uid = 65534;

	if (opts.capabilities && parseOCIcapabilities_from_file(&opts.capset, opts.capabilities)) {
		ERROR("failed to read capabilities from file %s\n", opts.capabilities);
		ret = -1;
		goto errout;
	}

	if (opts.ocibundle) {
		char *jsonfile;
		int ocires;

		if (!opts.name) {
			ERROR("OCI bundle needs a named jail\n");
			ret = -1;
			goto errout;
		}
		if (asprintf(&jsonfile, "%s/config.json", opts.ocibundle) < 0) {
			ret = -ENOMEM;
			goto errout;
		}
		ocires = parseOCI(jsonfile);
		free(jsonfile);
		if (ocires) {
			ERROR("parsing of OCI JSON spec has failed: %s (%d)\n", strerror(ocires), ocires);
			ret = ocires;
			goto errout;
		}
	}

	if (opts.namespace & CLONE_NEWNET) {
		if (!opts.name) {
			ERROR("netns needs a named jail\n");
			ret = -1;
			goto errout;
		}
	}

	if (opts.tmpoverlaysize && strlen(opts.tmpoverlaysize) > 8) {
		ERROR("size parameter too long: \"%s\"\n", opts.tmpoverlaysize);
		ret = -1;
		goto errout;
	}

	if (opts.extroot && checkpath(opts.extroot)) {
		ERROR("invalid rootfs path '%s'", opts.extroot);
		ret = -1;
		goto errout;
	}

	if (opts.overlaydir && checkpath(opts.overlaydir)) {
		ERROR("invalid rootfs overlay path '%s'", opts.overlaydir);
		ret = -1;
		goto errout;
	}

	/* no <binary> param found */
	if (!opts.ocibundle && (argc - optind < 1)) {
		usage();
		ret = EXIT_FAILURE;
		goto errout;
	}

	if (!(opts.ocibundle||opts.namespace||opts.capabilities||opts.seccomp||
	      (opts.setns.net != -1) ||
	      (opts.setns.ns != -1) ||
	      (opts.setns.ipc != -1) ||
	      (opts.setns.uts != -1) ||
	      (opts.setns.user != -1) ||
	      (opts.setns.cgroup != -1))) {
		ERROR("Not using namespaces, capabilities or seccomp !!!\n\n");
		usage();
		ret = EXIT_FAILURE;
		goto errout;
	}

	DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n",
	      opts.namespace,
	      opts.capset.apply,
	      opts.seccomp != 0 || opts.ociseccomp != 0);

	uloop_init();
	signals_init();

	parent_ctx = ubus_connect(NULL);
	if (!parent_ctx) {
		/* ubus_add_uloop() would dereference the NULL context */
		ERROR("failed to connect to ubus\n");
		ret = -1;
		goto errout;
	}
	ubus_add_uloop(parent_ctx);

	if (opts.ocibundle) {
		char *objname;

		if (asprintf(&objname, "container.%s", opts.name) < 0) {
			ret = -ENOMEM;
			goto errout;
		}

		container_object.name = objname;
		ret = ubus_add_object(parent_ctx, &container_object);
		if (ret) {
			ERROR("Failed to add object: %s\n", ubus_strerror(ret));
			ret = -1;
			goto errout;
		}
	}

	/* deliberately not using 'else' on unrelated conditional branches */
	if (!opts.ocibundle) {
		/* allocate NULL-terminated array for argv */
		opts.jail_argv = calloc(1 + argc - optind, sizeof(void *));
		if (!opts.jail_argv) {
			ret = EXIT_FAILURE;
			goto errout;
		}

		for (int i = optind; i < argc; i++) {
			opts.jail_argv[i - optind] = strdup(argv[i]);
			/* a NULL here would silently truncate the command line */
			if (!opts.jail_argv[i - optind]) {
				ret = EXIT_FAILURE;
				goto errout;
			}
		}

		if (opts.namespace & CLONE_NEWUSER)
			get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid);
	}

	if (!opts.extroot) {
		if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) {
			ERROR("failed to load dependencies\n");
			ret = -1;
			goto errout;
		}
	}

	if (opts.namespace && opts.seccomp && add_path_and_deps("libpreload-seccomp.so", 1, -1, 1)) {
		ERROR("failed to load libpreload-seccomp.so\n");
		opts.seccomp = 0;
		if (opts.require_jail) {
			ret = -1;
			goto errout;
		}
	}

	/* the remaining setup happens in post_main(), called from the uloop */
	uloop_timeout_add(&post_main_timeout);
	uloop_run();

errout:
	if (opts.ocibundle)
		cgroups_free();

	free_opts(true);

	return ret;
}
/*
 * Second stage of the jail setup, run from the uloop once main() is done.
 *
 * Applies resource limits, creates the two pipes used to synchronize with
 * the child (the parent reads pipes[0] and writes pipes[3]), registers the
 * default mounts, then clone()s (with namespaces) or fork()s the jail
 * process.
 *
 * The parent afterwards restores its own pid/time namespace, closes the
 * namespace fds it no longer needs, waits for one byte from the child,
 * applies OOM score, cgroups and uid/gid maps, starts jail networking,
 * writes the pidfile and runs the createRuntime hooks. The fork()ed child
 * runs exec_jail() directly.
 */
static void post_main(struct uloop_timeout *t)
{
	if (apply_rlimits()) {
		ERROR("error applying resource limits\n");
		free_and_exit(EXIT_FAILURE);
	}

	if (opts.name)
		prctl(PR_SET_NAME, opts.name, NULL, NULL, NULL);

	if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0)
		free_and_exit(-1);

	/*
	 * -1 means "nothing to restore". Must also hold on the fork() path
	 * below, otherwise the parent would setns() to and close fd 0.
	 */
	pidns_fd = -1;
#ifdef CLONE_NEWTIME
	timens_fd = -1;
#endif

	if (has_namespaces()) {
		if (opts.namespace & CLONE_NEWNS) {
			if (!opts.extroot && (opts.user || opts.group)) {
				add_mount_bind("/etc/passwd", 1, -1);
				add_mount_bind("/etc/group", 1, -1);
			}

#if defined(__GLIBC__)
			if (!opts.extroot)
				add_mount_bind("/etc/nsswitch.conf", 1, -1);
#endif
			if (opts.setns.ns == -1) {
				if (!(opts.namespace & CLONE_NEWNET)) {
					add_mount_bind("/etc/resolv.conf", 1, 0);
				} else {
					/* new mount namespace to provide /dev/resolv.conf.d */
					char hostdir[PATH_MAX];

					snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
					mkdir_p(hostdir, 0755);
					add_mount(hostdir, "/dev/resolv.conf.d", NULL,
						  MS_BIND | MS_NOEXEC | MS_NOATIME | MS_NOSUID | MS_NODEV | MS_RDONLY, 0, NULL, 0);
				}
			}

			/* default mounts */
			add_mount(NULL, "/dev", "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, 0, "size=1M", -1);
			add_mount(NULL, "/dev/pts", "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, 0, "newinstance,ptmxmode=0666,mode=0620,gid=5", 0);

			if (opts.procfs || opts.ocibundle) {
				add_mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0, NULL, -1);

				/*
				 * hack to make /proc/sys/net read-write while the rest of /proc/sys is read-only
				 * which cannot be expressed with OCI spec, but happens to be very useful.
				 * Only apply it if '/proc/sys' is not already listed as mount, maskedPath or
				 * readonlyPath.
				 * If not running in a new network namespace, only make /proc/sys read-only.
				 * If running in a new network namespace, temporarily stash (ie. mount-bind)
				 * /proc/sys/net into (totally unrelated, but surely existing) /proc/self/net.
				 * Then we mount-bind /proc/sys read-only and then mount-move /proc/self/net into
				 * /proc/sys/net.
				 * This works because mounts are executed in incrementing strcmp() order and
				 * /proc/self/net appears there before /proc/sys/net and hence the operation
				 * succeeds as the bind-mount of /proc/self/net is performed first and then
				 * move-mount of /proc/sys/net follows because 'e' precedes 'y' in the ASCII
				 * table (and in the alphabet).
				 */
				if (!add_mount(NULL, "/proc/sys", NULL, MS_BIND | MS_RDONLY, 0, NULL, -1))
					if (opts.namespace & CLONE_NEWNET)
						if (!add_mount_inner("/proc/self/net", "/proc/sys/net", NULL, MS_MOVE, 0, NULL, -1))
							add_mount_inner("/proc/sys/net", "/proc/self/net", NULL, MS_BIND, 0, NULL, -1);
			}

			if (opts.sysfs || opts.ocibundle)
				add_mount("sysfs", "/sys", "sysfs", MS_RELATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, 0, NULL, -1);

			if (opts.ocibundle)
				add_mount("shm", "/dev/shm", "tmpfs", MS_NOSUID | MS_NOEXEC | MS_NODEV, 0, "mode=1777", -1);
		}

		/* keep a handle on our own pid namespace so the parent can return to it */
		if (opts.setns.pid != -1) {
			pidns_fd = ns_open_pid("pid", getpid());
			setns_open(CLONE_NEWPID);
		}

#ifdef CLONE_NEWTIME
		/* same for the time namespace */
		if (opts.setns.time != -1) {
			timens_fd = ns_open_pid("time", getpid());
			setns_open(CLONE_NEWTIME);
		}
#endif

		if (opts.namespace & CLONE_NEWUSER) {
			if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP)) {
				ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n");
				free_and_exit(EXIT_FAILURE);
			}
			if (seteuid(opts.root_map_uid)) {
				ERROR("seteuid(%d) failed: %m\n", opts.root_map_uid);
				free_and_exit(EXIT_FAILURE);
			}
		}

		/* CLONE_NEWCGROUP is masked out of the clone() flags */
		jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, SIGCHLD | (opts.namespace & (~CLONE_NEWCGROUP)), NULL);
	} else {
		jail_process.pid = fork();
	}

	if (jail_process.pid > 0) {
		/* parent process */
		char sig_buf[1];

		uloop_process_add(&jail_process);
		jail_running = 1;
		if (seteuid(0)) {
			/* report the uid that was actually requested */
			ERROR("seteuid(%d) failed: %m\n", 0);
			free_and_exit(EXIT_FAILURE);
		}

		prctl(PR_SET_SECUREBITS, 0);

		if (pidns_fd != -1) {
			setns(pidns_fd, CLONE_NEWPID);
			close(pidns_fd);
		}

#ifdef CLONE_NEWTIME
		if (timens_fd != -1) {
			setns(timens_fd, CLONE_NEWTIME);
			close(timens_fd);
		}
#endif

		/* the parent no longer needs the namespace fds given with -j */
		if (opts.setns.net != -1)
			close(opts.setns.net);
		if (opts.setns.ns != -1)
			close(opts.setns.ns);
		if (opts.setns.ipc != -1)
			close(opts.setns.ipc);
		if (opts.setns.uts != -1)
			close(opts.setns.uts);
		if (opts.setns.user != -1)
			close(opts.setns.user);
		if (opts.setns.cgroup != -1)
			close(opts.setns.cgroup);

		close(pipes[1]);
		close(pipes[2]);

		/* wait for the first byte from the child before touching it */
		if (read(pipes[0], sig_buf, 1) < 1) {
			ERROR("can't read from child\n");
			free_and_exit(-1);
		}
		close(pipes[0]);

		set_oom_score_adj();

		if (opts.ocibundle)
			cgroups_apply(jail_process.pid);

		if (opts.namespace & CLONE_NEWUSER) {
			if (write_setgroups(jail_process.pid, true)) {
				ERROR("can't write setgroups\n");
				free_and_exit(-1);
			}
			if (!opts.uidmap) {
				bool has_gr = (opts.gr_gid != -1);

				if (opts.pw_uid != -1) {
					write_single_uid_gid_map(jail_process.pid, 0, opts.pw_uid);
					write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid);
				} else {
					write_single_uid_gid_map(jail_process.pid, 0, 65534);
					write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534);
				}
			} else {
				write_uid_gid_map(jail_process.pid, 0, opts.uidmap);
				if (opts.gidmap)
					write_uid_gid_map(jail_process.pid, 1, opts.gidmap);
			}
		}

		if (opts.namespace & CLONE_NEWNET)
			jail_network_start(parent_ctx, opts.name, jail_process.pid);

		if (jail_writepid(jail_process.pid)) {
			ERROR("failed to write pidfile: %m\n");
			free_and_exit(-1);
		}
	} else if (jail_process.pid == 0) {
		/* fork child process */
		free_and_exit(exec_jail(NULL));
	} else {
		ERROR("failed to clone/fork: %m\n");
		free_and_exit(EXIT_FAILURE);
	}

	run_hooks(opts.hooks.createRuntime, post_create_runtime);
}
static void post_poststart(void);

/*
 * Continuation after the createRuntime hooks: write 'O' to the child,
 * enter state "created" and then either start the container right away or,
 * for OCI bundles without the "immediately" flag, wait in the uloop for
 * the ubus 'start' command.
 */
static void post_create_runtime(void)
{
	const char ok = 'O';

	if (write(pipes[3], &ok, 1) < 0) {
		ERROR("can't write to child\n");
		free_and_exit(-1);
	}

	jail_oci_state = OCI_STATE_CREATED;

	if (!opts.ocibundle || opts.immediately) {
		pipe_send_start_container(NULL);
		return;
	}

	uloop_run(); /* wait for 'start' command via ubus */
}
/*
 * Enter state "running": write '!' to the child, close the parent's
 * writing end of the pipe and run the poststart hooks.
 * Also used as the callback of start_container_timeout (t is unused).
 */
static void pipe_send_start_container(struct uloop_timeout *t)
{
	const char go = '!';

	jail_oci_state = OCI_STATE_RUNNING;

	if (write(pipes[3], &go, 1) < 0) {
		ERROR("can't write to child\n");
		free_and_exit(-1);
	}

	close(pipes[3]);
	run_hooks(opts.hooks.poststart, post_poststart);
}
/*
 * Continuation after the poststart hooks: idle in the uloop while the jail
 * runs. If the loop ends while the jail is still alive, send SIGTERM, arm
 * jail_process_timeout with 1000 ms and run the loop once more. Finally
 * tear down the uloop and proceed to poststop().
 */
static void post_poststart(void)
{
	uloop_run(); /* idle here while jail is running */

	if (!jail_running)
		goto done;

	DEBUG("uloop interrupted, killing jail process\n");
	kill(jail_process.pid, SIGTERM);
	uloop_timeout_set(&jail_process_timeout, 1000);
	uloop_run();

done:
	uloop_done();
	poststop();
}
static void post_poststop(void);

/*
 * Clean up after the jail has stopped: for jails with their own network
 * namespace enter it via netns_fd, stop jail networking and close the fd;
 * then run the poststop hooks.
 */
static void poststop(void)
{
	const bool own_netns = (opts.namespace & CLONE_NEWNET) != 0;

	if (own_netns) {
		setns(netns_fd, CLONE_NEWNET);
		jail_network_stop();
		close(netns_fd);
	}

	run_hooks(opts.hooks.poststop, post_poststop);
}
/*
 * Final continuation after the poststop hooks: release the options and the
 * ubus connection, then exit with the return code recorded for the jail.
 */
static void post_poststop(void)
{
	free_opts(true);

	if (parent_ctx != NULL)
		ubus_free(parent_ctx);

	exit(jail_return_code);
}
/*
* Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
* as published by the Free Software Foundation
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <glob.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <signal.h>
#include <termios.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sched.h>
#include <libubus.h>
#include <libubox/avl-cmp.h>
#include <libubox/blobmsg.h>
#include <libubox/blobmsg_json.h>
#include <libubox/ustream.h>
#include "log.h"
#define UXC_VERSION "0.3"
#define OCI_VERSION_STRING "1.0.2"
#define UXC_ETC_CONFDIR "/etc/uxc"
#define UXC_VOL_CONFDIR "/tmp/run/uvol/.meta/uxc"
/* Global flags and state of the uxc tool; both flags default to off. */
static bool verbose = false;
static bool json_output = false;
/* Configuration directory, initially UXC_ETC_CONFDIR. */
static char *confdir = UXC_ETC_CONFDIR;
/* NOTE(review): users of the two ustream fds are outside this chunk - presumably console streams; confirm. */
static struct ustream_fd cufd;
static struct ustream_fd lufd;
/*
 * Runtime information about one container instance, filled by list_cb()
 * from the reply to the "list" call on the ubus "container" object.
 * Nodes live in the "runtime" AVL tree, keyed by container_name.
 */
struct runtime_state {
struct avl_node avl;
char *container_name; /* AVL key; allocated together with the struct */
char *instance_name; /* strdup()'d name of the instance */
char *jail_name; /* strdup()'d "name" from the instance's "jail" table */
bool running; /* "running" attribute, false when absent */
int runtime_pid; /* "pid" attribute, -1 when absent */
int exitcode; /* "exit_code" attribute, -1 when absent */
struct blob_attr *ocistate; /* copy of the "state" reply of container.<jail_name>, or NULL */
};
/*
 * Per-container settings, filled by settings_add().
 * Nodes live in the "settings" AVL tree, keyed by container_name. The
 * string and blob members other than container_name point into
 * settingsbuf and are not owned by the node.
 */
struct settings {
struct avl_node avl;
char *container_name; /* AVL key; allocated together with the struct */
const char *fname; /* name of the blobmsg table the entry was read from */
char *infra; /* "infra" setting or NULL */
char *tmprwsize; /* "temp-overlay-size" setting or NULL */
char *writepath; /* "write-overlay-path" setting or NULL */
signed char autostart; /* -1 if unset, otherwise the boolean "autostart" value */
struct blob_attr *volumes; /* "volumes" array or NULL */
};
/* Sub-commands of the uxc command line; CMD_UNKNOWN is the last enumerator. */
enum uxc_cmd {
CMD_ATTACH,
CMD_EXEC,
CMD_LIST,
CMD_BOOT,
CMD_START,
CMD_STATE,
CMD_KILL,
CMD_ENABLE,
CMD_DISABLE,
CMD_DELETE,
CMD_CREATE,
CMD_RESTART,
CMD_UNKNOWN
};
/*
 * Short option string.
 * NOTE(review): 'c' (--console) and 'I' (--infra) appear in long_options
 * but not here, so they would only be reachable in their long form -
 * confirm this is intended.
 */
#define OPT_ARGS "ab:fjm:p:t:vVw:"
/* Long options and the short option character each one maps to. */
static struct option long_options[] = {
{"autostart", no_argument, 0, 'a' },
{"console", no_argument, 0, 'c' },
{"bundle", required_argument, 0, 'b' },
{"force", no_argument, 0, 'f' },
{"json", no_argument, 0, 'j' },
{"mounts", required_argument, 0, 'm' },
{"pid-file", required_argument, 0, 'p' },
{"temp-overlay-size", required_argument, 0, 't' },
{"write-overlay-path", required_argument, 0, 'w' },
{"verbose", no_argument, 0, 'v' },
{"version", no_argument, 0, 'V' },
{"infra", required_argument, 0, 'I' },
{0, 0, 0, 0 }
};
/* Trees of struct runtime_state / struct settings, keyed by container name; duplicate keys are not allowed. */
AVL_TREE(runtime, avl_strcmp, false, NULL);
AVL_TREE(settings, avl_strcmp, false, NULL);
/* Blob buffers filled by conf_load(): container configurations and settings. */
static struct blob_buf conf;
static struct blob_buf settingsbuf;
/* NOTE(review): blockinfo and fstabinfo are not used in this chunk - presumably block/fstab data used for volume checks; confirm. */
static struct blob_attr *blockinfo;
static struct blob_attr *fstabinfo;
/* ubus connection used for all ubus lookups and calls of the tool. */
static struct ubus_context *ctx;
/*
 * Print the command overview to stdout.
 * Always returns -EINVAL so that callers can "return usage();".
 */
static int usage(void) {
	printf("syntax: uxc <command> [parameters ...]\n");
	printf("commands:\n");
	printf("\tlist [--json]\t\t\t\tlist all configured containers\n");
	printf("\tattach <conf>\t\t\t\tattach to container console\n");
	printf("\texec <conf> [cmd]\t\t\texecute command or shell in container\n");
	printf("\tcreate <conf>\t\t\t\t(re-)create <conf>\n");
	printf("\t\t[--bundle <path>]\t\t\tOCI bundle at <path>\n");
	printf("\t\t[--autostart]\t\t\t\tstart on boot\n");
	printf("\t\t[--temp-overlay-size <size>]\t\tuse tmpfs overlay with {size}\n");
	printf("\t\t[--write-overlay-path <path>]\t\tuse overlay on {path}\n");
	printf("\t\t[--mounts <v1>,<v2>,...,<vN>]\t\trequire filesystems to be available\n");
	/* closing bracket added; it also restores the column alignment */
	printf("\t\t[--infra <conf>]\t\t\tuse shared namespace of other container\n");
	printf("\tstart [--console] <conf>\t\tstart container <conf>\n");
	printf("\tstate <conf>\t\t\t\tget state of container <conf>\n");
	printf("\tkill <conf> [<signal>]\t\t\tsend signal to container <conf>\n");
	printf("\trestart [--console] <conf> [<signal>]\trestart container <conf>\n");
	printf("\tenable <conf>\t\t\t\tstart container <conf> on boot\n");
	printf("\tdisable <conf>\t\t\t\tdon't start container <conf> on boot\n");
	printf("\tdelete <conf> [--force]\t\t\tdelete <conf>\n");

	return -EINVAL;
}
/* Attribute indices for a container configuration / settings object; one per key in conf_policy. */
enum {
CONF_NAME,
CONF_PATH,
CONF_JAIL,
CONF_INFRA,
CONF_AUTOSTART,
CONF_PIDFILE,
CONF_TEMP_OVERLAY_SIZE,
CONF_WRITE_OVERLAY_PATH,
CONF_VOLUMES,
__CONF_MAX,
};
/* Key names and blobmsg types of the configuration members parsed with blobmsg_parse(). */
static const struct blobmsg_policy conf_policy[__CONF_MAX] = {
[CONF_NAME] = { .name = "name", .type = BLOBMSG_TYPE_STRING },
[CONF_PATH] = { .name = "path", .type = BLOBMSG_TYPE_STRING },
[CONF_JAIL] = { .name = "jail", .type = BLOBMSG_TYPE_STRING },
[CONF_INFRA] = { .name = "infra", .type = BLOBMSG_TYPE_STRING },
[CONF_AUTOSTART] = { .name = "autostart", .type = BLOBMSG_TYPE_BOOL },
[CONF_PIDFILE] = { .name = "pidfile", .type = BLOBMSG_TYPE_STRING },
[CONF_TEMP_OVERLAY_SIZE] = { .name = "temp-overlay-size", .type = BLOBMSG_TYPE_STRING },
[CONF_WRITE_OVERLAY_PATH] = { .name = "write-overlay-path", .type = BLOBMSG_TYPE_STRING },
[CONF_VOLUMES] = { .name = "volumes", .type = BLOBMSG_TYPE_ARRAY },
};
/*
 * Open /proc/<pid>/<name> read-only.
 *
 * The entry must exist and be a symbolic link; lstat() is used so that the
 * link itself is examined rather than its target.
 *
 * Returns the open file descriptor, or a negative value after printing a
 * diagnostic to stderr.
 */
static int open_ns(int pid, char *name) {
	char *path;
	struct stat st;
	int fd;

	if (asprintf(&path, "/proc/%d/%s", pid, name) == -1) {
		fprintf(stderr, "cannot allocate path /proc/%d/%s\n", pid, name);
		return -1;
	}

	if (lstat(path, &st) != 0 || !S_ISLNK(st.st_mode)) {
		fprintf(stderr, "file /proc/%d/%s does not exists or is not a symbolic link\n", pid, name);
		free(path);
		return -1;
	}

	fd = open(path, O_RDONLY);
	free(path);

	if (fd < 0)
		fprintf(stderr, "cannot open /proc/%d/%s\n", pid, name);

	return fd;
}
/*
 * Load all "*.json" files from UXC_ETC_CONFDIR and, if that directory
 * exists, from UXC_VOL_CONFDIR into one blob buffer.
 *
 * With load_settings the "settings/" sub-directory is read into
 * settingsbuf, otherwise the directories themselves are read into conf.
 * The buffer ends up holding one unnamed table that contains one table per
 * file, named after the file's path. A file that cannot be loaded is
 * reported and leaves an empty table behind.
 *
 * Returns 0 on success (also when no file matched) or -ENOMEM.
 */
static int conf_load(bool load_settings)
{
	int gl_flags = GLOB_NOESCAPE | GLOB_MARK;
	size_t j;
	glob_t gl = { 0 };
	char *globstr;
	void *c, *o;
	struct stat sb;
	struct blob_buf *target;

	if (asprintf(&globstr, "%s/%s*.json", UXC_ETC_CONFDIR, load_settings?"settings/":"") == -1)
		return -ENOMEM;

	/* only append to the first result if there is one */
	if (glob(globstr, gl_flags, NULL, &gl) == 0)
		gl_flags |= GLOB_APPEND;

	free(globstr);

	if (!stat(UXC_VOL_CONFDIR, &sb) && S_ISDIR(sb.st_mode)) {
		if (asprintf(&globstr, "%s/%s*.json", UXC_VOL_CONFDIR, load_settings?"settings/":"") == -1) {
			globfree(&gl);
			return -ENOMEM;
		}

		/* no match here is not an error; earlier matches are kept */
		glob(globstr, gl_flags, NULL, &gl);
		free(globstr);
	}

	target = load_settings ? &settingsbuf : &conf;
	blob_buf_init(target, 0);
	c = blobmsg_open_table(target, NULL);

	for (j = 0; j < gl.gl_pathc; j++) {
		o = blobmsg_open_table(target, gl.gl_pathv[j]);
		if (!blobmsg_add_json_from_file(target, gl.gl_pathv[j]))
			ERROR("uxc: failed to load %s\n", gl.gl_pathv[j]);

		/* always close, or later files would nest inside this table */
		blobmsg_close_table(target, o);
	}

	blobmsg_close_table(target, c);
	globfree(&gl);

	return 0;
}
/*
 * Allocate a settings node for container_name.
 *
 * The name is copied into the same allocation and used as AVL key,
 * autostart starts out as -1 (unset) and all other members as NULL.
 * Returns NULL if the allocation fails; release with free().
 */
static struct settings *
settings_alloc(const char *container_name)
{
	struct settings *s;
	char *new_name;

	s = calloc_a(sizeof(*s), &new_name, strlen(container_name) + 1);
	if (!s)
		return NULL;

	strcpy(new_name, container_name);
	s->container_name = new_name;
	s->avl.key = s->container_name;
	s->autostart = -1;
	s->infra = NULL;
	s->tmprwsize = NULL;
	s->writepath = NULL;
	s->volumes = NULL;

	return s;
}

/*
 * (Re-)build the "settings" tree from the tables in settingsbuf.
 *
 * Tables without a "name" are skipped. Nodes reference strings inside
 * settingsbuf, so the buffer must outlive the tree. A node that cannot be
 * inserted (e.g. duplicate name) is reported and dropped.
 *
 * Returns 0 on success, -EINVAL if an entry sets both "temp-overlay-size"
 * and "write-overlay-path", -ENOMEM if a node cannot be allocated.
 */
static int settings_add(void)
{
	struct blob_attr *cur, *tb[__CONF_MAX];
	struct settings *s;
	int rem, err;

	avl_init(&settings, avl_strcmp, false, NULL);

	blobmsg_for_each_attr(cur, blob_data(settingsbuf.head), rem) {
		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
		if (!tb[CONF_NAME])
			continue;

		if (tb[CONF_TEMP_OVERLAY_SIZE] && tb[CONF_WRITE_OVERLAY_PATH])
			return -EINVAL;

		s = settings_alloc(blobmsg_get_string(tb[CONF_NAME]));
		if (!s)
			return -ENOMEM;

		if (tb[CONF_INFRA])
			s->infra = blobmsg_get_string(tb[CONF_INFRA]);

		if (tb[CONF_AUTOSTART])
			s->autostart = blobmsg_get_bool(tb[CONF_AUTOSTART]);

		if (tb[CONF_TEMP_OVERLAY_SIZE])
			s->tmprwsize = blobmsg_get_string(tb[CONF_TEMP_OVERLAY_SIZE]);

		if (tb[CONF_WRITE_OVERLAY_PATH])
			s->writepath = blobmsg_get_string(tb[CONF_WRITE_OVERLAY_PATH]);

		s->volumes = tb[CONF_VOLUMES];
		s->fname = blobmsg_name(cur);

		err = avl_insert(&settings, &s->avl);
		if (err) {
			fprintf(stderr, "error adding settings for %s\n", blobmsg_get_string(tb[CONF_NAME]));
			free(s);
		}
	}

	return 0;
}
/* Remove every node from the "settings" tree and free it. */
static void settings_free(void)
{
	struct settings *entry, *next;

	avl_for_each_element_safe(&settings, entry, avl, next) {
		avl_delete(&settings, &entry->avl);
		free(entry);
	}
}
/* Policies used by list_cb() to walk the reply of the "list" call on the ubus "container" object. */
enum {
LIST_INSTANCES,
__LIST_MAX,
};
/* Per container: the table of its instances. */
static const struct blobmsg_policy list_policy[__LIST_MAX] = {
[LIST_INSTANCES] = { .name = "instances", .type = BLOBMSG_TYPE_TABLE },
};
enum {
INSTANCE_RUNNING,
INSTANCE_PID,
INSTANCE_EXITCODE,
INSTANCE_JAIL,
__INSTANCE_MAX,
};
/* Per instance: running flag, pid, exit code and the "jail" table. */
static const struct blobmsg_policy instance_policy[__INSTANCE_MAX] = {
[INSTANCE_RUNNING] = { .name = "running", .type = BLOBMSG_TYPE_BOOL },
[INSTANCE_PID] = { .name = "pid", .type = BLOBMSG_TYPE_INT32 },
[INSTANCE_EXITCODE] = { .name = "exit_code", .type = BLOBMSG_TYPE_INT32 },
[INSTANCE_JAIL] = { .name = "jail", .type = BLOBMSG_TYPE_TABLE },
};
enum {
JAIL_NAME,
__JAIL_MAX,
};
/* Inside "jail": only the jail's name is read. */
static const struct blobmsg_policy jail_policy[__JAIL_MAX] = {
[JAIL_NAME] = { .name = "name", .type = BLOBMSG_TYPE_STRING },
};
/*
 * Allocate a runtime_state node for container_name.
 *
 * The name is copied into the same allocation and used as AVL key; the
 * remaining members are left as provided by calloc_a().
 * Returns NULL if the allocation fails; release with free().
 */
static struct runtime_state *
runtime_alloc(const char *container_name)
{
	struct runtime_state *s;
	char *new_name;

	s = calloc_a(sizeof(*s), &new_name, strlen(container_name) + 1);
	if (!s)
		return NULL;

	strcpy(new_name, container_name);
	s->container_name = new_name;
	s->avl.key = s->container_name;

	return s;
}
/* Attribute indices for the reply of a container's ubus "state" method. */
enum {
STATE_OCIVERSION,
STATE_ID,
STATE_STATUS,
STATE_PID,
STATE_BUNDLE,
STATE_INFRA,
STATE_ANNOTATIONS,
__STATE_MAX,
};
/* Key names and types of the "state" reply; ocistate_cb() requires ociVersion, id, status and bundle. */
static const struct blobmsg_policy state_policy[__STATE_MAX] = {
[STATE_OCIVERSION] = { .name = "ociVersion", .type = BLOBMSG_TYPE_STRING },
[STATE_ID] = { .name = "id", .type = BLOBMSG_TYPE_STRING },
[STATE_STATUS] = { .name = "status", .type = BLOBMSG_TYPE_STRING },
[STATE_PID] = { .name = "pid", .type = BLOBMSG_TYPE_INT32 },
[STATE_BUNDLE] = { .name = "bundle", .type = BLOBMSG_TYPE_STRING },
[STATE_INFRA] = { .name = "infra", .type = BLOBMSG_TYPE_STRING },
[STATE_ANNOTATIONS] = { .name = "annotations", .type = BLOBMSG_TYPE_TABLE },
};
/*
 * Reply handler for the "state" call: if the reply carries ociVersion, id,
 * status and bundle, store a heap copy of it in the pointer passed as
 * req->priv; otherwise leave that pointer untouched.
 */
static void ocistate_cb(struct ubus_request *req, int type, struct blob_attr *msg)
{
	static const int required[] = {
		STATE_OCIVERSION,
		STATE_ID,
		STATE_STATUS,
		STATE_BUNDLE,
	};
	struct blob_attr *tb[__STATE_MAX];
	struct blob_attr **out = (struct blob_attr **)req->priv;
	size_t i;

	blobmsg_parse(state_policy, __STATE_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));

	for (i = 0; i < sizeof(required) / sizeof(required[0]); i++) {
		if (!tb[required[i]])
			return;
	}

	*out = blob_memdup(msg);
}
/*
 * Query the ubus object "container.<name>" for its state.
 *
 * *ocistate is set to NULL first and only replaced when the object exists
 * and ocistate_cb() accepts the reply. Exits the process with ENOMEM if
 * the object name cannot be allocated.
 */
static void get_ocistate(struct blob_attr **ocistate, const char *name)
{
	char *objname;
	unsigned int id;
	int lookup_failed;

	*ocistate = NULL;

	if (asprintf(&objname, "container.%s", name) == -1)
		exit(ENOMEM);

	lookup_failed = ubus_lookup_id(ctx, objname, &id);
	free(objname);

	if (!lookup_failed)
		ubus_invoke(ctx, id, "state", NULL, ocistate_cb, ocistate, 3000);
}
/*
 * Reply handler for the "list" call on the "container" ubus object.
 *
 * Walks every container in the reply and every instance below it. Each
 * instance that has a jail with a name becomes one runtime_state entry in
 * the global `runtime` tree, keyed by container name.
 *
 * Missing optional fields fall back to: running = false, pid = -1,
 * exit code = -1.
 *
 * An entry that cannot be fully allocated, or whose key is already present
 * in the tree (the tree does not allow duplicates), is released again
 * instead of being leaked.
 */
static void list_cb(struct ubus_request *req, int type, struct blob_attr *msg)
{
	struct blob_attr *cur, *curi, *tl[__LIST_MAX], *ti[__INSTANCE_MAX], *tj[__JAIL_MAX];
	int rem, remi;
	const char *container_name;
	int pid, exitcode;
	struct runtime_state *rs;

	blobmsg_for_each_attr(cur, msg, rem) {
		container_name = blobmsg_name(cur);
		blobmsg_parse(list_policy, __LIST_MAX, tl, blobmsg_data(cur), blobmsg_len(cur));
		if (!tl[LIST_INSTANCES])
			continue;

		blobmsg_for_each_attr(curi, tl[LIST_INSTANCES], remi) {
			blobmsg_parse(instance_policy, __INSTANCE_MAX, ti, blobmsg_data(curi), blobmsg_len(curi));
			if (!ti[INSTANCE_JAIL])
				continue;

			blobmsg_parse(jail_policy, __JAIL_MAX, tj, blobmsg_data(ti[INSTANCE_JAIL]), blobmsg_len(ti[INSTANCE_JAIL]));
			if (!tj[JAIL_NAME])
				continue;

			if (ti[INSTANCE_PID])
				pid = blobmsg_get_u32(ti[INSTANCE_PID]);
			else
				pid = -1;

			if (ti[INSTANCE_EXITCODE])
				exitcode = blobmsg_get_u32(ti[INSTANCE_EXITCODE]);
			else
				exitcode = -1;

			rs = runtime_alloc(container_name);
			if (!rs)
				continue;

			rs->instance_name = strdup(blobmsg_name(curi));
			rs->jail_name = strdup(blobmsg_get_string(tj[JAIL_NAME]));
			rs->runtime_pid = pid;
			rs->exitcode = exitcode;
			rs->running = ti[INSTANCE_RUNNING] && blobmsg_get_bool(ti[INSTANCE_RUNNING]);

			/* avl_insert() fails on a duplicate key; do not leak the entry */
			if (!rs->instance_name || !rs->jail_name ||
			    avl_insert(&runtime, &rs->avl)) {
				free(rs->instance_name);
				free(rs->jail_name);
				free(rs);
			}
		}
	}
}
static int runtime_load(void)
{
struct runtime_state *item, *tmp;
uint32_t id;
avl_init(&runtime, avl_strcmp, false, NULL);
if (ubus_lookup_id(ctx, "container", &id) ||
ubus_invoke(ctx, id, "list", NULL, list_cb, &runtime, 3000))
return -EIO;
avl_for_each_element_safe(&runtime, item, avl, tmp)
get_ocistate(&item->ocistate, item->jail_name);
return 0;
}
/*
 * Empty the global `runtime` tree: every entry is unlinked and released
 * together with its instance name, jail name and state blob.
 */
static void runtime_free(void)
{
	struct runtime_state *entry, *next;

	avl_for_each_element_safe(&runtime, entry, avl, next) {
		avl_delete(&runtime, &entry->avl);

		free(entry->ocistate);
		free(entry->jail_name);
		free(entry->instance_name);
		free(entry);
	}
}
/*
 * Switch the terminal behind `fd` to the raw-ish mode used for console
 * forwarding (the flag set mirrors what ssh uses).
 *
 * The previous attributes are stored in *oldtios so the caller can restore
 * them later.
 *
 * Returns 0 on success, -EIO if fd is not a terminal, or -errno when
 * reading or applying the attributes fails.
 */
static inline int setup_tios(int fd, struct termios *oldtios)
{
	struct termios raw;

	if (!isatty(fd))
		return -EIO;

	/* remember the current settings for the caller */
	if (tcgetattr(fd, oldtios) < 0)
		return -errno;

	raw = *oldtios;

	/* input: ignore parity errors, no stripping/translation/flow control */
	raw.c_iflag = (raw.c_iflag | IGNPAR) &
		      ~(ISTRIP | INLCR | IGNCR | ICRNL | IXON | IXANY | IXOFF);

	/* local: no signals, no line editing, no echo */
	raw.c_lflag &= ~(TOSTOP | ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL);

	/* output: keep post-processing but do not map NL to CR-NL */
	raw.c_oflag = (raw.c_oflag & ~ONLCR) | OPOST;

	/* reads return as soon as one byte is available */
	raw.c_cc[VMIN] = 1;
	raw.c_cc[VTIME] = 0;

	if (tcsetattr(fd, TCSAFLUSH, &raw) < 0)
		return -errno;

	return 0;
}
/*
 * Read notification for the container side (cufd): forward everything that
 * is pending on `s` to the local terminal stream (lufd).
 *
 * The loop keeps draining read buffers as long as each one is written
 * completely; it stops when nothing is left to read, on a write error, or on
 * a short write (the unconsumed remainder stays in the read buffer).
 *
 * The previous exit test `rv <= len` was always true, so only a single
 * buffer was ever forwarded per notification.
 */
static void client_cb(struct ustream *s, int bytes)
{
	char *buf;
	int len, rv;

	do {
		buf = ustream_get_read_buf(s, &len);
		if (!buf)
			break;

		rv = ustream_write(&lufd.stream, buf, len, false);
		if (rv > 0)
			ustream_consume(s, rv);

		/* error or short write: stop instead of spinning */
		if (rv < len)
			break;
	} while (1);
}
/*
 * Read notification for the local terminal (lufd): forward pending input to
 * the container side (cufd).
 *
 * If a read buffer starts with byte 0x02 (CTRL+B, the detach key announced
 * by uxc_attach) the event loop is asked to terminate; the data is still
 * forwarded, as before.
 *
 * The loop keeps draining read buffers as long as each one is written
 * completely; it stops when nothing is left to read, on a write error, or on
 * a short write. The previous exit test `rv <= len` was always true, so only
 * a single buffer was ever forwarded per notification.
 */
static void local_cb(struct ustream *s, int bytes)
{
	char *buf;
	int len, rv;

	do {
		buf = ustream_get_read_buf(s, &len);
		if (!buf)
			break;

		if ((len > 0) && (buf[0] == 2))
			uloop_end();

		rv = ustream_write(&cufd.stream, buf, len, false);
		if (rv > 0)
			ustream_consume(s, rv);

		/* error or short write: stop instead of spinning */
		if (rv < len)
			break;
	} while (1);
}
/*
 * Attach the controlling terminal to the console of `container_name`.
 *
 * A pseudo-terminal pair is created; its slave end is handed to procd via
 * the "console_attach" method of the "container" object, and data is then
 * relayed between /dev/tty and the master end until local_cb() sees the
 * detach key and stops the event loop.
 *
 * Uses its own ubus connection (the local `ctx` intentionally shadows the
 * global one) which is released before the relay loop starts.
 *
 * Returns 0 after a regular detach, -ECONNREFUSED if ubus is unreachable,
 * -EIO if a terminal cannot be opened, -ENXIO if the ubus request fails.
 *
 * Changes compared to the earlier version:
 *  - the saved attributes of /dev/tty are kept in their own variable and are
 *    restored on the ubus failure path too, so the user's terminal is not
 *    left in raw mode;
 *  - the terminal is only restored when its attributes were actually saved;
 *  - a NULL result from ptsname() is treated as an open failure;
 *  - tty_fd is closed on the regular exit path.
 */
static int uxc_attach(const char *container_name)
{
	struct ubus_context *ctx;
	uint32_t id;
	static struct blob_buf req;
	int client_fd, server_fd, tty_fd;
	struct termios oldtermios, ptytermios;
	const char *pts;
	bool tty_saved;

	ctx = ubus_connect(NULL);
	if (!ctx) {
		fprintf(stderr, "can't connect to ubus!\n");
		return -ECONNREFUSED;
	}

	/* open pseudo-terminal pair */
	client_fd = posix_openpt(O_RDWR | O_NOCTTY);
	if (client_fd < 0) {
		fprintf(stderr, "can't create virtual console!\n");
		ubus_free(ctx);
		return -EIO;
	}
	setup_tios(client_fd, &ptytermios);
	grantpt(client_fd);
	unlockpt(client_fd);

	pts = ptsname(client_fd);
	server_fd = pts ? open(pts, O_RDWR | O_NOCTTY) : -1;
	if (server_fd < 0) {
		fprintf(stderr, "can't open virtual console!\n");
		close(client_fd);
		ubus_free(ctx);
		return -EIO;
	}
	setup_tios(server_fd, &ptytermios);

	tty_fd = open("/dev/tty", O_RDWR);
	if (tty_fd < 0) {
		fprintf(stderr, "can't open local console!\n");
		close(server_fd);
		close(client_fd);
		ubus_free(ctx);
		return -EIO;
	}
	/* only restore later if the original attributes were really captured */
	tty_saved = (setup_tios(tty_fd, &oldtermios) == 0);

	/* register server-side with procd */
	blob_buf_init(&req, 0);
	blobmsg_add_string(&req, "name", container_name);
	blobmsg_add_string(&req, "instance", container_name);

	if (ubus_lookup_id(ctx, "container", &id) ||
	    ubus_invoke_fd(ctx, id, "console_attach", req.head, NULL, NULL, 3000, server_fd)) {
		fprintf(stderr, "ubus request failed\n");
		if (tty_saved)
			tcsetattr(tty_fd, TCSAFLUSH, &oldtermios);
		close(tty_fd);
		close(server_fd);
		close(client_fd);
		blob_buf_free(&req);
		ubus_free(ctx);
		return -ENXIO;
	}

	close(server_fd);
	blob_buf_free(&req);
	ubus_free(ctx);

	uloop_init();

	/* forward between stdio and client_fd until detach is requested */
	lufd.stream.notify_read = local_cb;
	ustream_fd_init(&lufd, tty_fd);

	cufd.stream.notify_read = client_cb;
	/* ToDo: handle remote close and other events */
	// cufd.stream.notify_state = client_state_cb;
	ustream_fd_init(&cufd, client_fd);

	fprintf(stderr, "attaching to jail console. press [CTRL]+[B] to exit.\n");
	close(0);
	close(1);
	close(2);

	uloop_run();

	if (tty_saved)
		tcsetattr(tty_fd, TCSAFLUSH, &oldtermios);

	ustream_free(&lufd.stream);
	ustream_free(&cufd.stream);
	close(client_fd);
	close(tty_fd);

	return 0;
}
/*
 * Run args[0] (with argument vector `args`) inside the running container
 * `container_name`.
 *
 * The container's init PID is taken from its recorded state. All namespace
 * handles and the root directory handle are opened first (via open_ns()),
 * then the ipc, mnt, net, uts and pid namespaces are entered in that order,
 * the root is switched with fchdir()+chroot(), and finally the program is
 * exec'ed. On success this function does not return.
 *
 * Returns -ENOENT if the container is not configured or has no usable PID,
 * -ENXIO if a namespace cannot be opened/entered or the exec fails.
 *
 * Changes compared to the earlier version: a state without a "pid" field no
 * longer causes a NULL dereference, and all opened handles are closed again
 * on every path.
 */
static int uxc_exec(const char *container_name, char **args)
{
	enum { NS_COUNT = 5 };
	static const char *const ns_label[NS_COUNT] = { "ipc", "mnt", "net", "uts", "pid" };
	static const char *const ns_path[NS_COUNT] = { "ns/ipc", "ns/mnt", "ns/net", "ns/uts", "ns/pid" };
	struct blob_attr *cur, *tb[__CONF_MAX], *ts[__STATE_MAX];
	struct runtime_state *rsstate = NULL;
	int rem, container_pid = 0;
	int ns_fd[NS_COUNT];
	int ns_root, i;
	int ret = -ENXIO;
	bool found = false;

	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
		if (!tb[CONF_NAME] || !tb[CONF_PATH])
			continue;

		if (strcmp(container_name, blobmsg_get_string(tb[CONF_NAME])))
			continue;

		found = true;
		break;
	}

	if (!found)
		return -ENOENT;

	rsstate = avl_find_element(&runtime, container_name, rsstate, avl);
	if (rsstate && rsstate->ocistate) {
		blobmsg_parse(state_policy, __STATE_MAX, ts, blobmsg_data(rsstate->ocistate), blobmsg_len(rsstate->ocistate));
		/* "pid" is optional in the state message */
		if (ts[STATE_PID])
			container_pid = blobmsg_get_u32(ts[STATE_PID]);
	}

	if (container_pid < 2) {
		fprintf(stderr, "failed to container pid for %s\n", container_name);
		return -ENOENT;
	}

	/* open everything before switching namespaces */
	for (i = 0; i < NS_COUNT; i++)
		ns_fd[i] = open_ns(container_pid, ns_path[i]);
	ns_root = open_ns(container_pid, "root");

	if (ns_root == -1)
		goto out;
	for (i = 0; i < NS_COUNT; i++)
		if (ns_fd[i] == -1)
			goto out;

	for (i = 0; i < NS_COUNT; i++) {
		if (setns(ns_fd[i], 0) == -1) {
			fprintf(stderr, "failed to enter %s namespace\n", ns_label[i]);
			goto out;
		}
	}

	if (fchdir(ns_root) == -1) {
		fprintf(stderr, "failed to change working directory\n");
		goto out;
	}

	if (chroot(".") == -1) {
		fprintf(stderr, "failed to chroot\n");
		goto out;
	}

	/* the handles are no longer needed; do not pass them to the new program */
	for (i = 0; i < NS_COUNT; i++) {
		close(ns_fd[i]);
		ns_fd[i] = -1;
	}
	close(ns_root);
	ns_root = -1;

	/* execv() only returns on failure */
	execv(args[0], args);
	fprintf(stderr, "failed to execute %s in container %s\n", args[0], container_name);

out:
	for (i = 0; i < NS_COUNT; i++)
		if (ns_fd[i] != -1)
			close(ns_fd[i]);
	if (ns_root != -1)
		close(ns_root);

	return ret;
}
/*
 * Print the state of container `name` as JSON on stdout.
 *
 * When a state blob was recorded for the container it is printed as is.
 * Otherwise a minimal state object is built from the configuration entry:
 * ociVersion, id (the configured jail name, or `name`), status ("stopped" if
 * the container is present in the runtime tree, "uninitialized" if not),
 * bundle, and infra when configured.
 *
 * Returns 0 on success, -ENOENT if the container is not configured,
 * -ENOMEM if the JSON text cannot be produced.
 */
static int uxc_state(char *name)
{
	static struct blob_buf buf;
	struct runtime_state *rt = avl_find_element(&runtime, name, rt, avl);
	struct blob_attr *entry, *fields[__CONF_MAX];
	char *bundle_path = NULL;
	char *id = NULL;
	char *infra_name = NULL;
	char *json;
	int left;
	int rc = 0;

	/* live state available: dump it verbatim */
	if (rt && rt->ocistate) {
		json = blobmsg_format_json_indent(rt->ocistate, true, 0);
		if (!json)
			return -ENOMEM;

		printf("%s\n", json);
		free(json);
		return 0;
	}

	blobmsg_for_each_attr(entry, blob_data(conf.head), left) {
		blobmsg_parse(conf_policy, __CONF_MAX, fields, blobmsg_data(entry), blobmsg_len(entry));
		if (!fields[CONF_NAME] || !fields[CONF_PATH])
			continue;

		if (strcmp(name, blobmsg_get_string(fields[CONF_NAME])) != 0)
			continue;

		id = fields[CONF_JAIL] ? blobmsg_get_string(fields[CONF_JAIL]) : name;
		if (fields[CONF_INFRA])
			infra_name = blobmsg_get_string(fields[CONF_INFRA]);
		bundle_path = blobmsg_get_string(fields[CONF_PATH]);
		break;
	}

	if (!bundle_path)
		return -ENOENT;

	blob_buf_init(&buf, 0);
	blobmsg_add_string(&buf, "ociVersion", OCI_VERSION_STRING);
	blobmsg_add_string(&buf, "id", id);
	blobmsg_add_string(&buf, "status", rt ? "stopped" : "uninitialized");
	blobmsg_add_string(&buf, "bundle", bundle_path);
	if (infra_name)
		blobmsg_add_string(&buf, "infra", infra_name);

	json = blobmsg_format_json_indent(buf.head, true, 0);
	if (json) {
		printf("%s\n", json);
		free(json);
	} else {
		rc = -ENOMEM;
	}

	blob_buf_free(&buf);
	return rc;
}
/*
 * List all configured containers, either as text lines or, when the global
 * json_output flag is set, as one JSON array on stdout.
 *
 * Per container the output contains the name, a status (the status string
 * from the recorded state if available, else "creating" when the runtime
 * entry is running, else "stopped") and the autostart flag (the settings
 * value overrides the configuration value). Exit code, runtime PID and
 * container PID are added depending on the runtime entry.
 *
 * Returns 0 on success or -ENOMEM if the JSON text cannot be produced.
 *
 * Change compared to the earlier version: "status" and "pid" are only read
 * from the state blob when they are present; a state without "pid" used to
 * cause a NULL dereference. The container PID then keeps its default of 0.
 */
static int uxc_list(void)
{
	struct blob_attr *cur, *tb[__CONF_MAX], *ts[__STATE_MAX];
	int rem;
	struct runtime_state *rsstate = NULL;
	struct settings *usettings = NULL;
	char *name, *ocistatus, *status, *tmp;
	int container_pid = -1;
	bool autostart;
	static struct blob_buf buf;
	void *arr = NULL, *obj = NULL;

	if (json_output) {
		blob_buf_init(&buf, 0);
		arr = blobmsg_open_array(&buf, "");
	}

	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
		if (!tb[CONF_NAME] || !tb[CONF_PATH])
			continue;

		autostart = tb[CONF_AUTOSTART] && blobmsg_get_bool(tb[CONF_AUTOSTART]);
		ocistatus = NULL;
		container_pid = 0;
		name = blobmsg_get_string(tb[CONF_NAME]);

		rsstate = avl_find_element(&runtime, name, rsstate, avl);
		if (rsstate && rsstate->ocistate) {
			blobmsg_parse(state_policy, __STATE_MAX, ts, blobmsg_data(rsstate->ocistate), blobmsg_len(rsstate->ocistate));
			if (ts[STATE_STATUS])
				ocistatus = blobmsg_get_string(ts[STATE_STATUS]);
			/* "pid" is optional in the state message */
			if (ts[STATE_PID])
				container_pid = blobmsg_get_u32(ts[STATE_PID]);
		}

		if (ocistatus)
			status = ocistatus;
		else if (rsstate && rsstate->running)
			status = "creating";
		else
			status = "stopped";

		usettings = avl_find_element(&settings, name, usettings, avl);
		if (usettings && (usettings->autostart >= 0))
			autostart = !!(usettings->autostart);

		if (json_output) {
			obj = blobmsg_open_table(&buf, "");
			blobmsg_add_string(&buf, "name", name);
			blobmsg_add_string(&buf, "status", status);
			blobmsg_add_u8(&buf, "autostart", autostart);
		} else {
			printf("[%c] %s %s", autostart?'*':' ', name, status);
		}

		if (rsstate && !rsstate->running && (rsstate->exitcode >= 0)) {
			if (json_output)
				blobmsg_add_u32(&buf, "exitcode", rsstate->exitcode);
			else
				printf(" exitcode: %d (%s)", rsstate->exitcode, strerror(rsstate->exitcode));
		}

		if (rsstate && rsstate->running && (rsstate->runtime_pid >= 0)) {
			if (json_output)
				blobmsg_add_u32(&buf, "runtime_pid", rsstate->runtime_pid);
			else
				printf(" runtime pid: %d", rsstate->runtime_pid);
		}

		if (rsstate && rsstate->running && (container_pid >= 0)) {
			if (json_output)
				blobmsg_add_u32(&buf, "container_pid", container_pid);
			else
				printf(" container pid: %d", container_pid);
		}

		if (!json_output)
			printf("\n");
		else
			blobmsg_close_table(&buf, obj);
	}

	if (json_output) {
		blobmsg_close_array(&buf, arr);
		tmp = blobmsg_format_json_indent(buf.head, true, 0);
		if (!tmp) {
			blob_buf_free(&buf);
			return -ENOMEM;
		}
		printf("%s\n", tmp);
		free(tmp);
		blob_buf_free(&buf);
	}

	return 0;
}
/*
 * Report whether container `name` is currently running according to the
 * runtime tree.
 *
 * Returns -EEXIST when a running entry exists, 0 otherwise (including when
 * an entry exists but is not running).
 */
static int uxc_exists(char *name)
{
	struct runtime_state *rt = NULL;

	rt = avl_find_element(&runtime, name, rt, avl);

	return (rt && rt->running) ? -EEXIST : 0;
}
/*
 * Register container `name` with procd through the "add" method of the
 * "container" ubus object.
 *
 * The request is built from the container's configuration entry (bundle
 * path, optional jail name, pidfile, overlay options, infra). Values from
 * the per-container settings take precedence for the overlay options; a
 * settings write path clears the temporary overlay size and vice versa.
 * The settings infra value is only used when the configuration has none.
 *
 * `immediately` is passed through in the jail table.
 *
 * Returns 0 on success, -ENOENT if the container is not configured,
 * -ENOMEM if the verbose dump cannot be formatted, -EIO if the ubus
 * lookup or call fails.
 *
 * Change compared to the earlier version: the request buffer is released on
 * every path, not only when the ubus call fails.
 */
static int uxc_create(char *name, bool immediately)
{
	static struct blob_buf req;
	struct blob_attr *cur, *tb[__CONF_MAX];
	int rem, ret = 0;
	uint32_t id;
	struct settings *usettings = NULL;
	char *path = NULL, *jailname = NULL, *pidfile = NULL, *tmprwsize = NULL, *writepath = NULL, *infra = NULL;
	void *in, *ins, *j;
	bool found = false;

	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
		if (!tb[CONF_NAME] || !tb[CONF_PATH])
			continue;

		if (strcmp(name, blobmsg_get_string(tb[CONF_NAME])))
			continue;

		found = true;
		break;
	}

	if (!found)
		return -ENOENT;

	path = blobmsg_get_string(tb[CONF_PATH]);

	if (tb[CONF_PIDFILE])
		pidfile = blobmsg_get_string(tb[CONF_PIDFILE]);

	if (tb[CONF_TEMP_OVERLAY_SIZE])
		tmprwsize = blobmsg_get_string(tb[CONF_TEMP_OVERLAY_SIZE]);

	if (tb[CONF_WRITE_OVERLAY_PATH])
		writepath = blobmsg_get_string(tb[CONF_WRITE_OVERLAY_PATH]);

	if (tb[CONF_JAIL])
		jailname = blobmsg_get_string(tb[CONF_JAIL]);

	if (tb[CONF_INFRA])
		infra = blobmsg_get_string(tb[CONF_INFRA]);

	usettings = avl_find_element(&settings, blobmsg_get_string(tb[CONF_NAME]), usettings, avl);
	if (usettings) {
		/* the two overlay modes are mutually exclusive */
		if (usettings->writepath) {
			writepath = usettings->writepath;
			tmprwsize = NULL;
		}
		if (usettings->tmprwsize) {
			tmprwsize = usettings->tmprwsize;
			writepath = NULL;
		}
		if (!infra && usettings->infra)
			infra = usettings->infra;
	}

	blob_buf_init(&req, 0);
	blobmsg_add_string(&req, "name", name);
	ins = blobmsg_open_table(&req, "instances");
	in = blobmsg_open_table(&req, name);
	blobmsg_add_string(&req, "bundle", path);
	j = blobmsg_open_table(&req, "jail");
	blobmsg_add_string(&req, "name", jailname?:name);
	blobmsg_add_u8(&req, "immediately", immediately);

	if (pidfile)
		blobmsg_add_string(&req, "pidfile", pidfile);

	blobmsg_close_table(&req, j);

	if (writepath)
		blobmsg_add_string(&req, "overlaydir", writepath);

	if (tmprwsize)
		blobmsg_add_string(&req, "tmpoverlaysize", tmprwsize);

	if (infra)
		blobmsg_add_string(&req, "infra", infra);

	blobmsg_close_table(&req, in);
	blobmsg_close_table(&req, ins);

	if (verbose) {
		char *tmp;

		tmp = blobmsg_format_json_indent(req.head, true, 1);
		if (!tmp) {
			ret = -ENOMEM;
			goto out;
		}

		fprintf(stderr, "adding container to procd:\n\t%s\n", tmp);
		free(tmp);
	}

	if (ubus_lookup_id(ctx, "container", &id) ||
	    ubus_invoke(ctx, id, "add", req.head, NULL, NULL, 3000))
		ret = -EIO;

out:
	blob_buf_free(&req);
	return ret;
}
/*
 * Ask procd to start container `name` via the "start" method of the ubus
 * object "container.<name>".
 *
 * With `console` set the process forks first: the parent attaches to the
 * container console and exits with uxc_attach()'s result, while the child
 * carries on and issues the start request.
 *
 * Returns the ubus_invoke() result, -ENOMEM if the object name cannot be
 * allocated, -ENOENT if the object does not exist, or -errno if fork()
 * fails.
 *
 * Changes compared to the earlier version: the object name is released when
 * the lookup fails, and a failing fork() is reported instead of silently
 * starting the container without the requested console.
 */
static int uxc_start(const char *name, bool console)
{
	char *objname;
	unsigned int id;
	pid_t pid;
	int ret;

	if (console) {
		pid = fork();
		if (pid < 0)
			return -errno;

		if (pid > 0)
			exit(uxc_attach(name));
	}

	if (asprintf(&objname, "container.%s", name) == -1)
		return -ENOMEM;

	ret = ubus_lookup_id(ctx, objname, &id);
	free(objname);
	if (ret)
		return -ENOENT;

	return ubus_invoke(ctx, id, "start", NULL, NULL, NULL, 3000);
}
/*
 * Send `signal` to the running container `name` through the "kill" method
 * of the ubus object "container.<name>".
 *
 * Returns 0 on success, -ENOENT if the container is not configured, not
 * running, or its ubus object is missing, -ENOMEM if the object name cannot
 * be allocated, -EIO if the call fails.
 */
static int uxc_kill(char *name, int signal)
{
	static struct blob_buf req;
	struct blob_attr *entry, *fields[__CONF_MAX];
	struct runtime_state *rt = NULL;
	char *object_path;
	unsigned int object_id;
	int left, lookup_failed;
	bool configured = false;

	blobmsg_for_each_attr(entry, blob_data(conf.head), left) {
		blobmsg_parse(conf_policy, __CONF_MAX, fields, blobmsg_data(entry), blobmsg_len(entry));

		if (fields[CONF_NAME] && fields[CONF_PATH] &&
		    !strcmp(name, blobmsg_get_string(fields[CONF_NAME]))) {
			configured = true;
			break;
		}
	}

	if (!configured)
		return -ENOENT;

	rt = avl_find_element(&runtime, name, rt, avl);
	if (!(rt && rt->running))
		return -ENOENT;

	blob_buf_init(&req, 0);
	blobmsg_add_u32(&req, "signal", signal);
	blobmsg_add_string(&req, "name", name);

	if (asprintf(&object_path, "container.%s", name) == -1)
		return -ENOMEM;

	lookup_failed = ubus_lookup_id(ctx, object_path, &object_id);
	free(object_path);
	if (lookup_failed)
		return -ENOENT;

	return ubus_invoke(ctx, object_id, "kill", req.head, NULL, NULL, 3000) ? -EIO : 0;
}
/*
 * Create a container configuration or update a container's settings file.
 *
 * With `path` set, a new configuration "<confdir>/<name>.json" is written;
 * the container must not exist yet and `path` must be a directory.
 * Without `path`, the settings file of the existing container `name` is
 * (re)written: either the file recorded in its settings entry, or
 * "<dir of the config file>/settings/<name>.json".
 *
 * Values not given by the caller are carried over from the existing
 * settings entry (overlay options, autostart, infra, volumes).
 * `requiredmounts` is a list separated by ',' or ';' and is modified in
 * place while being split.
 *
 * Returns 1 when a file was written (the caller should reload the
 * configuration), 0 when there was nothing to do, or a negative errno value.
 *
 * Changes compared to the earlier version:
 *  - the settings entry is looked up by `name`; the old code used whatever
 *    the search loop had parsed last, which is stale or uninitialised when
 *    no entry matched (the normal case when creating a container);
 *  - creating the settings directory now tests errno, not mkdir()'s return
 *    value, so an already existing directory no longer aborts with a bogus
 *    return value of 1 before anything is written;
 *  - an infra value given by the caller is no longer overridden by the
 *    stored one (same precedence as the other options);
 *  - temporary path strings are released on all paths, and a failure to
 *    format or write the file is reported.
 */
static int uxc_set(char *name, char *path, signed char autostart, char *pidfile, char *tmprwsize, char *writepath, char *requiredmounts, char *infra)
{
	static struct blob_buf req;
	struct settings *usettings = NULL;
	struct blob_attr *cur, *tb[__CONF_MAX];
	int rem, ret;
	const char *cfname = NULL;
	const char *sfname = NULL;
	char *fname = NULL;
	char *curvol, *tmp, *mnttok;
	void *mntarr;
	int f;
	struct stat sb;

	/* nothing to do */
	if (!path && (autostart<0) && !pidfile && !tmprwsize && !writepath && !requiredmounts && !infra)
		return 0;

	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
		if (!tb[CONF_NAME] || !tb[CONF_PATH])
			continue;

		if (strcmp(name, blobmsg_get_string(tb[CONF_NAME])))
			continue;

		cfname = blobmsg_name(cur);
		break;
	}

	if (cfname && path)
		return -EEXIST;

	if (!cfname && !path)
		return -ENOENT;

	if (path) {
		if (stat(path, &sb) == -1)
			return -ENOENT;

		if ((sb.st_mode & S_IFMT) != S_IFDIR)
			return -ENOTDIR;
	}

	/* tb[] is only meaningful when an entry matched, so key by name */
	usettings = avl_find_element(&settings, name, usettings, avl);
	if (path && usettings)
		return -EIO;

	if (usettings) {
		sfname = usettings->fname;
		if (!tmprwsize && !writepath) {
			if (usettings->tmprwsize) {
				tmprwsize = usettings->tmprwsize;
				writepath = NULL;
			}
			if (usettings->writepath) {
				writepath = usettings->writepath;
				tmprwsize = NULL;
			}
		}
		if (usettings->autostart >= 0 && autostart < 0)
			autostart = !!(usettings->autostart);
		if (!infra && usettings->infra)
			infra = usettings->infra;
	}

	if (path) {
		ret = mkdir(confdir, 0755);
		if (ret && errno != EEXIST)
			return -errno;

		if (asprintf(&fname, "%s/%s.json", confdir, name) == -1)
			return -ENOMEM;

		f = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0644);
		ret = (f < 0) ? -errno : 0;
		free(fname);
		if (ret)
			return ret;
	} else if (sfname) {
		f = open(sfname, O_WRONLY | O_CREAT | O_TRUNC, 0644);
		if (f < 0)
			return -errno;
	} else {
		char *dir, *sep, *target;

		dir = strdup(cfname);
		if (!dir)
			return -ENOMEM;

		/* strip the file name, keep the directory of the config file */
		sep = strrchr(dir, '/');
		if (!sep) {
			free(dir);
			return -EINVAL;
		}
		*sep = '\0';

		if (asprintf(&target, "%s/settings", dir) == -1) {
			free(dir);
			return -ENOMEM;
		}

		ret = (mkdir(target, 0755) && errno != EEXIST) ? -errno : 0;
		free(target);
		if (ret) {
			free(dir);
			return ret;
		}

		if (asprintf(&target, "%s/settings/%s.json", dir, name) == -1) {
			free(dir);
			return -ENOMEM;
		}
		free(dir);

		f = open(target, O_WRONLY | O_CREAT | O_TRUNC, 0644);
		ret = (f < 0) ? -errno : 0;
		free(target);
		if (ret)
			return ret;
	}

	blob_buf_init(&req, 0);
	blobmsg_add_string(&req, "name", name);

	if (path)
		blobmsg_add_string(&req, "path", path);

	if (autostart >= 0)
		blobmsg_add_u8(&req, "autostart", !!autostart);

	if (pidfile)
		blobmsg_add_string(&req, "pidfile", pidfile);

	if (infra)
		blobmsg_add_string(&req, "infra", infra);

	if (tmprwsize)
		blobmsg_add_string(&req, "temp-overlay-size", tmprwsize);

	if (writepath)
		blobmsg_add_string(&req, "write-overlay-path", writepath);

	if (!requiredmounts && usettings && usettings->volumes)
		blobmsg_add_blob(&req, usettings->volumes);

	if (requiredmounts) {
		mntarr = blobmsg_open_array(&req, "volumes");
		for (mnttok = requiredmounts; ; mnttok = NULL) {
			curvol = strtok_r(mnttok, ",;", &tmp);
			if (!curvol)
				break;

			blobmsg_add_string(&req, NULL, curvol);
		}
		blobmsg_close_array(&req, mntarr);
	}

	ret = 1;
	tmp = blobmsg_format_json_indent(req.head, true, 0);
	if (!tmp) {
		ret = -ENOMEM;
	} else {
		if (dprintf(f, "%s\n", tmp) < 0)
			ret = -EIO;
		free(tmp);
	}

	blob_buf_free(&req);
	close(f);

	return ret;
}
/* Indices into one parsed block-device entry (see block_info_policy). */
enum {
BLOCK_INFO_DEVICE,
BLOCK_INFO_UUID,
BLOCK_INFO_TARGET,
BLOCK_INFO_TYPE,
BLOCK_INFO_MOUNT,
__BLOCK_INFO_MAX,
};
/*
 * blobmsg parse policy applied by checkblock() to each entry of the global
 * `blockinfo` blob; checkblock() only looks at "uuid" and "mount".
 */
static const struct blobmsg_policy block_info_policy[__BLOCK_INFO_MAX] = {
[BLOCK_INFO_DEVICE] = { .name = "device", .type = BLOBMSG_TYPE_STRING },
[BLOCK_INFO_UUID] = { .name = "uuid", .type = BLOBMSG_TYPE_STRING },
[BLOCK_INFO_TARGET] = { .name = "target", .type = BLOBMSG_TYPE_STRING },
[BLOCK_INFO_TYPE] = { .name = "type", .type = BLOBMSG_TYPE_STRING },
[BLOCK_INFO_MOUNT] = { .name = "mount", .type = BLOBMSG_TYPE_STRING },
};
/*
 * Look for `uuid` among the entries of the global `blockinfo` blob that have
 * both a "uuid" and a "mount" field.
 *
 * Note the polarity: returns false when such an entry is found and true
 * when it is NOT found, i.e. true means "volume not available".
 */
static bool checkblock(const char *uuid)
{
	struct blob_attr *info[__BLOCK_INFO_MAX];
	struct blob_attr *dev;
	int left;
	bool missing = true;

	blobmsg_for_each_attr(dev, blockinfo, left) {
		blobmsg_parse(block_info_policy, __BLOCK_INFO_MAX, info, blobmsg_data(dev), blobmsg_len(dev));

		if (info[BLOCK_INFO_UUID] && info[BLOCK_INFO_MOUNT] &&
		    strcmp(uuid, blobmsg_get_string(info[BLOCK_INFO_UUID])) == 0) {
			missing = false;
			break;
		}
	}

	return missing;
}
/* Indices into one parsed fstab section (see uci_fstab_policy). */
enum {
UCI_FSTAB_UUID,
UCI_FSTAB_ANONYMOUS,
__UCI_FSTAB_MAX,
};
/*
 * blobmsg parse policy applied by resolveuuid() to each entry of the global
 * `fstabinfo` blob: the section's "uuid" and its ".anonymous" flag.
 */
static const struct blobmsg_policy uci_fstab_policy[__UCI_FSTAB_MAX] = {
[UCI_FSTAB_UUID] = { .name = "uuid", .type = BLOBMSG_TYPE_STRING },
[UCI_FSTAB_ANONYMOUS] = { .name = ".anonymous", .type = BLOBMSG_TYPE_BOOL },
};
/*
 * Translate a volume name into a UUID using the global `fstabinfo` blob.
 *
 * `volname`, with every '-' replaced by '_', is compared against the names
 * of all non-anonymous fstab sections that carry a "uuid". The UUID of the
 * first matching section is returned (it points into fstabinfo).
 *
 * When nothing matches, or the temporary copy cannot be allocated, `volname`
 * itself is returned, i.e. it is treated as being a UUID already.
 *
 * Changes compared to the earlier version: the normalised name is computed
 * once instead of once per fstab section, and a failing strdup() no longer
 * leads to a NULL dereference.
 */
static const char *resolveuuid(const char *volname)
{
	struct blob_attr *tb[__UCI_FSTAB_MAX];
	struct blob_attr *cur;
	const char *mntname;
	const char *uuid = volname;
	char *section, *replc;
	int rem;

	section = strdup(volname);
	if (!section)
		return volname;

	for (replc = section; (replc = strchr(replc, '-')); )
		*replc = '_';

	blobmsg_for_each_attr(cur, fstabinfo, rem) {
		blobmsg_parse(uci_fstab_policy, __UCI_FSTAB_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));

		if (!tb[UCI_FSTAB_UUID])
			continue;

		if (tb[UCI_FSTAB_ANONYMOUS] && blobmsg_get_bool(tb[UCI_FSTAB_ANONYMOUS]))
			continue;

		mntname = blobmsg_name(cur);
		if (!mntname)
			continue;

		if (!strcmp(section, mntname)) {
			uuid = blobmsg_get_string(tb[UCI_FSTAB_UUID]);
			break;
		}
	}

	free(section);
	return uuid;
}
/*
 * Check every volume named in the `volumes` array.
 *
 * Each name is mapped through resolveuuid() and handed to checkblock().
 * Returns true as soon as checkblock() reports true for one of them (that
 * volume was not found among the mounted block devices), false when all
 * volumes passed. uxc_boot() skips a container when this returns true.
 */
static bool checkvolumes(struct blob_attr *volumes)
{
	struct blob_attr *vol;
	int left;
	bool blocked = false;

	blobmsg_for_each_attr(vol, volumes, left) {
		const char *uuid = resolveuuid(blobmsg_get_string(vol));

		if (checkblock(uuid)) {
			blocked = true;
			break;
		}
	}

	return blocked;
}
/*
 * Reply handler for the "info" call on the "block" object (see uxc_boot):
 * stores a heap copy of the attribute found at the start of the reply
 * payload in the global `blockinfo`.
 */
static void block_cb(struct ubus_request *req, int type, struct blob_attr *msg)
{
	struct blob_attr *payload = blobmsg_data(msg);

	blockinfo = blob_memdup(payload);
}
/*
 * Reply handler for the uci "get" call issued by uxc_boot(): stores a heap
 * copy of the attribute found at the start of the reply payload in the
 * global `fstabinfo`.
 */
static void fstab_cb(struct ubus_request *req, int type, struct blob_attr *msg)
{
	struct blob_attr *payload = blobmsg_data(msg);

	fstabinfo = blob_memdup(payload);
}
/*
 * Create (and immediately start) every container marked for autostart.
 *
 * Block device information ("block" object, "info") and the fstab mount
 * sections (uci "get") are fetched first. A container is skipped when it
 * already has a runtime entry, when autostart is off (the settings value
 * overrides the configuration value), or when checkvolumes() reports one of
 * its required volumes as unavailable.
 *
 * Returns the number of containers whose creation failed (0 if none),
 * -ENOENT if a required ubus object is missing, -ENXIO if the block info
 * call fails, or the ubus error of the uci call.
 *
 * Changes compared to the earlier version:
 *  - `autostart` is derived for every container; it used to be left
 *    uninitialised (or stale from the previous iteration) when a container
 *    had neither an "autostart" key nor settings;
 *  - the duplicated name is released when uxc_exists() makes the loop skip
 *    the container, and a failing strdup() counts as a failed container;
 *  - the uci request buffer is released after use.
 */
static int uxc_boot(void)
{
	struct blob_attr *cur, *tb[__CONF_MAX];
	struct runtime_state *rsstate = NULL;
	struct settings *usettings = NULL;
	static struct blob_buf req;
	int rem, ret = 0;
	char *name;
	unsigned int id;
	bool autostart;

	ret = ubus_lookup_id(ctx, "block", &id);
	if (ret)
		return -ENOENT;

	ret = ubus_invoke(ctx, id, "info", NULL, block_cb, NULL, 3000);
	if (ret)
		return -ENXIO;

	ret = ubus_lookup_id(ctx, "uci", &id);
	if (ret)
		return -ENOENT;

	blob_buf_init(&req, 0);
	blobmsg_add_string(&req, "config", "fstab");
	blobmsg_add_string(&req, "type", "mount");

	ret = ubus_invoke(ctx, id, "get", req.head, fstab_cb, NULL, 3000);
	blob_buf_free(&req);
	if (ret)
		return ret;

	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
		if (!tb[CONF_NAME] || !tb[CONF_PATH])
			continue;

		rsstate = avl_find_element(&runtime, blobmsg_get_string(tb[CONF_NAME]), rsstate, avl);
		if (rsstate)
			continue;

		autostart = tb[CONF_AUTOSTART] && blobmsg_get_bool(tb[CONF_AUTOSTART]);

		usettings = avl_find_element(&settings, blobmsg_get_string(tb[CONF_NAME]), usettings, avl);
		if (usettings && (usettings->autostart >= 0))
			autostart = !!(usettings->autostart);

		if (!autostart)
			continue;

		/* make sure all volumes are ready before starting */
		if (tb[CONF_VOLUMES])
			if (checkvolumes(tb[CONF_VOLUMES]))
				continue;

		if (usettings && usettings->volumes)
			if (checkvolumes(usettings->volumes))
				continue;

		name = strdup(blobmsg_get_string(tb[CONF_NAME]));
		if (!name) {
			++ret;
			continue;
		}

		if (uxc_exists(name)) {
			free(name);
			continue;
		}

		if (uxc_create(name, true))
			++ret;

		free(name);
	}

	return ret;
}
/*
 * Remove container `name`: its procd instance (if any), its settings file
 * (if any) and its configuration file.
 *
 * A running container is only removed with `force`, in which case it is
 * sent SIGKILL first; without `force` the call fails with -EWOULDBLOCK.
 *
 * Returns 0 on success, -ENOENT if the container is not configured or one
 * of its files is missing, -EIO if the procd "delete" call fails, the
 * error of uxc_kill()/ubus_lookup_id(), or -errno from unlink().
 */
static int uxc_delete(char *name, bool force)
{
	static struct blob_buf req;
	struct blob_attr *entry, *fields[__CONF_MAX];
	struct runtime_state *rt = NULL;
	struct settings *stored = NULL;
	const char *conf_file = NULL;
	const char *settings_file = NULL;
	struct stat st;
	uint32_t object_id;
	int left, rc;

	blobmsg_for_each_attr(entry, blob_data(conf.head), left) {
		blobmsg_parse(conf_policy, __CONF_MAX, fields, blobmsg_data(entry), blobmsg_len(entry));

		if (fields[CONF_NAME] && fields[CONF_PATH] &&
		    !strcmp(name, blobmsg_get_string(fields[CONF_NAME]))) {
			conf_file = blobmsg_name(entry);
			break;
		}
	}

	if (!conf_file)
		return -ENOENT;

	rt = avl_find_element(&runtime, name, rt, avl);

	/* a running container must be killed first, and only on request */
	if (rt && rt->running) {
		if (!force)
			return -EWOULDBLOCK;

		rc = uxc_kill(name, SIGKILL);
		if (rc)
			return rc;
	}

	/* drop the instance from procd */
	if (rt) {
		rc = ubus_lookup_id(ctx, "container", &object_id);
		if (rc)
			return rc;

		blob_buf_init(&req, 0);
		blobmsg_add_string(&req, "name", rt->container_name);
		blobmsg_add_string(&req, "instance", rt->instance_name);

		if (ubus_invoke(ctx, object_id, "delete", req.head, NULL, NULL, 3000)) {
			blob_buf_free(&req);
			return -EIO;
		}
	}

	stored = avl_find_element(&settings, name, stored, avl);
	if (stored)
		settings_file = stored->fname;

	if (settings_file) {
		if (stat(settings_file, &st) == -1)
			return -ENOENT;

		if (unlink(settings_file) == -1)
			return -errno;
	}

	if (stat(conf_file, &st) == -1)
		return -ENOENT;

	return (unlink(conf_file) == -1) ? -errno : 0;
}
/*
 * Throw away the in-memory configuration and settings and load them again.
 * main() calls this after uxc_set() returned a positive value, i.e. after a
 * configuration or settings file was written.
 *
 * The reload order mirrors start-up in main(): conf_load(false),
 * conf_load(true), settings_add(). Return values of the loaders are not
 * checked here.
 */
static void reload_conf(void)
{
/* release and reload the container configuration */
blob_buf_free(&conf);
conf_load(false);
/*
 * release the settings entries and their backing buffer, then reload;
 * presumably conf_load(true) refills settingsbuf and settings_add()
 * rebuilds the settings tree -- both are defined outside this section,
 * TODO confirm
 */
settings_free();
blob_buf_free(&settingsbuf);
conf_load(true);
settings_add();
}
/*
 * uxc entry point: connect to ubus, load configuration, settings and
 * runtime state, parse the command line and dispatch to the uxc_* handler.
 *
 * "exec" is special: when it is the first argument no option parsing takes
 * place, argv[2] is the container and argv[3..] the command line to run
 * ("/bin/sh" when none is given).
 *
 * Returns the handler's result; negative values are negated errno codes and
 * are also reported on stderr.
 *
 * Changes compared to the earlier version:
 *  - "enable"/"disable" map uxc_set()'s positive "file written" result to 0
 *    instead of exiting with status 1 on success;
 *  - the argument vector for "exec" is checked after allocation and released
 *    if uxc_exec() returns;
 *  - a failing runtime_load() also releases the entries it had already
 *    collected;
 *  - the local command string no longer shadows the `cmd` enum variable.
 */
int main(int argc, char **argv)
{
	enum uxc_cmd cmd = CMD_UNKNOWN;
	int ret = -EINVAL;
	char *bundle = NULL;
	char *pidfile = NULL;
	char *tmprwsize = NULL;
	char *writepath = NULL;
	char *requiredmounts = NULL;
	char *infra = NULL;
	signed char autostart = -1;
	bool force = false;
	bool console = false;
	int signal = SIGTERM;
	int c;

	if (argc < 2)
		return usage();

	ctx = ubus_connect(NULL);
	if (!ctx)
		return -ENODEV;

	ret = conf_load(false);
	if (ret < 0)
		goto out;

	ret = conf_load(true);
	if (ret < 0)
		goto conf_out;

	ret = settings_add();
	if (ret < 0)
		goto settings_out;

	ret = runtime_load();
	if (ret)
		goto runtime_out; /* the tree is initialised; free partial results */

	if (argc > 1 && !strcmp("exec", argv[1]))
		cmd = CMD_EXEC;

	/* options are not parsed for "exec": they belong to the command */
	while (cmd != CMD_EXEC) {
		int option_index = 0;

		c = getopt_long(argc, argv, OPT_ARGS, long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'a':
			autostart = 1;
			break;

		case 'b':
			bundle = optarg;
			break;

		case 'c':
			console = true;
			break;

		case 'f':
			force = true;
			break;

		case 'j':
			json_output = true;
			break;

		case 'p':
			pidfile = optarg;
			break;

		case 't':
			tmprwsize = optarg;
			break;

		case 'v':
			verbose = true;
			break;

		case 'V':
			printf("uxc %s\n", UXC_VERSION);
			exit(0);

		case 'w':
			writepath = optarg;
			break;

		case 'm':
			requiredmounts = optarg;
			break;

		case 'I':
			infra = optarg;
			break;

		default:
			/* unknown options are ignored, as before */
			break;
		}
	}

	if (optind == argc && cmd != CMD_EXEC)
		goto usage_out;

	if (!strcmp("list", argv[optind]))
		cmd = CMD_LIST;
	else if (!strcmp("attach", argv[optind]))
		cmd = CMD_ATTACH;
	else if (!strcmp("exec", argv[optind]))
		cmd = CMD_EXEC;
	else if (!strcmp("boot", argv[optind]))
		cmd = CMD_BOOT;
	else if (!strcmp("start", argv[optind]))
		cmd = CMD_START;
	else if (!strcmp("state", argv[optind]))
		cmd = CMD_STATE;
	else if (!strcmp("kill", argv[optind]) || !strcmp("stop", argv[optind]))
		cmd = CMD_KILL;
	else if (!strcmp("restart", argv[optind]))
		cmd = CMD_RESTART;
	else if (!strcmp("enable", argv[optind]))
		cmd = CMD_ENABLE;
	else if (!strcmp("disable", argv[optind]))
		cmd = CMD_DISABLE;
	else if (!strcmp("delete", argv[optind]))
		cmd = CMD_DELETE;
	else if (!strcmp("create", argv[optind]))
		cmd = CMD_CREATE;

	switch (cmd) {
	case CMD_ATTACH:
		if (optind != argc - 2)
			goto usage_out;

		ret = uxc_attach(argv[optind + 1]);
		break;

	case CMD_EXEC: {
		char **args;
		int cnt, i;

		if (argc < 3)
			goto usage_out;

		/* command words plus the terminating NULL */
		cnt = argc < 5 ? 2 : (argc - 2);
		args = calloc(cnt, sizeof(*args));
		if (!args) {
			ret = -ENOMEM;
			break;
		}

		if (argc > 3) {
			for (i = 0; i < cnt - 1; i++)
				args[i] = argv[i + 3];
		} else {
			args[0] = "/bin/sh";
		}
		args[cnt - 1] = NULL;

		/* only returns on failure */
		ret = uxc_exec(argv[optind + 1], args);
		free(args);
		break;
	}

	case CMD_LIST:
		ret = uxc_list();
		break;

	case CMD_BOOT:
		ret = uxc_boot();
		break;

	case CMD_START:
		if (optind != argc - 2)
			goto usage_out;

		ret = uxc_start(argv[optind + 1], console);
		break;

	case CMD_RESTART:
		if (optind == (argc - 3))
			signal = atoi(argv[optind + 2]);
		else if (optind != argc - 2)
			goto usage_out;

		uxc_kill(argv[optind + 1], signal);

		runtime_free(); // poll runtime
		sleep(1);
		runtime_load();

		ret = uxc_exists(argv[optind + 1]);
		if (ret)
			goto runtime_out;

		ret = uxc_set(argv[optind + 1], bundle, autostart, pidfile, tmprwsize, writepath, requiredmounts, infra);
		if (ret < 0)
			goto runtime_out;
		else if (ret > 0)
			reload_conf();

		ret = uxc_create(argv[optind + 1], false);
		if (ret != 0)
			goto runtime_out;

		runtime_free(); // poll runtime again
		sleep(1);
		runtime_load();

		ret = uxc_start(argv[optind + 1], console);
		break;

	case CMD_STATE:
		if (optind != argc - 2)
			goto usage_out;

		ret = uxc_state(argv[optind + 1]);
		break;

	case CMD_KILL:
		if (optind == (argc - 3))
			signal = atoi(argv[optind + 2]);
		else if (optind > argc - 2)
			goto usage_out;

		ret = uxc_kill(argv[optind + 1], signal);
		break;

	case CMD_ENABLE:
		if (optind != argc - 2)
			goto usage_out;

		ret = uxc_set(argv[optind + 1], NULL, 1, NULL, NULL, NULL, NULL, NULL);
		if (ret > 0)
			ret = 0; /* "file written" is success, not an exit status */
		break;

	case CMD_DISABLE:
		if (optind != argc - 2)
			goto usage_out;

		ret = uxc_set(argv[optind + 1], NULL, 0, NULL, NULL, NULL, NULL, NULL);
		if (ret > 0)
			ret = 0; /* "file written" is success, not an exit status */
		break;

	case CMD_DELETE:
		if (optind != argc - 2)
			goto usage_out;

		ret = uxc_delete(argv[optind + 1], force);
		break;

	case CMD_CREATE:
		if (optind != argc - 2)
			goto usage_out;

		ret = uxc_exists(argv[optind + 1]);
		if (ret)
			goto runtime_out;

		ret = uxc_set(argv[optind + 1], bundle, autostart, pidfile, tmprwsize, writepath, requiredmounts, infra);
		if (ret < 0)
			goto runtime_out;

		if (ret > 0)
			reload_conf();

		ret = uxc_create(argv[optind + 1], false);
		break;

	default:
		goto usage_out;
	}

	goto runtime_out;

usage_out:
	ret = usage();
runtime_out:
	runtime_free();
	settings_free();
settings_out:
	blob_buf_free(&settingsbuf);
conf_out:
	blob_buf_free(&conf);
out:
	ubus_free(ctx);

	if (ret < 0)
		fprintf(stderr, "uxc error: %s\n", strerror(-ret));

	return ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment