Systemd/src/nspawn/nspawn-stub-pid1.c
Lennart Poettering 0c69794138 tree-wide: remove Lennart's copyright lines
These lines are generally out-of-date, incomplete and unnecessary. With
SPDX and git repository much more accurate and fine grained information
about licensing and authorship is available, hence let's drop the
per-file copyright notice. Of course, removing copyright lines of others
is problematic, hence this commit only removes my own lines and leaves
all others untouched. It might be nicer if sooner or later those could
go away too, making git the only and accurate source of authorship
information.
2018-06-14 10:20:20 +02:00

191 lines
7.2 KiB
C

/* SPDX-License-Identifier: LGPL-2.1+ */
#include <sys/reboot.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <unistd.h>
#include "fd-util.h"
#include "log.h"
#include "missing.h"
#include "nspawn-stub-pid1.h"
#include "process-util.h"
#include "signal-util.h"
#include "time-util.h"
#include "def.h"
static int reset_environ(const char *new_environment, size_t length) {
unsigned long start, end;
start = (unsigned long) new_environment;
end = start + length;
if (prctl(PR_SET_MM, PR_SET_MM_ENV_START, start, 0, 0) < 0)
return -errno;
if (prctl(PR_SET_MM, PR_SET_MM_ENV_END, end, 0, 0) < 0)
return -errno;
return 0;
}
int stub_pid1(sd_id128_t uuid) {
enum {
STATE_RUNNING,
STATE_REBOOT,
STATE_POWEROFF,
} state = STATE_RUNNING;
sigset_t fullmask, oldmask, waitmask;
usec_t quit_usec = USEC_INFINITY;
pid_t pid;
int r;
/* The new environment we set up, on the stack. */
char new_environment[] =
"container=systemd-nspawn\0"
"container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
/* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
* for allowing arbitrary processes run in a container, and still have all zombies reaped. */
assert_se(sigfillset(&fullmask) >= 0);
assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
pid = fork();
if (pid < 0)
return log_error_errno(errno, "Failed to fork child pid: %m");
if (pid == 0) {
/* Return in the child */
assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
setsid();
return 0;
}
reset_all_signal_handlers();
log_close();
close_all_fds(NULL, 0);
log_open();
/* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
* set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ
* find them set. */
sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX);
reset_environ(new_environment, sizeof(new_environment));
(void) rename_process("(sd-stubinit)");
assert_se(sigemptyset(&waitmask) >= 0);
assert_se(sigset_add_many(&waitmask,
SIGCHLD, /* posix: process died */
SIGINT, /* sysv: ctrl-alt-del */
SIGRTMIN+3, /* systemd: halt */
SIGRTMIN+4, /* systemd: poweroff */
SIGRTMIN+5, /* systemd: reboot */
SIGRTMIN+6, /* systemd: kexec */
SIGRTMIN+13, /* systemd: halt */
SIGRTMIN+14, /* systemd: poweroff */
SIGRTMIN+15, /* systemd: reboot */
SIGRTMIN+16, /* systemd: kexec */
-1) >= 0);
/* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
* support reexec/reloading in this stub process. */
for (;;) {
siginfo_t si;
usec_t current_usec;
si.si_pid = 0;
r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
if (r < 0) {
r = log_error_errno(errno, "Failed to reap children: %m");
goto finish;
}
current_usec = now(CLOCK_MONOTONIC);
if (si.si_pid == pid || current_usec >= quit_usec) {
/* The child we started ourselves died or we reached a timeout. */
if (state == STATE_REBOOT) { /* dispatch a queued reboot */
(void) reboot(RB_AUTOBOOT);
r = log_error_errno(errno, "Failed to reboot: %m");
goto finish;
} else if (state == STATE_POWEROFF)
(void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
if (si.si_pid == pid && si.si_code == CLD_EXITED)
r = si.si_status; /* pass on exit code */
else
r = 255; /* signal, coredump, timeout, … */
goto finish;
}
if (si.si_pid != 0)
/* We reaped something. Retry until there's nothing more to reap. */
continue;
if (quit_usec == USEC_INFINITY)
r = sigwaitinfo(&waitmask, &si);
else {
struct timespec ts;
r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec));
}
if (r < 0) {
if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
continue;
if (errno == EAGAIN) /* timeout reached */
continue;
r = log_error_errno(errno, "Failed to wait for signal: %m");
goto finish;
}
if (si.si_signo == SIGCHLD)
continue; /* Let's reap this */
if (state != STATE_RUNNING)
continue;
/* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
* constant… */
if (si.si_signo == SIGRTMIN+3 ||
si.si_signo == SIGRTMIN+4 ||
si.si_signo == SIGRTMIN+13 ||
si.si_signo == SIGRTMIN+14)
state = STATE_POWEROFF;
else if (si.si_signo == SIGINT ||
si.si_signo == SIGRTMIN+5 ||
si.si_signo == SIGRTMIN+6 ||
si.si_signo == SIGRTMIN+15 ||
si.si_signo == SIGRTMIN+16)
state = STATE_REBOOT;
else
assert_not_reached("Got unexpected signal");
r = kill_and_sigcont(pid, SIGTERM);
/* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We
* do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those
* processes which handle both. That's because services tend to bind configuration reload or something
* else to SIGHUP. */
if (r != -ESRCH)
(void) kill(pid, SIGHUP);
quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
}
finish:
_exit(r < 0 ? EXIT_FAILURE : r);
}