Systemd/src/machine/machine.c
Zbigniew Jędrzejewski-Szmek 597da51bae tree-wide: make parse_ifindex simply return the index
We don't need a seperate output parameter that is of type int.  glibc() says
that the type is "unsigned", but the kernel thinks it's "int".  And the
"alternative names" interface also uses ints. So let's standarize on ints,
since it's clearly not realisitic to have interface numbers in the upper half
of unsigned int range.
2020-01-11 12:06:08 +01:00

771 lines
21 KiB
C

/* SPDX-License-Identifier: LGPL-2.1+ */
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include "sd-messages.h"
#include "alloc-util.h"
#include "bus-error.h"
#include "bus-util.h"
#include "env-file.h"
#include "errno-util.h"
#include "escape.h"
#include "extract-word.h"
#include "fd-util.h"
#include "fileio.h"
#include "format-util.h"
#include "hashmap.h"
#include "machine-dbus.h"
#include "machine.h"
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "serialize.h"
#include "special.h"
#include "stdio-util.h"
#include "string-table.h"
#include "terminal-util.h"
#include "tmpfile-util.h"
#include "unit-name.h"
#include "user-util.h"
#include "util.h"
Machine* machine_new(Manager *manager, MachineClass class, const char *name) {
Machine *m;
assert(manager);
assert(class < _MACHINE_CLASS_MAX);
assert(name);
/* Passing class == _MACHINE_CLASS_INVALID here is fine. It
* means as much as "we don't know yet", and that we'll figure
* it out later when loading the state file. */
m = new0(Machine, 1);
if (!m)
return NULL;
m->name = strdup(name);
if (!m->name)
goto fail;
if (class != MACHINE_HOST) {
m->state_file = path_join("/run/systemd/machines", m->name);
if (!m->state_file)
goto fail;
}
m->class = class;
if (hashmap_put(manager->machines, m->name, m) < 0)
goto fail;
m->manager = manager;
return m;
fail:
free(m->state_file);
free(m->name);
return mfree(m);
}
Machine* machine_free(Machine *m) {
if (!m)
return NULL;
while (m->operations)
operation_free(m->operations);
if (m->in_gc_queue)
LIST_REMOVE(gc_queue, m->manager->machine_gc_queue, m);
machine_release_unit(m);
free(m->scope_job);
(void) hashmap_remove(m->manager->machines, m->name);
if (m->manager->host_machine == m)
m->manager->host_machine = NULL;
if (m->leader > 0)
(void) hashmap_remove_value(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
sd_bus_message_unref(m->create_message);
free(m->name);
free(m->state_file);
free(m->service);
free(m->root_directory);
free(m->netif);
return mfree(m);
}
int machine_save(Machine *m) {
_cleanup_free_ char *temp_path = NULL;
_cleanup_fclose_ FILE *f = NULL;
int r;
assert(m);
if (!m->state_file)
return 0;
if (!m->started)
return 0;
r = mkdir_safe_label("/run/systemd/machines", 0755, 0, 0, MKDIR_WARN_MODE);
if (r < 0)
goto fail;
r = fopen_temporary(m->state_file, &f, &temp_path);
if (r < 0)
goto fail;
(void) fchmod(fileno(f), 0644);
fprintf(f,
"# This is private data. Do not parse.\n"
"NAME=%s\n",
m->name);
if (m->unit) {
_cleanup_free_ char *escaped;
escaped = cescape(m->unit);
if (!escaped) {
r = -ENOMEM;
goto fail;
}
fprintf(f, "SCOPE=%s\n", escaped); /* We continue to call this "SCOPE=" because it is internal only, and we want to stay compatible with old files */
}
if (m->scope_job)
fprintf(f, "SCOPE_JOB=%s\n", m->scope_job);
if (m->service) {
_cleanup_free_ char *escaped;
escaped = cescape(m->service);
if (!escaped) {
r = -ENOMEM;
goto fail;
}
fprintf(f, "SERVICE=%s\n", escaped);
}
if (m->root_directory) {
_cleanup_free_ char *escaped;
escaped = cescape(m->root_directory);
if (!escaped) {
r = -ENOMEM;
goto fail;
}
fprintf(f, "ROOT=%s\n", escaped);
}
if (!sd_id128_is_null(m->id))
fprintf(f, "ID=" SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->id));
if (m->leader != 0)
fprintf(f, "LEADER="PID_FMT"\n", m->leader);
if (m->class != _MACHINE_CLASS_INVALID)
fprintf(f, "CLASS=%s\n", machine_class_to_string(m->class));
if (dual_timestamp_is_set(&m->timestamp))
fprintf(f,
"REALTIME="USEC_FMT"\n"
"MONOTONIC="USEC_FMT"\n",
m->timestamp.realtime,
m->timestamp.monotonic);
if (m->n_netif > 0) {
size_t i;
fputs("NETIF=", f);
for (i = 0; i < m->n_netif; i++) {
if (i != 0)
fputc(' ', f);
fprintf(f, "%i", m->netif[i]);
}
fputc('\n', f);
}
r = fflush_and_check(f);
if (r < 0)
goto fail;
if (rename(temp_path, m->state_file) < 0) {
r = -errno;
goto fail;
}
if (m->unit) {
char *sl;
/* Create a symlink from the unit name to the machine
* name, so that we can quickly find the machine for
* each given unit. Ignore error. */
sl = strjoina("/run/systemd/machines/unit:", m->unit);
(void) symlink(m->name, sl);
}
return 0;
fail:
(void) unlink(m->state_file);
if (temp_path)
(void) unlink(temp_path);
return log_error_errno(r, "Failed to save machine data %s: %m", m->state_file);
}
static void machine_unlink(Machine *m) {
assert(m);
if (m->unit) {
char *sl;
sl = strjoina("/run/systemd/machines/unit:", m->unit);
(void) unlink(sl);
}
if (m->state_file)
(void) unlink(m->state_file);
}
int machine_load(Machine *m) {
_cleanup_free_ char *realtime = NULL, *monotonic = NULL, *id = NULL, *leader = NULL, *class = NULL, *netif = NULL;
int r;
assert(m);
if (!m->state_file)
return 0;
r = parse_env_file(NULL, m->state_file,
"SCOPE", &m->unit,
"SCOPE_JOB", &m->scope_job,
"SERVICE", &m->service,
"ROOT", &m->root_directory,
"ID", &id,
"LEADER", &leader,
"CLASS", &class,
"REALTIME", &realtime,
"MONOTONIC", &monotonic,
"NETIF", &netif);
if (r < 0) {
if (r == -ENOENT)
return 0;
return log_error_errno(r, "Failed to read %s: %m", m->state_file);
}
if (id)
sd_id128_from_string(id, &m->id);
if (leader)
parse_pid(leader, &m->leader);
if (class) {
MachineClass c;
c = machine_class_from_string(class);
if (c >= 0)
m->class = c;
}
if (realtime)
(void) deserialize_usec(realtime, &m->timestamp.realtime);
if (monotonic)
(void) deserialize_usec(monotonic, &m->timestamp.monotonic);
if (netif) {
size_t allocated = 0, nr = 0;
const char *p;
_cleanup_free_ int *ni = NULL;
p = netif;
for (;;) {
_cleanup_free_ char *word = NULL;
r = extract_first_word(&p, &word, NULL, 0);
if (r == 0)
break;
if (r == -ENOMEM)
return log_oom();
if (r < 0) {
log_warning_errno(r, "Failed to parse NETIF: %s", netif);
break;
}
r = parse_ifindex(word);
if (r < 0)
continue;
if (!GREEDY_REALLOC(ni, allocated, nr + 1))
return log_oom();
ni[nr++] = r;
}
free(m->netif);
m->netif = TAKE_PTR(ni);
m->n_netif = nr;
}
return r;
}
static int machine_start_scope(
Machine *machine,
sd_bus_message *more_properties,
sd_bus_error *error) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
_cleanup_free_ char *escaped = NULL, *unit = NULL;
const char *description;
int r;
assert(machine);
assert(machine->leader > 0);
assert(!machine->unit);
escaped = unit_name_escape(machine->name);
if (!escaped)
return log_oom();
unit = strjoin("machine-", escaped, ".scope");
if (!unit)
return log_oom();
r = sd_bus_message_new_method_call(
machine->manager->bus,
&m,
"org.freedesktop.systemd1",
"/org/freedesktop/systemd1",
"org.freedesktop.systemd1.Manager",
"StartTransientUnit");
if (r < 0)
return r;
r = sd_bus_message_append(m, "ss", unit, "fail");
if (r < 0)
return r;
r = sd_bus_message_open_container(m, 'a', "(sv)");
if (r < 0)
return r;
r = sd_bus_message_append(m, "(sv)", "Slice", "s", SPECIAL_MACHINE_SLICE);
if (r < 0)
return r;
description = strjoina(machine->class == MACHINE_VM ? "Virtual Machine " : "Container ", machine->name);
r = sd_bus_message_append(m, "(sv)", "Description", "s", description);
if (r < 0)
return r;
r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)(sv)",
"PIDs", "au", 1, machine->leader,
"Delegate", "b", 1,
"CollectMode", "s", "inactive-or-failed",
"AddRef", "b", 1,
"TasksMax", "t", UINT64_C(16384));
if (r < 0)
return r;
if (more_properties) {
r = sd_bus_message_copy(m, more_properties, true);
if (r < 0)
return r;
}
r = sd_bus_message_close_container(m);
if (r < 0)
return r;
r = sd_bus_message_append(m, "a(sa(sv))", 0);
if (r < 0)
return r;
r = sd_bus_call(NULL, m, 0, error, &reply);
if (r < 0)
return r;
machine->unit = TAKE_PTR(unit);
machine->referenced = true;
const char *job;
r = sd_bus_message_read(reply, "o", &job);
if (r < 0)
return r;
return free_and_strdup(&machine->scope_job, job);
}
static int machine_ensure_scope(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
int r;
assert(m);
assert(m->class != MACHINE_HOST);
if (!m->unit) {
r = machine_start_scope(m, properties, error);
if (r < 0)
return log_error_errno(r, "Failed to start machine scope: %s", bus_error_message(error, r));
}
assert(m->unit);
hashmap_put(m->manager->machine_units, m->unit, m);
return 0;
}
int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
int r;
assert(m);
if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
return -EOPNOTSUPP;
if (m->started)
return 0;
r = hashmap_put(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
if (r < 0)
return r;
/* Create cgroup */
r = machine_ensure_scope(m, properties, error);
if (r < 0)
return r;
log_struct(LOG_INFO,
"MESSAGE_ID=" SD_MESSAGE_MACHINE_START_STR,
"NAME=%s", m->name,
"LEADER="PID_FMT, m->leader,
LOG_MESSAGE("New machine %s.", m->name));
if (!dual_timestamp_is_set(&m->timestamp))
dual_timestamp_get(&m->timestamp);
m->started = true;
/* Save new machine data */
machine_save(m);
machine_send_signal(m, true);
(void) manager_enqueue_nscd_cache_flush(m->manager);
return 0;
}
int machine_stop(Machine *m) {
int r;
assert(m);
if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
return -EOPNOTSUPP;
if (m->unit) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
char *job = NULL;
r = manager_stop_unit(m->manager, m->unit, &error, &job);
if (r < 0)
return log_error_errno(r, "Failed to stop machine scope: %s", bus_error_message(&error, r));
free_and_replace(m->scope_job, job);
}
m->stopping = true;
machine_save(m);
(void) manager_enqueue_nscd_cache_flush(m->manager);
return 0;
}
int machine_finalize(Machine *m) {
assert(m);
if (m->started) {
log_struct(LOG_INFO,
"MESSAGE_ID=" SD_MESSAGE_MACHINE_STOP_STR,
"NAME=%s", m->name,
"LEADER="PID_FMT, m->leader,
LOG_MESSAGE("Machine %s terminated.", m->name));
m->stopping = true; /* The machine is supposed to be going away. Don't try to kill it. */
}
machine_unlink(m);
machine_add_to_gc_queue(m);
if (m->started) {
machine_send_signal(m, false);
m->started = false;
}
return 0;
}
bool machine_may_gc(Machine *m, bool drop_not_started) {
assert(m);
if (m->class == MACHINE_HOST)
return false;
if (drop_not_started && !m->started)
return true;
if (m->scope_job && manager_job_is_active(m->manager, m->scope_job))
return false;
if (m->unit && manager_unit_is_active(m->manager, m->unit))
return false;
return true;
}
void machine_add_to_gc_queue(Machine *m) {
assert(m);
if (m->in_gc_queue)
return;
LIST_PREPEND(gc_queue, m->manager->machine_gc_queue, m);
m->in_gc_queue = true;
}
MachineState machine_get_state(Machine *s) {
assert(s);
if (s->class == MACHINE_HOST)
return MACHINE_RUNNING;
if (s->stopping)
return MACHINE_CLOSING;
if (s->scope_job)
return MACHINE_OPENING;
return MACHINE_RUNNING;
}
int machine_kill(Machine *m, KillWho who, int signo) {
assert(m);
if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
return -EOPNOTSUPP;
if (!m->unit)
return -ESRCH;
if (who == KILL_LEADER) {
/* If we shall simply kill the leader, do so directly */
if (kill(m->leader, signo) < 0)
return -errno;
return 0;
}
/* Otherwise, make PID 1 do it for us, for the entire cgroup */
return manager_kill_unit(m->manager, m->unit, signo, NULL);
}
int machine_openpt(Machine *m, int flags, char **ret_slave) {
assert(m);
switch (m->class) {
case MACHINE_HOST:
return openpt_allocate(flags, ret_slave);
case MACHINE_CONTAINER:
if (m->leader <= 0)
return -EINVAL;
return openpt_allocate_in_namespace(m->leader, flags, ret_slave);
default:
return -EOPNOTSUPP;
}
}
int machine_open_terminal(Machine *m, const char *path, int mode) {
assert(m);
switch (m->class) {
case MACHINE_HOST:
return open_terminal(path, mode);
case MACHINE_CONTAINER:
if (m->leader <= 0)
return -EINVAL;
return open_terminal_in_namespace(m->leader, path, mode);
default:
return -EOPNOTSUPP;
}
}
void machine_release_unit(Machine *m) {
assert(m);
if (!m->unit)
return;
if (m->referenced) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
int r;
r = manager_unref_unit(m->manager, m->unit, &error);
if (r < 0)
log_warning_errno(r, "Failed to drop reference to machine scope, ignoring: %s",
bus_error_message(&error, r));
m->referenced = false;
}
(void) hashmap_remove(m->manager->machine_units, m->unit);
m->unit = mfree(m->unit);
}
int machine_get_uid_shift(Machine *m, uid_t *ret) {
char p[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
uid_t uid_base, uid_shift, uid_range;
gid_t gid_base, gid_shift, gid_range;
_cleanup_fclose_ FILE *f = NULL;
int k, r;
assert(m);
assert(ret);
/* Return the base UID/GID of the specified machine. Note that this only works for containers with simple
* mappings. In most cases setups should be simple like this, and administrators should only care about the
* basic offset a container has relative to the host. This is what this function exposes.
*
* If we encounter any more complex mappings we politely refuse this with ENXIO. */
if (m->class == MACHINE_HOST) {
*ret = 0;
return 0;
}
if (m->class != MACHINE_CONTAINER)
return -EOPNOTSUPP;
xsprintf(p, "/proc/" PID_FMT "/uid_map", m->leader);
f = fopen(p, "re");
if (!f) {
if (errno == ENOENT) {
/* If the file doesn't exist, user namespacing is off in the kernel, return a zero mapping hence. */
*ret = 0;
return 0;
}
return -errno;
}
/* Read the first line. There's at least one. */
errno = 0;
k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &uid_base, &uid_shift, &uid_range);
if (k != 3) {
if (ferror(f))
return errno_or_else(EIO);
return -EBADMSG;
}
/* Not a mapping starting at 0? Then it's a complex mapping we can't expose here. */
if (uid_base != 0)
return -ENXIO;
/* Insist that at least the nobody user is mapped, everything else is weird, and hence complex, and we don't support it */
if (uid_range < UID_NOBODY)
return -ENXIO;
/* If there's more than one line, then we don't support this mapping. */
r = safe_fgetc(f, NULL);
if (r < 0)
return r;
if (r != 0) /* Insist on EOF */
return -ENXIO;
fclose(f);
xsprintf(p, "/proc/" PID_FMT "/gid_map", m->leader);
f = fopen(p, "re");
if (!f)
return -errno;
/* Read the first line. There's at least one. */
errno = 0;
k = fscanf(f, GID_FMT " " GID_FMT " " GID_FMT "\n", &gid_base, &gid_shift, &gid_range);
if (k != 3) {
if (ferror(f))
return errno_or_else(EIO);
return -EBADMSG;
}
/* If there's more than one line, then we don't support this file. */
r = safe_fgetc(f, NULL);
if (r < 0)
return r;
if (r != 0) /* Insist on EOF */
return -ENXIO;
/* If the UID and GID mapping doesn't match, we don't support this mapping. */
if (uid_base != (uid_t) gid_base)
return -ENXIO;
if (uid_shift != (uid_t) gid_shift)
return -ENXIO;
if (uid_range != (uid_t) gid_range)
return -ENXIO;
*ret = uid_shift;
return 0;
}
static const char* const machine_class_table[_MACHINE_CLASS_MAX] = {
[MACHINE_CONTAINER] = "container",
[MACHINE_VM] = "vm",
[MACHINE_HOST] = "host",
};
DEFINE_STRING_TABLE_LOOKUP(machine_class, MachineClass);
static const char* const machine_state_table[_MACHINE_STATE_MAX] = {
[MACHINE_OPENING] = "opening",
[MACHINE_RUNNING] = "running",
[MACHINE_CLOSING] = "closing"
};
DEFINE_STRING_TABLE_LOOKUP(machine_state, MachineState);
static const char* const kill_who_table[_KILL_WHO_MAX] = {
[KILL_LEADER] = "leader",
[KILL_ALL] = "all"
};
DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);