Systemd/src/nspawn/nspawn-register.c

362 lines
12 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "sd-bus.h"
#include "bus-error.h"
#include "bus-locator.h"
#include "bus-unit-util.h"
#include "bus-util.h"
#include "bus-wait-for-jobs.h"
#include "nspawn-register.h"
#include "special.h"
#include "stat-util.h"
#include "strv.h"
#include "util.h"
static int append_machine_properties(
sd_bus_message *m,
CustomMount *mounts,
unsigned n_mounts,
int kill_signal) {
unsigned j;
int r;
assert(m);
r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
if (r < 0)
return bus_log_create_error(r);
/* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device policies in
* sync regardless if we are run with or without the --keep-unit switch. */
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
/* Allow the container to
* access and create the API
* device nodes, so that
* PrivateDevices= in the
* container can work
* fine */
"/dev/net/tun", "rwm",
/* Allow the container
* access to ptys. However,
* do not permit the
* container to ever create
* these device nodes. */
"char-pts", "rw");
if (r < 0)
return bus_log_create_error(r);
for (j = 0; j < n_mounts; j++) {
CustomMount *cm = mounts + j;
if (cm->type != CUSTOM_MOUNT_BIND)
continue;
r = is_device_node(cm->source);
if (r == -ENOENT) {
/* The bind source might only appear as the image is put together, hence don't complain */
log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source);
continue;
}
if (r < 0)
return log_error_errno(r, "Failed to stat %s: %m", cm->source);
if (r) {
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
cm->source, cm->read_only ? "r" : "rw");
if (r < 0)
return log_error_errno(r, "Failed to append message arguments: %m");
}
}
if (kill_signal != 0) {
r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
if (r < 0)
return bus_log_create_error(r);
}
return 0;
}
static int append_controller_property(sd_bus *bus, sd_bus_message *m) {
const char *unique;
int r;
assert(bus);
assert(m);
r = sd_bus_get_unique_name(bus, &unique);
if (r < 0)
return log_error_errno(r, "Failed to get unique name: %m");
r = sd_bus_message_append(m, "(sv)", "Controller", "s", unique);
if (r < 0)
return bus_log_create_error(r);
return 0;
}
int register_machine(
sd_bus *bus,
const char *machine_name,
pid_t pid,
const char *directory,
sd_id128_t uuid,
int local_ifindex,
const char *slice,
CustomMount *mounts,
unsigned n_mounts,
int kill_signal,
char **properties,
nspawn: add support for executing OCI runtime bundles with nspawn This is a pretty large patch, and adds support for OCI runtime bundles to nspawn. A new switch --oci-bundle= is added that takes a path to an OCI bundle. The JSON file included therein is read similar to a .nspawn settings files, however with a different feature set. Implementation-wise this mostly extends the pre-existing Settings object to carry additional properties for OCI. However, OCI supports some concepts .nspawn files did not support yet, which this patch also adds: 1. Support for "masking" files and directories. This functionatly is now also available via the new --inaccesible= cmdline command, and Inaccessible= in .nspawn files. 2. Support for mounting arbitrary file systems. (not exposed through nspawn cmdline nor .nspawn files, because probably not a good idea) 3. Ability to configure the console settings for a container. This functionality is now also available on the nspawn cmdline in the new --console= switch (not added to .nspawn for now, as it is something specific to the invocation really, not a property of the container) 4. Console width/height configuration. Not exposed through .nspawn/cmdline, but this may be controlled through $COLUMNS and $LINES like in most other UNIX tools. 5. UID/GID configuration by raw numbers. (not exposed in .nspawn and on the cmdline, since containers likely have different user tables, and the existing --user= switch appears to be the better option) 6. OCI hook commands (no exposed in .nspawn/cmdline, as very specific to OCI) 7. Creation of additional devices nodes in /dev. Most likely not a good idea, hence not exposed in .nspawn/cmdline. There's already --bind= to achieve the same, which is the better alternative. 8. Explicit syscall filters. This is not a good idea, due to the skewed arch support, hence not exposed through .nspawn/cmdline. 9. Configuration of some sysctls on a whitelist. Questionnable, not supported in .nspawn/cmdline for now. 10. Configuration of all 5 types of capabilities. Not a useful concept, since the kernel will reduce the caps on execve() anyway. Not exposed through .nspawn/cmdline as this is not very useful hence. Note that this only implements the OCI runtime logic itself. It does not provide a runc-compatible command line tool. This is left for a later PR. Only with that in place tools such as "buildah" can use the OCI support in nspawn as drop-in replacement. Currently still missing is OCI hook support, but it's already parsed and everything, and should be easy to add. Other than that it's OCI is implemented pretty comprehensively. There's a list of incompatibilities in the nspawn-oci.c file. In a later PR I'd like to convert this into proper markdown and add it to the documentation directory.
2018-04-25 11:23:37 +02:00
sd_bus_message *properties_message,
bool keep_unit,
const char *service) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
int r;
assert(bus);
if (keep_unit) {
r = bus_call_method(
bus,
bus_machine_mgr,
"RegisterMachineWithNetwork",
&error,
NULL,
"sayssusai",
machine_name,
SD_BUS_MESSAGE_APPEND_ID128(uuid),
service,
"container",
(uint32_t) pid,
strempty(directory),
local_ifindex > 0 ? 1 : 0, local_ifindex);
} else {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "CreateMachineWithNetwork");
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(
m,
"sayssusai",
machine_name,
SD_BUS_MESSAGE_APPEND_ID128(uuid),
service,
"container",
(uint32_t) pid,
strempty(directory),
local_ifindex > 0 ? 1 : 0, local_ifindex);
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_open_container(m, 'a', "(sv)");
if (r < 0)
return bus_log_create_error(r);
if (!isempty(slice)) {
r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
if (r < 0)
return bus_log_create_error(r);
}
r = append_controller_property(bus, m);
if (r < 0)
return r;
r = append_machine_properties(
m,
mounts,
n_mounts,
kill_signal);
if (r < 0)
return r;
nspawn: add support for executing OCI runtime bundles with nspawn This is a pretty large patch, and adds support for OCI runtime bundles to nspawn. A new switch --oci-bundle= is added that takes a path to an OCI bundle. The JSON file included therein is read similar to a .nspawn settings files, however with a different feature set. Implementation-wise this mostly extends the pre-existing Settings object to carry additional properties for OCI. However, OCI supports some concepts .nspawn files did not support yet, which this patch also adds: 1. Support for "masking" files and directories. This functionatly is now also available via the new --inaccesible= cmdline command, and Inaccessible= in .nspawn files. 2. Support for mounting arbitrary file systems. (not exposed through nspawn cmdline nor .nspawn files, because probably not a good idea) 3. Ability to configure the console settings for a container. This functionality is now also available on the nspawn cmdline in the new --console= switch (not added to .nspawn for now, as it is something specific to the invocation really, not a property of the container) 4. Console width/height configuration. Not exposed through .nspawn/cmdline, but this may be controlled through $COLUMNS and $LINES like in most other UNIX tools. 5. UID/GID configuration by raw numbers. (not exposed in .nspawn and on the cmdline, since containers likely have different user tables, and the existing --user= switch appears to be the better option) 6. OCI hook commands (no exposed in .nspawn/cmdline, as very specific to OCI) 7. Creation of additional devices nodes in /dev. Most likely not a good idea, hence not exposed in .nspawn/cmdline. There's already --bind= to achieve the same, which is the better alternative. 8. Explicit syscall filters. This is not a good idea, due to the skewed arch support, hence not exposed through .nspawn/cmdline. 9. Configuration of some sysctls on a whitelist. Questionnable, not supported in .nspawn/cmdline for now. 10. Configuration of all 5 types of capabilities. Not a useful concept, since the kernel will reduce the caps on execve() anyway. Not exposed through .nspawn/cmdline as this is not very useful hence. Note that this only implements the OCI runtime logic itself. It does not provide a runc-compatible command line tool. This is left for a later PR. Only with that in place tools such as "buildah" can use the OCI support in nspawn as drop-in replacement. Currently still missing is OCI hook support, but it's already parsed and everything, and should be easy to add. Other than that it's OCI is implemented pretty comprehensively. There's a list of incompatibilities in the nspawn-oci.c file. In a later PR I'd like to convert this into proper markdown and add it to the documentation directory.
2018-04-25 11:23:37 +02:00
if (properties_message) {
r = sd_bus_message_copy(m, properties_message, true);
if (r < 0)
return bus_log_create_error(r);
}
r = bus_append_unit_property_assignment_many(m, UNIT_SERVICE, properties);
if (r < 0)
return r;
r = sd_bus_message_close_container(m);
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_call(bus, m, 0, &error, NULL);
}
if (r < 0)
return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
return 0;
}
int unregister_machine(
sd_bus *bus,
const char *machine_name) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
int r;
assert(bus);
r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name);
if (r < 0)
log_debug("Failed to unregister machine: %s", bus_error_message(&error, r));
return 0;
}
int allocate_scope(
sd_bus *bus,
const char *machine_name,
pid_t pid,
const char *slice,
CustomMount *mounts,
unsigned n_mounts,
int kill_signal,
nspawn: add support for executing OCI runtime bundles with nspawn This is a pretty large patch, and adds support for OCI runtime bundles to nspawn. A new switch --oci-bundle= is added that takes a path to an OCI bundle. The JSON file included therein is read similar to a .nspawn settings files, however with a different feature set. Implementation-wise this mostly extends the pre-existing Settings object to carry additional properties for OCI. However, OCI supports some concepts .nspawn files did not support yet, which this patch also adds: 1. Support for "masking" files and directories. This functionatly is now also available via the new --inaccesible= cmdline command, and Inaccessible= in .nspawn files. 2. Support for mounting arbitrary file systems. (not exposed through nspawn cmdline nor .nspawn files, because probably not a good idea) 3. Ability to configure the console settings for a container. This functionality is now also available on the nspawn cmdline in the new --console= switch (not added to .nspawn for now, as it is something specific to the invocation really, not a property of the container) 4. Console width/height configuration. Not exposed through .nspawn/cmdline, but this may be controlled through $COLUMNS and $LINES like in most other UNIX tools. 5. UID/GID configuration by raw numbers. (not exposed in .nspawn and on the cmdline, since containers likely have different user tables, and the existing --user= switch appears to be the better option) 6. OCI hook commands (no exposed in .nspawn/cmdline, as very specific to OCI) 7. Creation of additional devices nodes in /dev. Most likely not a good idea, hence not exposed in .nspawn/cmdline. There's already --bind= to achieve the same, which is the better alternative. 8. Explicit syscall filters. This is not a good idea, due to the skewed arch support, hence not exposed through .nspawn/cmdline. 9. Configuration of some sysctls on a whitelist. Questionnable, not supported in .nspawn/cmdline for now. 10. Configuration of all 5 types of capabilities. Not a useful concept, since the kernel will reduce the caps on execve() anyway. Not exposed through .nspawn/cmdline as this is not very useful hence. Note that this only implements the OCI runtime logic itself. It does not provide a runc-compatible command line tool. This is left for a later PR. Only with that in place tools such as "buildah" can use the OCI support in nspawn as drop-in replacement. Currently still missing is OCI hook support, but it's already parsed and everything, and should be easy to add. Other than that it's OCI is implemented pretty comprehensively. There's a list of incompatibilities in the nspawn-oci.c file. In a later PR I'd like to convert this into proper markdown and add it to the documentation directory.
2018-04-25 11:23:37 +02:00
char **properties,
sd_bus_message *properties_message) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
_cleanup_free_ char *scope = NULL;
const char *description, *object;
int r;
assert(bus);
r = bus_wait_for_jobs_new(bus, &w);
if (r < 0)
return log_error_errno(r, "Could not watch job: %m");
r = unit_name_mangle_with_suffix(machine_name, "as machine name", 0, ".scope", &scope);
if (r < 0)
return log_error_errno(r, "Failed to mangle scope name: %m");
r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(m, "ss", scope, "fail");
if (r < 0)
return bus_log_create_error(r);
/* Properties */
r = sd_bus_message_open_container(m, 'a', "(sv)");
if (r < 0)
return bus_log_create_error(r);
description = strjoina("Container ", machine_name);
r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)(sv)(sv)",
"PIDs", "au", 1, pid,
"Description", "s", description,
"Delegate", "b", 1,
"CollectMode", "s", "inactive-or-failed",
"AddRef", "b", 1,
"Slice", "s", isempty(slice) ? SPECIAL_MACHINE_SLICE : slice);
if (r < 0)
return bus_log_create_error(r);
r = append_controller_property(bus, m);
if (r < 0)
return r;
nspawn: add support for executing OCI runtime bundles with nspawn This is a pretty large patch, and adds support for OCI runtime bundles to nspawn. A new switch --oci-bundle= is added that takes a path to an OCI bundle. The JSON file included therein is read similar to a .nspawn settings files, however with a different feature set. Implementation-wise this mostly extends the pre-existing Settings object to carry additional properties for OCI. However, OCI supports some concepts .nspawn files did not support yet, which this patch also adds: 1. Support for "masking" files and directories. This functionatly is now also available via the new --inaccesible= cmdline command, and Inaccessible= in .nspawn files. 2. Support for mounting arbitrary file systems. (not exposed through nspawn cmdline nor .nspawn files, because probably not a good idea) 3. Ability to configure the console settings for a container. This functionality is now also available on the nspawn cmdline in the new --console= switch (not added to .nspawn for now, as it is something specific to the invocation really, not a property of the container) 4. Console width/height configuration. Not exposed through .nspawn/cmdline, but this may be controlled through $COLUMNS and $LINES like in most other UNIX tools. 5. UID/GID configuration by raw numbers. (not exposed in .nspawn and on the cmdline, since containers likely have different user tables, and the existing --user= switch appears to be the better option) 6. OCI hook commands (no exposed in .nspawn/cmdline, as very specific to OCI) 7. Creation of additional devices nodes in /dev. Most likely not a good idea, hence not exposed in .nspawn/cmdline. There's already --bind= to achieve the same, which is the better alternative. 8. Explicit syscall filters. This is not a good idea, due to the skewed arch support, hence not exposed through .nspawn/cmdline. 9. Configuration of some sysctls on a whitelist. Questionnable, not supported in .nspawn/cmdline for now. 10. Configuration of all 5 types of capabilities. Not a useful concept, since the kernel will reduce the caps on execve() anyway. Not exposed through .nspawn/cmdline as this is not very useful hence. Note that this only implements the OCI runtime logic itself. It does not provide a runc-compatible command line tool. This is left for a later PR. Only with that in place tools such as "buildah" can use the OCI support in nspawn as drop-in replacement. Currently still missing is OCI hook support, but it's already parsed and everything, and should be easy to add. Other than that it's OCI is implemented pretty comprehensively. There's a list of incompatibilities in the nspawn-oci.c file. In a later PR I'd like to convert this into proper markdown and add it to the documentation directory.
2018-04-25 11:23:37 +02:00
if (properties_message) {
r = sd_bus_message_copy(m, properties_message, true);
if (r < 0)
return bus_log_create_error(r);
}
r = append_machine_properties(
m,
mounts,
n_mounts,
kill_signal);
if (r < 0)
return r;
r = bus_append_unit_property_assignment_many(m, UNIT_SCOPE, properties);
if (r < 0)
return r;
r = sd_bus_message_close_container(m);
if (r < 0)
return bus_log_create_error(r);
/* No auxiliary units */
r = sd_bus_message_append(
m,
"a(sa(sv))",
0);
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_call(bus, m, 0, &error, &reply);
if (r < 0)
return log_error_errno(r, "Failed to allocate scope: %s", bus_error_message(&error, r));
r = sd_bus_message_read(reply, "o", &object);
if (r < 0)
return bus_log_parse_error(r);
r = bus_wait_for_jobs_one(w, object, false);
if (r < 0)
return r;
return 0;
}
int terminate_scope(
sd_bus *bus,
const char *machine_name) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_free_ char *scope = NULL;
int r;
r = unit_name_mangle_with_suffix(machine_name, "to terminate", 0, ".scope", &scope);
if (r < 0)
return log_error_errno(r, "Failed to mangle scope name: %m");
r = bus_call_method(bus, bus_systemd_mgr, "AbandonScope", &error, NULL, "s", scope);
if (r < 0) {
log_debug_errno(r, "Failed to abandon scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
sd_bus_error_free(&error);
}
r = bus_call_method(
bus,
bus_systemd_mgr,
"KillUnit",
&error,
NULL,
"ssi",
scope,
"all",
(int32_t) SIGKILL);
if (r < 0) {
log_debug_errno(r, "Failed to SIGKILL scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
sd_bus_error_free(&error);
}
r = bus_call_method(bus, bus_systemd_mgr, "UnrefUnit", &error, NULL, "s", scope);
if (r < 0)
log_debug_errno(r, "Failed to drop reference to scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
return 0;
}