nspawn: register a scope for the unit if --register=no is specified (#6166)
Previously, only when --register=yes was set (the default) the invoked container would get its own scope, created by machined on behalf of nspawn. With this change if --register=no is set nspawn will still get its own scope (which is a good thing, so that --slice= and --property= take effect), but this is not done through machined but by registering a scope unit directly in PID 1. Summary: --register=yes → allocate a new scope through machined (the default) --register=yes --keep-unit → use the unit we are already running in an register with machined --register=no → allocate a new scope directly, but no machined --register=no --keep-unit → do not allocate nor register anything Fixes: #5823
This commit is contained in:
parent
694859b5e7
commit
cd2dfc6fae
|
@ -398,24 +398,19 @@
|
|||
<varlistentry>
|
||||
<term><option>--slice=</option></term>
|
||||
|
||||
<listitem><para>Make the container part of the specified
|
||||
slice, instead of the default
|
||||
<filename>machine.slice</filename>. This is only applies if
|
||||
the machine is run in its own scope unit, i.e. if
|
||||
<option>--keep-unit</option> is not used.</para>
|
||||
<listitem><para>Make the container part of the specified slice, instead of the default
|
||||
<filename>machine.slice</filename>. This applies only if the machine is run in its own scope unit, i.e. if
|
||||
<option>--keep-unit</option> isn't used.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--property=</option></term>
|
||||
|
||||
<listitem><para>Set a unit property on the scope unit to
|
||||
register for the machine. This only applies if the machine is
|
||||
run in its own scope unit, i.e. if
|
||||
<option>--keep-unit</option> is not used. Takes unit property
|
||||
assignments in the same format as <command>systemctl
|
||||
set-property</command>. This is useful to set memory limits
|
||||
and similar for machines.</para>
|
||||
<listitem><para>Set a unit property on the scope unit to register for the machine. This applies only if the
|
||||
machine is run in its own scope unit, i.e. if <option>--keep-unit</option> isn't used. Takes unit property
|
||||
assignments in the same format as <command>systemctl set-property</command>. This is useful to set memory
|
||||
limits and similar for container.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
@ -888,18 +883,16 @@
|
|||
<varlistentry>
|
||||
<term><option>--register=</option></term>
|
||||
|
||||
<listitem><para>Controls whether the container is registered
|
||||
with
|
||||
<citerefentry><refentrytitle>systemd-machined</refentrytitle><manvolnum>8</manvolnum></citerefentry>.
|
||||
Takes a boolean argument, which defaults to <literal>yes</literal>.
|
||||
This option should be enabled when the container runs a full
|
||||
Operating System (more specifically: an init system), and is
|
||||
useful to ensure that the container is accessible via
|
||||
<citerefentry><refentrytitle>machinectl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
|
||||
and shown by tools such as
|
||||
<citerefentry project='man-pages'><refentrytitle>ps</refentrytitle><manvolnum>1</manvolnum></citerefentry>.
|
||||
If the container does not run an init system, it is
|
||||
recommended to set this option to <literal>no</literal>.</para></listitem>
|
||||
<listitem><para>Controls whether the container is registered with
|
||||
<citerefentry><refentrytitle>systemd-machined</refentrytitle><manvolnum>8</manvolnum></citerefentry>. Takes a
|
||||
boolean argument, which defaults to <literal>yes</literal>. This option should be enabled when the container
|
||||
runs a full Operating System (more specifically: a system and service manager as PID 1), and is useful to
|
||||
ensure that the container is accessible via
|
||||
<citerefentry><refentrytitle>machinectl</refentrytitle><manvolnum>1</manvolnum></citerefentry> and shown by
|
||||
tools such as <citerefentry
|
||||
project='man-pages'><refentrytitle>ps</refentrytitle><manvolnum>1</manvolnum></citerefentry>. If the container
|
||||
does not run a service manager, it is recommended to set this option to
|
||||
<literal>no</literal>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
@ -916,7 +909,9 @@
|
|||
service unit, and the service unit's sole purpose is to run a
|
||||
single <command>systemd-nspawn</command> container. This
|
||||
option is not available if run from a user
|
||||
session.</para></listitem>
|
||||
session.</para>
|
||||
<para>Note that passing <option>--keep-unit</option> disables the effect of <option>--slice=</option> and
|
||||
<option>--property=</option>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
|
|
@ -27,6 +27,77 @@
|
|||
#include "strv.h"
|
||||
#include "util.h"
|
||||
|
||||
static int append_machine_properties(
|
||||
sd_bus_message *m,
|
||||
CustomMount *mounts,
|
||||
unsigned n_mounts,
|
||||
int kill_signal,
|
||||
char **properties) {
|
||||
|
||||
unsigned j;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
|
||||
r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
/* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device policies in
|
||||
* sync regardless if we are run with or without the --keep-unit switch. */
|
||||
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
|
||||
/* Allow the container to
|
||||
* access and create the API
|
||||
* device nodes, so that
|
||||
* PrivateDevices= in the
|
||||
* container can work
|
||||
* fine */
|
||||
"/dev/net/tun", "rwm",
|
||||
/* Allow the container
|
||||
* access to ptys. However,
|
||||
* do not permit the
|
||||
* container to ever create
|
||||
* these device nodes. */
|
||||
"char-pts", "rw");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
for (j = 0; j < n_mounts; j++) {
|
||||
CustomMount *cm = mounts + j;
|
||||
|
||||
if (cm->type != CUSTOM_MOUNT_BIND)
|
||||
continue;
|
||||
|
||||
r = is_device_node(cm->source);
|
||||
if (r == -ENOENT) {
|
||||
/* The bind source might only appear as the image is put together, hence don't complain */
|
||||
log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source);
|
||||
continue;
|
||||
}
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to stat %s: %m", cm->source);
|
||||
|
||||
if (r) {
|
||||
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
|
||||
cm->source, cm->read_only ? "r" : "rw");
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to append message arguments: %m");
|
||||
}
|
||||
}
|
||||
|
||||
if (kill_signal != 0) {
|
||||
r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int register_machine(
|
||||
const char *machine_name,
|
||||
pid_t pid,
|
||||
|
@ -68,7 +139,6 @@ int register_machine(
|
|||
local_ifindex > 0 ? 1 : 0, local_ifindex);
|
||||
} else {
|
||||
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
|
||||
unsigned j;
|
||||
|
||||
r = sd_bus_message_new_method_call(
|
||||
bus,
|
||||
|
@ -103,63 +173,14 @@ int register_machine(
|
|||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
|
||||
r = append_machine_properties(
|
||||
m,
|
||||
mounts,
|
||||
n_mounts,
|
||||
kill_signal,
|
||||
properties);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
/* If you make changes here, also make sure to update
|
||||
* systemd-nspawn@.service, to keep the device
|
||||
* policies in sync regardless if we are run with or
|
||||
* without the --keep-unit switch. */
|
||||
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
|
||||
/* Allow the container to
|
||||
* access and create the API
|
||||
* device nodes, so that
|
||||
* PrivateDevices= in the
|
||||
* container can work
|
||||
* fine */
|
||||
"/dev/net/tun", "rwm",
|
||||
/* Allow the container
|
||||
* access to ptys. However,
|
||||
* do not permit the
|
||||
* container to ever create
|
||||
* these device nodes. */
|
||||
"char-pts", "rw");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
for (j = 0; j < n_mounts; j++) {
|
||||
CustomMount *cm = mounts + j;
|
||||
|
||||
if (cm->type != CUSTOM_MOUNT_BIND)
|
||||
continue;
|
||||
|
||||
r = is_device_node(cm->source);
|
||||
if (r == -ENOENT) {
|
||||
/* The bind source might only appear as the image is put together, hence don't complain */
|
||||
log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source);
|
||||
continue;
|
||||
}
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to stat %s: %m", cm->source);
|
||||
|
||||
if (r) {
|
||||
r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
|
||||
cm->source, cm->read_only ? "r" : "rw");
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to append message arguments: %m");
|
||||
}
|
||||
}
|
||||
|
||||
if (kill_signal != 0) {
|
||||
r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
return r;
|
||||
|
||||
r = bus_append_unit_property_assignment_many(m, properties);
|
||||
if (r < 0)
|
||||
|
@ -229,3 +250,90 @@ int terminate_machine(pid_t pid) {
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int allocate_scope(
|
||||
const char *machine_name,
|
||||
pid_t pid,
|
||||
const char *slice,
|
||||
CustomMount *mounts,
|
||||
unsigned n_mounts,
|
||||
int kill_signal,
|
||||
char **properties) {
|
||||
|
||||
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
|
||||
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
|
||||
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
|
||||
_cleanup_free_ char *scope = NULL;
|
||||
const char *description;
|
||||
int r;
|
||||
|
||||
r = sd_bus_default_system(&bus);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to open system bus: %m");
|
||||
|
||||
r = unit_name_mangle_with_suffix(machine_name, UNIT_NAME_NOGLOB, ".scope", &scope);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to mangle scope name: %m");
|
||||
|
||||
r = sd_bus_message_new_method_call(
|
||||
bus,
|
||||
&m,
|
||||
"org.freedesktop.systemd1",
|
||||
"/org/freedesktop/systemd1",
|
||||
"org.freedesktop.systemd1.Manager",
|
||||
"StartTransientUnit");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = sd_bus_message_append(m, "ss", scope, "fail");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
/* Properties */
|
||||
r = sd_bus_message_open_container(m, 'a', "(sv)");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
description = strjoina("Container ", machine_name);
|
||||
|
||||
r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)",
|
||||
"PIDs", "au", 1, pid,
|
||||
"Description", "s", description,
|
||||
"Delegate", "b", 1,
|
||||
"Slice", "s", isempty(slice) ? "machine.slice" : slice);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = append_machine_properties(
|
||||
m,
|
||||
mounts,
|
||||
n_mounts,
|
||||
kill_signal,
|
||||
properties);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = bus_append_unit_property_assignment_many(m, properties);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_bus_message_close_container(m);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
/* No auxiliary units */
|
||||
r = sd_bus_message_append(
|
||||
m,
|
||||
"a(sa(sv))",
|
||||
0);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = sd_bus_call(bus, m, 0, &error, NULL);
|
||||
if (r < 0) {
|
||||
log_error("Failed to allocate scope: %s", bus_error_message(&error, r));
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -27,3 +27,5 @@
|
|||
|
||||
int register_machine(const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit, const char *service);
|
||||
int terminate_machine(pid_t pid);
|
||||
|
||||
int allocate_scope(const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties);
|
||||
|
|
|
@ -1083,8 +1083,8 @@ static int parse_argv(int argc, char *argv[]) {
|
|||
if (arg_userns_mode == USER_NAMESPACE_PICK)
|
||||
arg_userns_chown = true;
|
||||
|
||||
if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
|
||||
log_error("--keep-unit may not be used when invoked from a user session.");
|
||||
if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0) {
|
||||
log_error("--keep-unit --register=yes may not be used when invoked from a user session.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -3387,7 +3387,19 @@ static int run(int master,
|
|||
arg_container_service_name);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
} else if (!arg_keep_unit) {
|
||||
r = allocate_scope(
|
||||
arg_machine,
|
||||
*pid,
|
||||
arg_slice,
|
||||
arg_custom_mounts, arg_n_custom_mounts,
|
||||
arg_kill_signal,
|
||||
arg_property);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
} else if (arg_slice || arg_property)
|
||||
log_notice("Machine and scope registration turned off, --slice= and --property= settings will have no effect.");
|
||||
|
||||
r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift);
|
||||
if (r < 0)
|
||||
|
|
Loading…
Reference in a new issue