Merge pull request #10813 from poettering/cgroup-exec-start-pre

make sure ExecStartPre= and Delegate=yes mix well
This commit is contained in:
Lennart Poettering 2018-11-27 10:29:17 +01:00 committed by GitHub
commit de38f06f52
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 86 additions and 19 deletions

View file

@ -244,6 +244,19 @@ So, if you want to do your own raw cgroups kernel level access, then allocate a
scope unit, or a service unit (or just use the service unit you already have
for your service code), and turn on delegation for it.
(OK, here's one caveat: if you turn on delegation for a service, and that
service has `ExecStartPost=`, `ExecReload=`, `ExecStop=` or `ExecStopPost=`
set, then these commands will be executed within the `.control/` sub-cgroup of
your service's cgroup. This is necessary because by turning on delegation we
have to assume that the cgroup delegated to your service is now an *inner*
cgroup, which means that it may not directly contain any processes. Hence, if
your service has any of these four settings set, you must be prepared that a
`.control/` subcgroup might appear, managed by the service manager. This also
means that your service code should have moved itself further down the cgroup
tree by the time it notifies the service manager about start-up readiness, so
that the service's main cgroup is definitely an inner node by the time the
service manager might start `ExecStartPost=`.)
## Three Scenarios
Let's say you write a container manager, and you wonder what to do regarding

View file

@ -730,6 +730,9 @@
specific to the unified hierarchy while others are specific to the legacy hierarchy. Also note that the
kernel might support further controllers, which aren't covered here yet as delegation is either not supported
at all for them or not defined cleanly.</para>
<para>For further details on the delegation model consult <ulink
url="https://systemd.io/CGROUP_DELEGATION">Control Group APIs and Delegation</ulink>.</para>
</listitem>
</varlistentry>

View file

@ -2762,6 +2762,37 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p
static char *exec_command_line(char **argv);
static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
bool using_subcgroup;
char *p;
assert(params);
assert(ret);
if (!params->cgroup_path)
return -EINVAL;
/* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
* subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
* processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
* and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
* let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
* ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
* this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
* flag, which is only passed for the former statements, not for the latter. */
using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
if (using_subcgroup)
p = strjoin(params->cgroup_path, "/.control");
else
p = strdup(params->cgroup_path);
if (!p)
return -ENOMEM;
*ret = p;
return using_subcgroup;
}
static int exec_child(
Unit *unit,
const ExecCommand *command,
@ -2994,10 +3025,18 @@ static int exec_child(
}
if (params->cgroup_path) {
r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
_cleanup_free_ char *p = NULL;
r = exec_parameters_get_cgroup_path(params, &p);
if (r < 0) {
*exit_status = EXIT_CGROUP;
return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
}
r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
if (r < 0) {
*exit_status = EXIT_CGROUP;
return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
}
}
@ -3569,6 +3608,7 @@ int exec_spawn(Unit *unit,
pid_t *ret) {
int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
_cleanup_free_ char *subcgroup_path = NULL;
_cleanup_strv_free_ char **files_env = NULL;
size_t n_storage_fds = 0, n_socket_fds = 0;
_cleanup_free_ char *line = NULL;
@ -3621,6 +3661,17 @@ int exec_spawn(Unit *unit,
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit));
if (params->cgroup_path) {
r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
if (r > 0) { /* We are using a child cgroup */
r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
}
}
pid = fork();
if (pid < 0)
return log_unit_error_errno(unit, errno, "Failed to fork: %m");
@ -3658,13 +3709,11 @@ int exec_spawn(Unit *unit,
log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
/* We add the new process to the cgroup both in the child (so
* that we can be sure that no user code is ever executed
* outside of the cgroup) and in the parent (so that we can be
* sure that when we kill the cgroup the process will be
* killed too). */
if (params->cgroup_path)
(void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
/* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
* executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
* process will be killed too). */
if (subcgroup_path)
(void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
exec_status_start(&command->exec_status, pid);

View file

@ -294,12 +294,13 @@ typedef enum ExecFlags {
EXEC_CHOWN_DIRECTORIES = 1 << 5, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
EXEC_NSS_BYPASS_BUS = 1 << 6, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
EXEC_CGROUP_DELEGATE = 1 << 7,
EXEC_IS_CONTROL = 1 << 8,
EXEC_CONTROL_CGROUP = 1 << 9, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
/* The following are not used by execute.c, but by consumers internally */
EXEC_PASS_FDS = 1 << 8,
EXEC_IS_CONTROL = 1 << 9,
EXEC_SETENV_RESULT = 1 << 10,
EXEC_SET_WATCHDOG = 1 << 11,
EXEC_PASS_FDS = 1 << 10,
EXEC_SETENV_RESULT = 1 << 11,
EXEC_SET_WATCHDOG = 1 << 12,
} ExecFlags;
/* Parameters for a specific invocation of a command. This structure is put together right before a command is

View file

@ -1418,7 +1418,7 @@ static int service_spawn(
assert(c);
assert(_pid);
r = unit_prepare_exec(UNIT(s));
r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */
if (r < 0)
return r;
@ -1777,7 +1777,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@ -1892,7 +1892,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@ -1970,7 +1970,7 @@ static void service_enter_start_post(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@ -2214,7 +2214,7 @@ static void service_enter_reload(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
&s->control_pid);
if (r < 0)
goto fail;
@ -2254,7 +2254,8 @@ static void service_run_next_control(Service *s) {
timeout,
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
(IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
(IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0),
(IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)|
(IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0),
&s->control_pid);
if (r < 0)
goto fail;