Merge pull request #10813 from poettering/cgroup-exec-start-pre
make sure ExecStartPre= and Delegate=yes mix well
This commit is contained in:
commit
de38f06f52
|
@ -244,6 +244,19 @@ So, if you want to do your own raw cgroups kernel level access, then allocate a
|
|||
scope unit, or a service unit (or just use the service unit you already have
|
||||
for your service code), and turn on delegation for it.
|
||||
|
||||
(OK, here's one caveat: if you turn on delegation for a service, and that
|
||||
service has `ExecStartPost=`, `ExecReload=`, `ExecStop=` or `ExecStopPost=`
|
||||
set, then these commands will be executed within the `.control/` sub-cgroup of
|
||||
your service's cgroup. This is necessary because by turning on delegation we
|
||||
have to assume that the cgroup delegated to your service is now an *inner*
|
||||
cgroup, which means that it may not directly contain any processes. Hence, if
|
||||
your service has any of these four settings set, you must be prepared that a
|
||||
`.control/` sub-cgroup might appear, managed by the service manager. This also
|
||||
means that your service code should have moved itself further down the cgroup
|
||||
tree by the time it notifies the service manager about start-up readiness, so
|
||||
that the service's main cgroup is definitely an inner node by the time the
|
||||
service manager might start `ExecStartPost=`.)
|
||||
|
||||
## Three Scenarios
|
||||
|
||||
Let's say you write a container manager, and you wonder what to do regarding
|
||||
|
|
|
@ -730,6 +730,9 @@
|
|||
specific to the unified hierarchy while others are specific to the legacy hierarchy. Also note that the
|
||||
kernel might support further controllers, which aren't covered here yet as delegation is either not supported
|
||||
at all for them or not defined cleanly.</para>
|
||||
|
||||
<para>For further details on the delegation model consult <ulink
|
||||
url="https://systemd.io/CGROUP_DELEGATION">Control Group APIs and Delegation</ulink>.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
|
|
@ -2762,6 +2762,37 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p
|
|||
|
||||
static char *exec_command_line(char **argv);
|
||||
|
||||
static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
|
||||
bool using_subcgroup;
|
||||
char *p;
|
||||
|
||||
assert(params);
|
||||
assert(ret);
|
||||
|
||||
if (!params->cgroup_path)
|
||||
return -EINVAL;
|
||||
|
||||
/* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
|
||||
* subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
|
||||
* processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
|
||||
* and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
|
||||
* let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
|
||||
* ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
|
||||
* this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
|
||||
* flag, which is only passed for the former statements, not for the latter. */
|
||||
|
||||
using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
|
||||
if (using_subcgroup)
|
||||
p = strjoin(params->cgroup_path, "/.control");
|
||||
else
|
||||
p = strdup(params->cgroup_path);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
*ret = p;
|
||||
return using_subcgroup;
|
||||
}
|
||||
|
||||
static int exec_child(
|
||||
Unit *unit,
|
||||
const ExecCommand *command,
|
||||
|
@ -2994,10 +3025,18 @@ static int exec_child(
|
|||
}
|
||||
|
||||
if (params->cgroup_path) {
|
||||
r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
|
||||
_cleanup_free_ char *p = NULL;
|
||||
|
||||
r = exec_parameters_get_cgroup_path(params, &p);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_CGROUP;
|
||||
return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
|
||||
return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
|
||||
}
|
||||
|
||||
r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_CGROUP;
|
||||
return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3569,6 +3608,7 @@ int exec_spawn(Unit *unit,
|
|||
pid_t *ret) {
|
||||
|
||||
int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
|
||||
_cleanup_free_ char *subcgroup_path = NULL;
|
||||
_cleanup_strv_free_ char **files_env = NULL;
|
||||
size_t n_storage_fds = 0, n_socket_fds = 0;
|
||||
_cleanup_free_ char *line = NULL;
|
||||
|
@ -3621,6 +3661,17 @@ int exec_spawn(Unit *unit,
|
|||
LOG_UNIT_ID(unit),
|
||||
LOG_UNIT_INVOCATION_ID(unit));
|
||||
|
||||
if (params->cgroup_path) {
|
||||
r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
|
||||
if (r > 0) { /* We are using a child cgroup */
|
||||
r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
|
||||
}
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0)
|
||||
return log_unit_error_errno(unit, errno, "Failed to fork: %m");
|
||||
|
@ -3658,13 +3709,11 @@ int exec_spawn(Unit *unit,
|
|||
|
||||
log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
|
||||
|
||||
/* We add the new process to the cgroup both in the child (so
|
||||
* that we can be sure that no user code is ever executed
|
||||
* outside of the cgroup) and in the parent (so that we can be
|
||||
* sure that when we kill the cgroup the process will be
|
||||
* killed too). */
|
||||
if (params->cgroup_path)
|
||||
(void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
|
||||
/* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
|
||||
* executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
|
||||
* process will be killed too). */
|
||||
if (subcgroup_path)
|
||||
(void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
|
||||
|
||||
exec_status_start(&command->exec_status, pid);
|
||||
|
||||
|
|
|
@ -294,12 +294,13 @@ typedef enum ExecFlags {
|
|||
EXEC_CHOWN_DIRECTORIES = 1 << 5, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
|
||||
EXEC_NSS_BYPASS_BUS = 1 << 6, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
|
||||
EXEC_CGROUP_DELEGATE = 1 << 7,
|
||||
EXEC_IS_CONTROL = 1 << 8,
|
||||
EXEC_CONTROL_CGROUP = 1 << 9, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only if EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL are set, too */
|
||||
|
||||
/* The following are not used by execute.c, but by consumers internally */
|
||||
EXEC_PASS_FDS = 1 << 8,
|
||||
EXEC_IS_CONTROL = 1 << 9,
|
||||
EXEC_SETENV_RESULT = 1 << 10,
|
||||
EXEC_SET_WATCHDOG = 1 << 11,
|
||||
EXEC_PASS_FDS = 1 << 10,
|
||||
EXEC_SETENV_RESULT = 1 << 11,
|
||||
EXEC_SET_WATCHDOG = 1 << 12,
|
||||
} ExecFlags;
|
||||
|
||||
/* Parameters for a specific invocation of a command. This structure is put together right before a command is
|
||||
|
|
|
@ -1418,7 +1418,7 @@ static int service_spawn(
|
|||
assert(c);
|
||||
assert(_pid);
|
||||
|
||||
r = unit_prepare_exec(UNIT(s));
|
||||
r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -1777,7 +1777,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
|
|||
r = service_spawn(s,
|
||||
s->control_command,
|
||||
s->timeout_stop_usec,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
|
||||
&s->control_pid);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
@ -1892,7 +1892,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
|
|||
r = service_spawn(s,
|
||||
s->control_command,
|
||||
s->timeout_stop_usec,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
|
||||
&s->control_pid);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
@ -1970,7 +1970,7 @@ static void service_enter_start_post(Service *s) {
|
|||
r = service_spawn(s,
|
||||
s->control_command,
|
||||
s->timeout_start_usec,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
|
||||
&s->control_pid);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
@ -2214,7 +2214,7 @@ static void service_enter_reload(Service *s) {
|
|||
r = service_spawn(s,
|
||||
s->control_command,
|
||||
s->timeout_start_usec,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
|
||||
&s->control_pid);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
@ -2254,7 +2254,8 @@ static void service_run_next_control(Service *s) {
|
|||
timeout,
|
||||
EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
|
||||
(IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
|
||||
(IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0),
|
||||
(IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)|
|
||||
(IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0),
|
||||
&s->control_pid);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
|
Loading…
Reference in a new issue