Merge pull request #7464 from poettering/cgroup-control-fix
various cgroup fixes
This commit is contained in:
commit
b45f436f61
10
TODO
10
TODO
|
@ -24,6 +24,12 @@ Janitorial Clean-ups:
|
|||
|
||||
Features:
|
||||
|
||||
* add some special mode to LogsDirectory=/StateDirectory=… that allows
|
||||
declaring these directories without necessarily pulling in deps for them, or
|
||||
creating them when starting up. That way, we could declare that
|
||||
systemd-journald writes to /var/log/journal, which could be useful when we
|
||||
are doing disk usage calculations and so on.
|
||||
|
||||
* add a new RuntimeDirectoryPreserve= mode that defines a similar lifecycle for
|
||||
the runtime dir as we maintain for the fdstore: i.e. keep it around as long
|
||||
as the unit is running or has a job queued.
|
||||
|
@ -45,8 +51,6 @@ Features:
|
|||
* add a way to lock down cgroup migration: a boolean, which when set for a unit
|
||||
makes sure the processes in it can never migrate out of it
|
||||
|
||||
* complain if a unit starts up and there are already processes in its cgroup
|
||||
|
||||
* blog about fd store and restartable services
|
||||
|
||||
* document Environment=SYSTEMD_LOG_LEVEL=debug drop-in in debugging document
|
||||
|
@ -303,8 +307,6 @@ Features:
|
|||
the specified range and generates sane error messages for incorrect
|
||||
specifications.
|
||||
|
||||
* do something about "/control" subcgroups in the unified cgroup hierarchy
|
||||
|
||||
* when we detect that there are waiting jobs but no running jobs, do something
|
||||
|
||||
* push CPUAffinity= also into the "cpuset" cgroup controller (only after the cpuset controller got ported to the unified hierarchy)
|
||||
|
|
|
@ -28,7 +28,7 @@ export LC_CTYPE=en_US.UTF-8
|
|||
|
||||
sysvinit_path=`realpath /etc/init.d`
|
||||
|
||||
[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path"
|
||||
[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D "default-hierarchy=unified"
|
||||
ninja -C "$BUILDDIR" all
|
||||
[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
|
||||
ninja -C "$BUILDDIR" install
|
||||
|
|
|
@ -876,115 +876,87 @@ int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
|
|||
return r;
|
||||
}
|
||||
|
||||
int cg_set_group_access(
|
||||
int cg_set_access(
|
||||
const char *controller,
|
||||
const char *path,
|
||||
mode_t mode,
|
||||
uid_t uid,
|
||||
gid_t gid) {
|
||||
|
||||
_cleanup_free_ char *fs = NULL;
|
||||
int r;
|
||||
struct Attribute {
|
||||
const char *name;
|
||||
bool fatal;
|
||||
};
|
||||
|
||||
if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
|
||||
/* cgroupsv1, aka legacy/non-unified */
|
||||
static const struct Attribute legacy_attributes[] = {
|
||||
{ "cgroup.procs", true },
|
||||
{ "tasks", false },
|
||||
{ "cgroup.clone_children", false },
|
||||
{},
|
||||
};
|
||||
|
||||
/* cgroupsv2, aka unified */
|
||||
static const struct Attribute unified_attributes[] = {
|
||||
{ "cgroup.procs", true },
|
||||
{ "cgroup.subtree_control", true },
|
||||
{ "cgroup.threads", false },
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct Attribute* const attributes[] = {
|
||||
[false] = legacy_attributes,
|
||||
[true] = unified_attributes,
|
||||
};
|
||||
|
||||
_cleanup_free_ char *fs = NULL;
|
||||
const struct Attribute *i;
|
||||
int r, unified;
|
||||
|
||||
assert(path);
|
||||
|
||||
if (uid == UID_INVALID && gid == GID_INVALID)
|
||||
return 0;
|
||||
|
||||
if (mode != MODE_INVALID)
|
||||
mode &= 0777;
|
||||
unified = cg_unified_controller(controller);
|
||||
if (unified < 0)
|
||||
return unified;
|
||||
|
||||
/* Configure access to the cgroup itself */
|
||||
r = cg_get_path(controller, path, NULL, &fs);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = chmod_and_chown(fs, mode, uid, gid);
|
||||
r = chmod_and_chown(fs, 0755, uid, gid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = cg_hybrid_unified();
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
|
||||
r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to set group access on compatibility systemd cgroup %s, ignoring: %m", path);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cg_set_task_access(
|
||||
const char *controller,
|
||||
const char *path,
|
||||
mode_t mode,
|
||||
uid_t uid,
|
||||
gid_t gid) {
|
||||
|
||||
_cleanup_free_ char *fs = NULL;
|
||||
int r;
|
||||
|
||||
assert(path);
|
||||
|
||||
if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
|
||||
return 0;
|
||||
|
||||
if (mode != MODE_INVALID)
|
||||
mode &= 0666;
|
||||
|
||||
/* For both the legacy and unified hierarchies, "cgroup.procs" is the main entry point for PIDs */
|
||||
r = cg_get_path(controller, path, "cgroup.procs", &fs);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = chmod_and_chown(fs, mode, uid, gid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = cg_unified_controller(controller);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0) {
|
||||
const char *fn;
|
||||
|
||||
/* Compatibility: on cgroupsv1 always keep values for the legacy files "tasks" and
|
||||
* "cgroup.clone_children" in sync with "cgroup.procs". Since this is legacy stuff, we don't care if
|
||||
* this fails. */
|
||||
|
||||
FOREACH_STRING(fn,
|
||||
"tasks",
|
||||
"cgroup.clone_children") {
|
||||
|
||||
fs = mfree(fs);
|
||||
|
||||
r = cg_get_path(controller, path, fn, &fs);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to get path for %s of %s, ignoring: %m", fn, path);
|
||||
|
||||
r = chmod_and_chown(fs, mode, uid, gid);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to to change ownership/access mode for %s of %s, ignoring: %m", fn, path);
|
||||
}
|
||||
} else {
|
||||
/* On the unified controller, we want to permit subtree controllers too. */
|
||||
|
||||
/* Configure access to the cgroup's attributes */
|
||||
for (i = attributes[unified]; i->name; i++) {
|
||||
fs = mfree(fs);
|
||||
r = cg_get_path(controller, path, "cgroup.subtree_control", &fs);
|
||||
|
||||
r = cg_get_path(controller, path, i->name, &fs);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = chmod_and_chown(fs, mode, uid, gid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
r = chmod_and_chown(fs, 0644, uid, gid);
|
||||
if (r < 0) {
|
||||
if (i->fatal)
|
||||
return r;
|
||||
|
||||
log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
|
||||
}
|
||||
}
|
||||
|
||||
r = cg_hybrid_unified();
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
|
||||
/* Always propagate access mode from unified to legacy controller */
|
||||
|
||||
r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
|
||||
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
|
||||
r = cg_hybrid_unified();
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to set task access on compatibility systemd cgroup %s, ignoring: %m", path);
|
||||
return r;
|
||||
if (r > 0) {
|
||||
/* Always propagate access mode from unified to legacy controller */
|
||||
r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -188,8 +188,7 @@ int cg_set_attribute(const char *controller, const char *path, const char *attri
|
|||
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
|
||||
int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values);
|
||||
|
||||
int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
|
||||
int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
|
||||
int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
|
||||
|
||||
int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags);
|
||||
int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size);
|
||||
|
|
|
@ -677,9 +677,11 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
|
|||
"Failed to set %s: %m", file);
|
||||
}
|
||||
|
||||
static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {
|
||||
static void cgroup_apply_firewall(Unit *u) {
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
|
||||
if (u->type == UNIT_SLICE) /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is
|
||||
* not recursive we don't ever touch the bpf on them */
|
||||
return;
|
||||
|
@ -1031,7 +1033,7 @@ static void cgroup_context_apply(
|
|||
}
|
||||
|
||||
if (apply_bpf)
|
||||
cgroup_apply_firewall(u, c);
|
||||
cgroup_apply_firewall(u);
|
||||
}
|
||||
|
||||
CGroupMask cgroup_context_get_mask(CGroupContext *c) {
|
||||
|
@ -1392,6 +1394,31 @@ int unit_watch_cgroup(Unit *u) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int unit_pick_cgroup_path(Unit *u) {
|
||||
_cleanup_free_ char *path = NULL;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
|
||||
if (u->cgroup_path)
|
||||
return 0;
|
||||
|
||||
if (!UNIT_HAS_CGROUP_CONTEXT(u))
|
||||
return -EINVAL;
|
||||
|
||||
path = unit_default_cgroup_path(u);
|
||||
if (!path)
|
||||
return log_oom();
|
||||
|
||||
r = unit_set_cgroup_path(u, path);
|
||||
if (r == -EEXIST)
|
||||
return log_unit_error_errno(u, r, "Control group %s exists already.", path);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int unit_create_cgroup(
|
||||
Unit *u,
|
||||
CGroupMask target_mask,
|
||||
|
@ -1407,19 +1434,10 @@ static int unit_create_cgroup(
|
|||
if (!c)
|
||||
return 0;
|
||||
|
||||
if (!u->cgroup_path) {
|
||||
_cleanup_free_ char *path = NULL;
|
||||
|
||||
path = unit_default_cgroup_path(u);
|
||||
if (!path)
|
||||
return log_oom();
|
||||
|
||||
r = unit_set_cgroup_path(u, path);
|
||||
if (r == -EEXIST)
|
||||
return log_unit_error_errno(u, r, "Control group %s exists already.", path);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);
|
||||
}
|
||||
/* Figure out our cgroup path */
|
||||
r = unit_pick_cgroup_path(u);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* First, create our own group */
|
||||
r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
|
||||
|
@ -1503,6 +1521,27 @@ static bool unit_has_mask_realized(
|
|||
(!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
|
||||
}
|
||||
|
||||
static void unit_add_to_cgroup_realize_queue(Unit *u) {
|
||||
assert(u);
|
||||
|
||||
if (u->in_cgroup_realize_queue)
|
||||
return;
|
||||
|
||||
LIST_PREPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
|
||||
u->in_cgroup_realize_queue = true;
|
||||
}
|
||||
|
||||
static void unit_remove_from_cgroup_realize_queue(Unit *u) {
|
||||
assert(u);
|
||||
|
||||
if (!u->in_cgroup_realize_queue)
|
||||
return;
|
||||
|
||||
LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
|
||||
u->in_cgroup_realize_queue = false;
|
||||
}
|
||||
|
||||
|
||||
/* Check if necessary controllers and attributes for a unit are in place.
|
||||
*
|
||||
* If so, do nothing.
|
||||
|
@ -1516,10 +1555,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
|||
|
||||
assert(u);
|
||||
|
||||
if (u->in_cgroup_realize_queue) {
|
||||
LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
|
||||
u->in_cgroup_realize_queue = false;
|
||||
}
|
||||
unit_remove_from_cgroup_realize_queue(u);
|
||||
|
||||
target_mask = unit_get_target_mask(u);
|
||||
enable_mask = unit_get_enable_mask(u);
|
||||
|
@ -1552,16 +1588,6 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void unit_add_to_cgroup_realize_queue(Unit *u) {
|
||||
assert(u);
|
||||
|
||||
if (u->in_cgroup_realize_queue)
|
||||
return;
|
||||
|
||||
LIST_PREPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
|
||||
u->in_cgroup_realize_queue = true;
|
||||
}
|
||||
|
||||
unsigned manager_dispatch_cgroup_realize_queue(Manager *m) {
|
||||
ManagerState state;
|
||||
unsigned n = 0;
|
||||
|
@ -1575,6 +1601,12 @@ unsigned manager_dispatch_cgroup_realize_queue(Manager *m) {
|
|||
while ((i = m->cgroup_realize_queue)) {
|
||||
assert(i->in_cgroup_realize_queue);
|
||||
|
||||
if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(i))) {
|
||||
/* Maybe things changed, and the unit is not actually active anymore? */
|
||||
unit_remove_from_cgroup_realize_queue(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
r = unit_realize_cgroup_now(i, state);
|
||||
if (r < 0)
|
||||
log_warning_errno(r, "Failed to realize cgroups for queued unit %s, ignoring: %m", i->id);
|
||||
|
@ -2351,7 +2383,6 @@ int unit_get_ip_accounting(
|
|||
fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
|
||||
u->ip_accounting_ingress_map_fd :
|
||||
u->ip_accounting_egress_map_fd;
|
||||
|
||||
if (fd < 0)
|
||||
return -ENODATA;
|
||||
|
||||
|
@ -2421,7 +2452,7 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
|
|||
if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
|
||||
m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
|
||||
|
||||
if ((u->cgroup_realized_mask & m) == 0)
|
||||
if ((u->cgroup_realized_mask & m) == 0) /* NOP? */
|
||||
return;
|
||||
|
||||
u->cgroup_realized_mask &= ~m;
|
||||
|
@ -2434,7 +2465,7 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
|
|||
if (!UNIT_HAS_CGROUP_CONTEXT(u))
|
||||
return;
|
||||
|
||||
if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
|
||||
if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED) /* NOP? */
|
||||
return;
|
||||
|
||||
u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
|
||||
|
|
|
@ -169,6 +169,7 @@ void unit_update_cgroup_members_masks(Unit *u);
|
|||
|
||||
char *unit_default_cgroup_path(Unit *u);
|
||||
int unit_set_cgroup_path(Unit *u, const char *path);
|
||||
int unit_pick_cgroup_path(Unit *u);
|
||||
|
||||
int unit_realize_cgroup(Unit *u);
|
||||
void unit_release_cgroup(Unit *u);
|
||||
|
|
|
@ -3009,17 +3009,12 @@ static int exec_child(
|
|||
}
|
||||
}
|
||||
|
||||
/* If delegation is enabled we'll pass ownership of the cgroup
|
||||
* (but only in systemd's own controller hierarchy!) to the
|
||||
* user of the new process. */
|
||||
/* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroupsv1
|
||||
* this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
|
||||
* safe. On cgroupsv2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
|
||||
* touch a single hierarchy too. */
|
||||
if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
|
||||
r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_CGROUP;
|
||||
return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
|
||||
}
|
||||
|
||||
r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
|
||||
r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_CGROUP;
|
||||
return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
|
||||
|
|
|
@ -938,9 +938,6 @@ static void mount_enter_mounting(Mount *m) {
|
|||
|
||||
assert(m);
|
||||
|
||||
m->control_command_id = MOUNT_EXEC_MOUNT;
|
||||
m->control_command = m->exec_command + MOUNT_EXEC_MOUNT;
|
||||
|
||||
r = unit_fail_if_symlink(UNIT(m), m->where);
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
|
@ -949,6 +946,11 @@ static void mount_enter_mounting(Mount *m) {
|
|||
|
||||
unit_warn_if_dir_nonempty(UNIT(m), m->where);
|
||||
|
||||
unit_warn_leftover_processes(UNIT(m));
|
||||
|
||||
m->control_command_id = MOUNT_EXEC_MOUNT;
|
||||
m->control_command = m->exec_command + MOUNT_EXEC_MOUNT;
|
||||
|
||||
/* Create the source directory for bind-mounts if needed */
|
||||
p = get_mount_parameters_fragment(m);
|
||||
if (p && mount_is_bind(p))
|
||||
|
|
|
@ -460,18 +460,6 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool scope_check_gc(Unit *u) {
|
||||
assert(u);
|
||||
|
||||
/* Never clean up scopes that still have a process around,
|
||||
* even if the scope is formally dead. */
|
||||
|
||||
if (!u->cgroup_path)
|
||||
return false;
|
||||
|
||||
return cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path) <= 0;
|
||||
}
|
||||
|
||||
static void scope_notify_cgroup_empty_event(Unit *u) {
|
||||
Scope *s = SCOPE(u);
|
||||
assert(u);
|
||||
|
@ -639,8 +627,6 @@ const UnitVTable scope_vtable = {
|
|||
.active_state = scope_active_state,
|
||||
.sub_state_to_string = scope_sub_state_to_string,
|
||||
|
||||
.check_gc = scope_check_gc,
|
||||
|
||||
.sigchld_event = scope_sigchld_event,
|
||||
|
||||
.reset_failed = scope_reset_failed,
|
||||
|
|
|
@ -1343,11 +1343,6 @@ static int service_spawn(
|
|||
if (!final_env)
|
||||
return -ENOMEM;
|
||||
|
||||
if ((flags & EXEC_IS_CONTROL) && UNIT(s)->cgroup_path) {
|
||||
exec_params.cgroup_path = strjoina(UNIT(s)->cgroup_path, "/control");
|
||||
(void) cg_create(SYSTEMD_CGROUP_CONTROLLER, exec_params.cgroup_path);
|
||||
}
|
||||
|
||||
/* System services should get a new keyring by default. */
|
||||
SET_FLAG(exec_params.flags, EXEC_NEW_KEYRING, MANAGER_IS_SYSTEM(UNIT(s)->manager));
|
||||
|
||||
|
@ -1789,39 +1784,22 @@ fail:
|
|||
service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
|
||||
}
|
||||
|
||||
static void service_kill_control_processes(Service *s) {
|
||||
static void service_kill_control_process(Service *s) {
|
||||
int r;
|
||||
|
||||
assert(s);
|
||||
|
||||
if (s->control_pid > 0) {
|
||||
r = kill_and_sigcont(s->control_pid, SIGKILL);
|
||||
if (r < 0) {
|
||||
_cleanup_free_ char *comm = NULL;
|
||||
if (s->control_pid <= 0)
|
||||
return;
|
||||
|
||||
(void) get_process_comm(s->control_pid, &comm);
|
||||
r = kill_and_sigcont(s->control_pid, SIGKILL);
|
||||
if (r < 0) {
|
||||
_cleanup_free_ char *comm = NULL;
|
||||
|
||||
log_unit_debug_errno(UNIT(s), r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m",
|
||||
s->control_pid, strna(comm));
|
||||
}
|
||||
}
|
||||
(void) get_process_comm(s->control_pid, &comm);
|
||||
|
||||
if (UNIT(s)->cgroup_path) {
|
||||
_cleanup_set_free_ Set *pid_set = NULL;
|
||||
char *p;
|
||||
|
||||
if (s->control_pid > 0) {
|
||||
r = set_make(&pid_set, PID_TO_PTR(s->control_pid), NULL);
|
||||
if (r < 0) {
|
||||
log_oom();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
p = strjoina(UNIT(s)->cgroup_path, "/control");
|
||||
r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, CGROUP_SIGCONT|CGROUP_IGNORE_SELF|CGROUP_REMOVE, pid_set, NULL, NULL);
|
||||
if (r < 0)
|
||||
log_unit_debug_errno(UNIT(s), r, "Failed to send SIGKILL to processes of control group %s: %m", p);
|
||||
log_unit_debug_errno(UNIT(s), r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m",
|
||||
s->control_pid, strna(comm));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1836,10 +1814,7 @@ static void service_enter_start(Service *s) {
|
|||
service_unwatch_control_pid(s);
|
||||
service_unwatch_main_pid(s);
|
||||
|
||||
/* We want to ensure that nobody leaks processes from
|
||||
* START_PRE here, so let's go on a killing spree, People
|
||||
* should not spawn long running processes from START_PRE. */
|
||||
service_kill_control_processes(s);
|
||||
unit_warn_leftover_processes(UNIT(s));
|
||||
|
||||
if (s->type == SERVICE_FORKING) {
|
||||
s->control_command_id = SERVICE_EXEC_START;
|
||||
|
@ -1927,9 +1902,8 @@ static void service_enter_start_pre(Service *s) {
|
|||
|
||||
s->control_command = s->exec_command[SERVICE_EXEC_START_PRE];
|
||||
if (s->control_command) {
|
||||
/* Before we start anything, let's clear up what might
|
||||
* be left from previous runs. */
|
||||
service_kill_control_processes(s);
|
||||
|
||||
unit_warn_leftover_processes(UNIT(s));
|
||||
|
||||
s->control_command_id = SERVICE_EXEC_START_PRE;
|
||||
|
||||
|
@ -2746,10 +2720,11 @@ static bool service_check_gc(Unit *u) {
|
|||
|
||||
assert(s);
|
||||
|
||||
/* Never clean up services that still have a process around,
|
||||
* even if the service is formally dead. */
|
||||
if (cgroup_good(s) > 0 ||
|
||||
main_pid_good(s) > 0 ||
|
||||
/* Never clean up services that still have a process around, even if the service is formally dead. Note that
|
||||
* unit_check_gc() already checked our cgroup for us, we just check our two additional PIDs, too, in case they
|
||||
* have moved outside of the cgroup. */
|
||||
|
||||
if (main_pid_good(s) > 0 ||
|
||||
control_pid_good(s) > 0)
|
||||
return true;
|
||||
|
||||
|
@ -3084,11 +3059,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
|||
if (s->result == SERVICE_SUCCESS)
|
||||
s->result = f;
|
||||
|
||||
/* Immediately get rid of the cgroup, so that the
|
||||
* kernel doesn't delay the cgroup empty messages for
|
||||
* the service cgroup any longer than necessary */
|
||||
service_kill_control_processes(s);
|
||||
|
||||
if (s->control_command &&
|
||||
s->control_command->command_next &&
|
||||
f == SERVICE_SUCCESS) {
|
||||
|
@ -3251,7 +3221,7 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
|
|||
|
||||
case SERVICE_RELOAD:
|
||||
log_unit_warning(UNIT(s), "Reload operation timed out. Killing reload process.");
|
||||
service_kill_control_processes(s);
|
||||
service_kill_control_process(s);
|
||||
s->reload_result = SERVICE_FAILURE_TIMEOUT;
|
||||
service_enter_running(s, SERVICE_SUCCESS);
|
||||
break;
|
||||
|
|
|
@ -2187,6 +2187,9 @@ static void socket_enter_start_pre(Socket *s) {
|
|||
assert(s);
|
||||
|
||||
socket_unwatch_control_pid(s);
|
||||
|
||||
unit_warn_leftover_processes(UNIT(s));
|
||||
|
||||
s->control_command_id = SOCKET_EXEC_START_PRE;
|
||||
s->control_command = s->exec_command[SOCKET_EXEC_START_PRE];
|
||||
|
||||
|
|
|
@ -734,6 +734,8 @@ static void swap_enter_activating(Swap *s) {
|
|||
|
||||
assert(s);
|
||||
|
||||
unit_warn_leftover_processes(UNIT(s));
|
||||
|
||||
s->control_command_id = SWAP_EXEC_ACTIVATE;
|
||||
s->control_command = s->exec_command + SWAP_EXEC_ACTIVATE;
|
||||
|
||||
|
|
|
@ -108,6 +108,7 @@ Unit *unit_new(Manager *m, size_t size) {
|
|||
u->ref_uid = UID_INVALID;
|
||||
u->ref_gid = GID_INVALID;
|
||||
u->cpu_usage_last = NSEC_INFINITY;
|
||||
u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
|
||||
|
||||
u->ip_accounting_ingress_map_fd = -1;
|
||||
u->ip_accounting_egress_map_fd = -1;
|
||||
|
@ -333,6 +334,7 @@ int unit_set_description(Unit *u, const char *description) {
|
|||
|
||||
bool unit_check_gc(Unit *u) {
|
||||
UnitActiveState state;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
|
||||
|
@ -380,6 +382,17 @@ bool unit_check_gc(Unit *u) {
|
|||
assert_not_reached("Unknown garbage collection mode");
|
||||
}
|
||||
|
||||
if (u->cgroup_path) {
|
||||
/* If the unit has a cgroup, then check whether there's anything in it. If so, we should stay
|
||||
* around. Units with active processes should never be collected. */
|
||||
|
||||
r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
|
||||
if (r < 0)
|
||||
log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", u->cgroup_path);
|
||||
if (r <= 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
if (UNIT_VTABLE(u)->check_gc)
|
||||
if (UNIT_VTABLE(u)->check_gc(u))
|
||||
return true;
|
||||
|
@ -5183,6 +5196,31 @@ int unit_prepare_exec(Unit *u) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void log_leftover(pid_t pid, int sig, void *userdata) {
|
||||
_cleanup_free_ char *comm = NULL;
|
||||
|
||||
(void) get_process_comm(pid, &comm);
|
||||
|
||||
if (comm && comm[0] == '(') /* Most likely our own helper process (PAM?), ignore */
|
||||
return;
|
||||
|
||||
log_unit_warning(userdata,
|
||||
"Found left-over process " PID_FMT " (%s) in control group while starting unit. Ignoring.\n"
|
||||
"This usually indicates unclean termination of a previous run, or service implementation deficiencies.",
|
||||
pid, strna(comm));
|
||||
}
|
||||
|
||||
void unit_warn_leftover_processes(Unit *u) {
|
||||
assert(u);
|
||||
|
||||
(void) unit_pick_cgroup_path(u);
|
||||
|
||||
if (!u->cgroup_path)
|
||||
return;
|
||||
|
||||
(void) cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, 0, 0, NULL, log_leftover, u);
|
||||
}
|
||||
|
||||
static const char* const collect_mode_table[_COLLECT_MODE_MAX] = {
|
||||
[COLLECT_INACTIVE] = "inactive",
|
||||
[COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed",
|
||||
|
|
|
@ -768,6 +768,8 @@ void unit_unlink_state_files(Unit *u);
|
|||
|
||||
int unit_prepare_exec(Unit *u);
|
||||
|
||||
void unit_warn_leftover_processes(Unit *u);
|
||||
|
||||
/* Macros which append UNIT= or USER_UNIT= to the message */
|
||||
|
||||
#define log_unit_full(unit, level, error, ...) \
|
||||
|
|
Loading…
Reference in a new issue