From a479c21ed2fda095a2e2f4b52de75487c76d332c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 1 May 2020 14:00:42 +0200 Subject: [PATCH 1/8] cgroup: Make realize_queue behave FIFO The current implementation is LIFO, which is a) confusing b) prevents some ordered operations on the cgroup tree (e.g. removing children before parents). Fix it quickly. Current list implementation turns this from O(1) to O(n) operation. Rework the lists later. --- src/core/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 031b28a684..f146f65055 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2112,7 +2112,7 @@ void unit_add_to_cgroup_realize_queue(Unit *u) { if (u->in_cgroup_realize_queue) return; - LIST_PREPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u); + LIST_APPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u); u->in_cgroup_realize_queue = true; } From 30ad3ca0865ba18d3b88da1558cc4ddfeb9701b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 1 May 2020 14:00:42 +0200 Subject: [PATCH 2/8] cgroup: Add root slice to cgroup realization queue When we're disabling controller on a direct child of root cgroup, we forgot to add root slice into cgroup realization queue, which prevented proper disabling of the controller (on unified hierarchy). The mechanism relying on "bounce from bottom and propagate up" in unit_create_cgroup doesn't work on unified hierarchy (leaves needn't be enabled). Drop it as we rely on the ancestors to be queued -- that's now intentional but was artifact of combining the two patches: cb5e3bc37d ("cgroup: Don't explicitly check for member in UNIT_BEFORE") v240~78 65f6b6bdcb ("core: fix re-realization of cgroup siblings") v245-rc1~153^2 Fixes: #14917 --- src/core/cgroup.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index f146f65055..97785680a7 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1858,23 +1858,6 @@ static int unit_create_cgroup( if (r < 0) log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path); - /* If we just turned off a controller, this might release the controller for our parent too, let's - * enqueue the parent for re-realization in that case again. */ - if (UNIT_ISSET(u->slice)) { - CGroupMask turned_off; - - turned_off = (u->cgroup_realized ? u->cgroup_enabled_mask & ~result_mask : 0); - if (turned_off != 0) { - Unit *parent; - - /* Force the parent to propagate the enable mask to the kernel again, by invalidating - * the controller we just turned off. */ - - for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice)) - unit_invalidate_cgroup(parent, turned_off); - } - } - /* Remember what's actually enabled now */ u->cgroup_enabled_mask = result_mask; } @@ -2317,8 +2300,7 @@ unsigned manager_dispatch_cgroup_realize_queue(Manager *m) { static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { Unit *slice; - /* This adds the siblings of the specified unit and the siblings of all parent units to the cgroup - * queue. (But neither the specified unit itself nor the parents.) + /* This adds the path from the specified unit to root slice to the queue and siblings at each level. * * Propagation of realization "side-ways" (i.e. towards siblings) is relevant on cgroup-v1 where * scheduling becomes very weird if two units that own processes reside in the same slice, but one is @@ -2334,7 +2316,6 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { void *v; HASHMAP_FOREACH_KEY(v, m, slice->dependencies[UNIT_BEFORE], i) { - /* Skip units that have a dependency on the slice but aren't actually in it. */ if (UNIT_DEREF(m->slice) != slice) continue; @@ -2360,6 +2341,9 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { u = slice; } + + /* Root slice comes last */ + unit_add_to_cgroup_realize_queue(u); } int unit_realize_cgroup(Unit *u) { @@ -2377,9 +2361,10 @@ int unit_realize_cgroup(Unit *u) { * get as much resources as all our group together. This call * will synchronously create the parent cgroups, but will * defer work on the siblings to the next event loop - * iteration. */ + * iteration. When removing a realized controller, it may become unnecessary in ancestors, + * so we also ensure deferred bottom up (de)realization of ancestors. + */ - /* Add all sibling slices to the cgroup queue. */ unit_add_siblings_to_cgroup_realize_queue(u); /* And realize this one now (and apply the values) */ From 7b63961415920b30a4e9f6d4c87bda20a1d8ec67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 1 May 2020 14:00:42 +0200 Subject: [PATCH 3/8] cgroup: Swap cgroup v1 deletion and migration When we are about to derealize a controller on v1 cgroup, we first attempt to delete the controller cgroup and migrate afterwards. This doesn't work in practice because populated cgroup cannot be deleted. Furthermore, we leave out slices from migration completely, so (un)setting a control value on them won't realize their controller cgroup. Rework actual realization, unit_create_cgroup() becomes unit_update_cgroup() and make sure that controller hierarchies are reduced when given controller cgroup ceased to be needed. Note that with this we introduce slight deviation between v1 and v2 code -- when a descendant unit turns off a delegated controller, we attempt to disable it in ancestor slices. On v2 this may fail (kernel enforced, because of child cgroups using the controller), on v1 we'll migrate whole subtree and trim the subhierachy. (Previously, we wouldn't take away delegated controller, however, derealization was broken anyway.) Fixes: #14149 --- src/core/cgroup.c | 44 ++++++++++++++++++++++++----------- src/shared/cgroup-setup.c | 49 ++++++++++++++++++++------------------- src/shared/cgroup-setup.h | 3 ++- 3 files changed, 57 insertions(+), 39 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 97785680a7..6750bbbe1d 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1629,7 +1629,10 @@ const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) { } static const char *migrate_callback(CGroupMask mask, void *userdata) { - return unit_get_realized_cgroup_path(userdata, mask); + /* If not realized at all, migrate to root (""). + * It may happen if we're upgrading from older version that didn't clean up. + */ + return strempty(unit_get_realized_cgroup_path(userdata, mask)); } char *unit_default_cgroup_path(const Unit *u) { @@ -1820,13 +1823,14 @@ int unit_pick_cgroup_path(Unit *u) { return 0; } -static int unit_create_cgroup( +static int unit_update_cgroup( Unit *u, CGroupMask target_mask, CGroupMask enable_mask, ManagerState state) { - bool created; + bool created, is_root_slice; + CGroupMask migrate_mask = 0; int r; assert(u); @@ -1849,7 +1853,9 @@ static int unit_create_cgroup( (void) unit_watch_cgroup(u); (void) unit_watch_cgroup_memory(u); - /* Preserve enabled controllers in delegated units, adjust others. */ + + /* For v2 we preserve enabled controllers in delegated units, adjust others, + * for v1 we figure out which controller hierarchies need migration. */ if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) { CGroupMask result_mask = 0; @@ -1860,20 +1866,30 @@ static int unit_create_cgroup( /* Remember what's actually enabled now */ u->cgroup_enabled_mask = result_mask; + + migrate_mask = u->cgroup_realized_mask ^ target_mask; } /* Keep track that this is now realized */ u->cgroup_realized = true; u->cgroup_realized_mask = target_mask; - if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) { - - /* Then, possibly move things over, but not if - * subgroups may contain processes, which is the case - * for slice and delegation units. */ - r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u); + /* Migrate processes in controller hierarchies both downwards (enabling) and upwards (disabling). + * + * Unnecessary controller cgroups are trimmed (after emptied by upward migration). + * We perform migration also with whole slices for cases when users don't care about leave + * granularity. Since delegated_mask is subset of target mask, we won't trim slice subtree containing + * delegated units. + */ + if (cg_all_unified() == 0) { + r = cg_migrate_v1_controllers(u->manager->cgroup_supported, migrate_mask, u->cgroup_path, migrate_callback, u); if (r < 0) - log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path); + log_unit_warning_errno(u, r, "Failed to migrate controller cgroups from %s, ignoring: %m", u->cgroup_path); + + is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE); + r = cg_trim_v1_controllers(u->manager->cgroup_supported, ~target_mask, u->cgroup_path, !is_root_slice); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to delete controller cgroups %s, ignoring: %m", u->cgroup_path); } /* Set attributes */ @@ -2136,7 +2152,7 @@ static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) { new_target_mask = u->cgroup_realized_mask | target_mask; new_enable_mask = u->cgroup_enabled_mask | enable_mask; - return unit_create_cgroup(u, new_target_mask, new_enable_mask, state); + return unit_update_cgroup(u, new_target_mask, new_enable_mask, state); } /* Controllers can only be disabled depth-first, from the leaves of the @@ -2180,7 +2196,7 @@ static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) { new_target_mask = m->cgroup_realized_mask & target_mask; new_enable_mask = m->cgroup_enabled_mask & enable_mask; - r = unit_create_cgroup(m, new_target_mask, new_enable_mask, state); + r = unit_update_cgroup(m, new_target_mask, new_enable_mask, state); if (r < 0) return r; } @@ -2259,7 +2275,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { } /* Now actually deal with the cgroup we were trying to realise and set attributes */ - r = unit_create_cgroup(u, target_mask, enable_mask, state); + r = unit_update_cgroup(u, target_mask, enable_mask, state); if (r < 0) return r; diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c index e8398cbde5..cd2ba6149e 100644 --- a/src/shared/cgroup-setup.c +++ b/src/shared/cgroup-setup.c @@ -608,8 +608,6 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path n = cgroup_controller_to_string(c); if (FLAGS_SET(mask, bit)) (void) cg_create(n, path); - else - (void) cg_trim(n, path, true); done |= CGROUP_MASK_EXTEND_JOINED(bit); } @@ -674,29 +672,20 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, return r; } -int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { +int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) { CGroupController c; CGroupMask done; int r = 0, q; - if (!path_equal(from, to)) { - r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); - if (r < 0) - return r; - } - - q = cg_all_unified(); - if (q < 0) - return q; - if (q > 0) - return r; + assert(to_callback); supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); done = 0; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *p = NULL; + const char *to = NULL; if (!FLAGS_SET(supported, bit)) continue; @@ -704,21 +693,20 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to if (FLAGS_SET(done, bit)) continue; - if (to_callback) - p = to_callback(bit, userdata); - if (!p) - p = to; + if (!FLAGS_SET(mask, bit)) + continue; - (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); - done |= CGROUP_MASK_EXTEND_JOINED(bit); + to = to_callback(bit, userdata); + + /* Remember first error and try continuing */ + q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0); + r = (r < 0) ? r : q; } return r; } int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { - CGroupController c; - CGroupMask done; int r, q; r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); @@ -731,7 +719,16 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) if (q > 0) return r; + return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root); +} + +int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) { + CGroupController c; + CGroupMask done; + int r = 0, q; + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); done = 0; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { @@ -743,7 +740,11 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) if (FLAGS_SET(done, bit)) continue; - (void) cg_trim(cgroup_controller_to_string(c), path, delete_root); + if (FLAGS_SET(mask, bit)) { + /* Remember first error and try continuing */ + q = cg_trim(cgroup_controller_to_string(c), path, delete_root); + r = (r < 0) ? r : q; + } done |= CGROUP_MASK_EXTEND_JOINED(bit); } diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h index 6e9b6857d8..72c2b6ee6a 100644 --- a/src/shared/cgroup-setup.h +++ b/src/shared/cgroup-setup.h @@ -29,6 +29,7 @@ int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const ch int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); -int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); +int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata); int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); +int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root); int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); From fb46fca7e0c484026bbc9e196aee118800186768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Mon, 1 Jun 2020 17:30:35 +0200 Subject: [PATCH 4/8] cgroup: Eager realization in unit_free unit_free(u) realizes direct parent and invalidates members mask of all ancestors. This isn't sufficient in v1 controller hierarchies since siblings of the freed unit may have existed only because of the removed unit. We cannot be lazy about the siblings because if parent(u) is also removed, it'd migrate and rmdir cgroups for siblings(u). However, realized masks of siblings(u) won't reflect this change. This was a non-issue earlier, because we weren't removing cgroup directories properly (effectively matching the stale realized mask), removal failed because of tasks left by missing migration (see previous commit). Therefore, ensure realization of all units necessary to clean up after the free'd unit. Fixes: #14149 --- src/core/cgroup.c | 16 +++++++++++----- src/core/cgroup.h | 2 +- src/core/unit.c | 16 ++++++++-------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 6750bbbe1d..84512963a4 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2105,7 +2105,7 @@ static bool unit_has_mask_enables_realized( ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2); } -void unit_add_to_cgroup_realize_queue(Unit *u) { +static void unit_add_to_cgroup_realize_queue(Unit *u) { assert(u); if (u->in_cgroup_realize_queue) @@ -2313,10 +2313,12 @@ unsigned manager_dispatch_cgroup_realize_queue(Manager *m) { return n; } -static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { +void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { Unit *slice; + Unit *p = u; /* This adds the path from the specified unit to root slice to the queue and siblings at each level. + * The unit itself is excluded (assuming it's handled separately). * * Propagation of realization "side-ways" (i.e. towards siblings) is relevant on cgroup-v1 where * scheduling becomes very weird if two units that own processes reside in the same slice, but one is @@ -2326,7 +2328,7 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { * at all are always realized in *all* their hierarchies, and it is sufficient for a unit's sibling * to be realized for the unit itself to be realized too. */ - while ((slice = UNIT_DEREF(u->slice))) { + while ((slice = UNIT_DEREF(p->slice))) { Iterator i; Unit *m; void *v; @@ -2335,6 +2337,9 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { /* Skip units that have a dependency on the slice but aren't actually in it. */ if (UNIT_DEREF(m->slice) != slice) continue; + /* The origin must be handled separately by caller */ + if (m == u) + continue; /* No point in doing cgroup application for units without active processes. */ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m))) @@ -2355,11 +2360,12 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { unit_add_to_cgroup_realize_queue(m); } - u = slice; + p = slice; } /* Root slice comes last */ - unit_add_to_cgroup_realize_queue(u); + if (p != u) + unit_add_to_cgroup_realize_queue(p); } int unit_realize_cgroup(Unit *u) { diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 52d028e740..d6e170cdc6 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -212,7 +212,7 @@ CGroupMask unit_get_enable_mask(Unit *u); void unit_invalidate_cgroup_members_masks(Unit *u); -void unit_add_to_cgroup_realize_queue(Unit *u); +void unit_add_siblings_to_cgroup_realize_queue(Unit *u); const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask); char *unit_default_cgroup_path(const Unit *u); diff --git a/src/core/unit.c b/src/core/unit.c index 2c09def06f..c16e7562d8 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -620,14 +620,6 @@ void unit_free(Unit *u) { if (!u) return; - if (UNIT_ISSET(u->slice)) { - /* A unit is being dropped from the tree, make sure our parent slice recalculates the member mask */ - unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice)); - - /* And make sure the parent is realized again, updating cgroup memberships */ - unit_add_to_cgroup_realize_queue(UNIT_DEREF(u->slice)); - } - u->transient_file = safe_fclose(u->transient_file); if (!MANAGER_IS_RELOADING(u->manager)) @@ -669,6 +661,14 @@ void unit_free(Unit *u) { for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++) bidi_set_free(u, u->dependencies[d]); + /* A unit is being dropped from the tree, make sure our siblings and ancestor slices are realized + * properly. Do this after we detach the unit from slice tree in order to eliminate its effect on + * controller masks. */ + if (UNIT_ISSET(u->slice)) { + unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice)); + unit_add_siblings_to_cgroup_realize_queue(u); + } + if (u->on_console) manager_unref_console(u->manager); From f23ba94db31feb0ede6524fdabd0efdb8344e90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Mon, 1 Jun 2020 17:33:51 +0200 Subject: [PATCH 5/8] cgroup: Implicit unit_invalidate_cgroup_members_masks Merge members mask invalidation into unit_add_siblings_to_cgroup_realize_queue, this way unit_realize_cgroup needn't be called with members mask invalidation. We have to retain the members mask invalidation in unit_load -- although active units would have cgroups (re)realized (unit_load queues for realization), the realization would happen with potentially stale mask. --- src/core/cgroup.c | 5 +++++ src/core/dbus-mount.c | 1 - src/core/dbus-scope.c | 1 - src/core/dbus-service.c | 1 - src/core/dbus-slice.c | 1 - src/core/dbus-socket.c | 1 - src/core/dbus-swap.c | 1 - src/core/unit.c | 4 +--- 8 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 84512963a4..c159310748 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2319,6 +2319,8 @@ void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { /* This adds the path from the specified unit to root slice to the queue and siblings at each level. * The unit itself is excluded (assuming it's handled separately). + * The function must invalidate cgroup_members_mask of all ancestors in order to calculate up to date + * masks. * * Propagation of realization "side-ways" (i.e. towards siblings) is relevant on cgroup-v1 where * scheduling becomes very weird if two units that own processes reside in the same slice, but one is @@ -2333,6 +2335,9 @@ void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { Unit *m; void *v; + /* Children of slice likely changed when we're called */ + slice->cgroup_members_mask_valid = false; + HASHMAP_FOREACH_KEY(v, m, slice->dependencies[UNIT_BEFORE], i) { /* Skip units that have a dependency on the slice but aren't actually in it. */ if (UNIT_DEREF(m->slice) != slice) diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c index bab12cc4ff..a4fa44dbc9 100644 --- a/src/core/dbus-mount.c +++ b/src/core/dbus-mount.c @@ -149,7 +149,6 @@ int bus_mount_set_property( int bus_mount_commit_properties(Unit *u) { assert(u); - unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index aecfda6535..d752cd58aa 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -190,7 +190,6 @@ int bus_scope_set_property( int bus_scope_commit_properties(Unit *u) { assert(u); - unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 3cc453dff5..09f88b385b 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -457,7 +457,6 @@ int bus_service_set_property( int bus_service_commit_properties(Unit *u) { assert(u); - unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-slice.c b/src/core/dbus-slice.c index effd5fa5d7..28a6a4fe5a 100644 --- a/src/core/dbus-slice.c +++ b/src/core/dbus-slice.c @@ -28,7 +28,6 @@ int bus_slice_set_property( int bus_slice_commit_properties(Unit *u) { assert(u); - unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c index f01489e29a..90a95c996d 100644 --- a/src/core/dbus-socket.c +++ b/src/core/dbus-socket.c @@ -469,7 +469,6 @@ int bus_socket_set_property( int bus_socket_commit_properties(Unit *u) { assert(u); - unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c index cb4824b6bd..d132c08f0e 100644 --- a/src/core/dbus-swap.c +++ b/src/core/dbus-swap.c @@ -70,7 +70,6 @@ int bus_swap_set_property( int bus_swap_commit_properties(Unit *u) { assert(u); - unit_invalidate_cgroup_members_masks(u); unit_realize_cgroup(u); return 0; diff --git a/src/core/unit.c b/src/core/unit.c index c16e7562d8..12465742fd 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -664,10 +664,8 @@ void unit_free(Unit *u) { /* A unit is being dropped from the tree, make sure our siblings and ancestor slices are realized * properly. Do this after we detach the unit from slice tree in order to eliminate its effect on * controller masks. */ - if (UNIT_ISSET(u->slice)) { - unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice)); + if (UNIT_ISSET(u->slice)) unit_add_siblings_to_cgroup_realize_queue(u); - } if (u->on_console) manager_unref_console(u->manager); From 4c591f3996dc8a3c06f95b13c044d97d94b4be4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Thu, 21 May 2020 13:24:43 +0200 Subject: [PATCH 6/8] cgroup: Introduce family queueing instead of siblings The unit_add_siblings_to_cgroup_realize_queue does more than mere siblings queueing, hence define a family of a unit as (immediate) children of the unit and immediate children of all ancestors. Working with this abstraction simplifies the queuing calls and it shouldn't change the functionality. --- src/core/cgroup.c | 73 +++++++++++++++++++++-------------------------- src/core/cgroup.h | 2 +- src/core/unit.c | 7 ++--- 3 files changed, 36 insertions(+), 46 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index c159310748..143809068a 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2313,37 +2313,34 @@ unsigned manager_dispatch_cgroup_realize_queue(Manager *m) { return n; } -void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { - Unit *slice; - Unit *p = u; +void unit_add_family_to_cgroup_realize_queue(Unit *u) { + assert(u); + assert(u->type == UNIT_SLICE); - /* This adds the path from the specified unit to root slice to the queue and siblings at each level. - * The unit itself is excluded (assuming it's handled separately). - * The function must invalidate cgroup_members_mask of all ancestors in order to calculate up to date - * masks. + /* Family of a unit for is defined as (immediate) children of the unit and immediate children of all + * its ancestors. * - * Propagation of realization "side-ways" (i.e. towards siblings) is relevant on cgroup-v1 where - * scheduling becomes very weird if two units that own processes reside in the same slice, but one is - * realized in the "cpu" hierarchy and one is not (for example because one has CPUWeight= set and the - * other does not), because that means individual processes need to be scheduled against whole - * cgroups. Let's avoid this asymmetry by always ensuring that units below a slice that are realized - * at all are always realized in *all* their hierarchies, and it is sufficient for a unit's sibling - * to be realized for the unit itself to be realized too. */ + * Ideally we would enqueue ancestor path only (bottom up). However, on cgroup-v1 scheduling becomes + * very weird if two units that own processes reside in the same slice, but one is realized in the + * "cpu" hierarchy and one is not (for example because one has CPUWeight= set and the other does + * not), because that means individual processes need to be scheduled against whole cgroups. Let's + * avoid this asymmetry by always ensuring that siblings of a unit are always realized in their v1 + * controller hierarchies too (if unit requires the controller to be realized). + * + * The function must invalidate cgroup_members_mask of all ancestors in order to calculate up to date + * masks. */ - while ((slice = UNIT_DEREF(p->slice))) { + do { Iterator i; Unit *m; void *v; - /* Children of slice likely changed when we're called */ - slice->cgroup_members_mask_valid = false; + /* Children of u likely changed when we're called */ + u->cgroup_members_mask_valid = false; - HASHMAP_FOREACH_KEY(v, m, slice->dependencies[UNIT_BEFORE], i) { + HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) { /* Skip units that have a dependency on the slice but aren't actually in it. */ - if (UNIT_DEREF(m->slice) != slice) - continue; - /* The origin must be handled separately by caller */ - if (m == u) + if (UNIT_DEREF(m->slice) != u) continue; /* No point in doing cgroup application for units without active processes. */ @@ -2365,12 +2362,9 @@ void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { unit_add_to_cgroup_realize_queue(m); } - p = slice; - } - - /* Root slice comes last */ - if (p != u) - unit_add_to_cgroup_realize_queue(p); + /* Parent comes after children */ + unit_add_to_cgroup_realize_queue(u); + } while ((u = UNIT_DEREF(u->slice))); } int unit_realize_cgroup(Unit *u) { @@ -2379,20 +2373,17 @@ int unit_realize_cgroup(Unit *u) { if (!UNIT_HAS_CGROUP_CONTEXT(u)) return 0; - /* So, here's the deal: when realizing the cgroups for this - * unit, we need to first create all parents, but there's more - * actually: for the weight-based controllers we also need to - * make sure that all our siblings (i.e. units that are in the - * same slice as we are) have cgroups, too. Otherwise, things - * would become very uneven as each of their processes would - * get as much resources as all our group together. This call - * will synchronously create the parent cgroups, but will - * defer work on the siblings to the next event loop - * iteration. When removing a realized controller, it may become unnecessary in ancestors, - * so we also ensure deferred bottom up (de)realization of ancestors. - */ + /* So, here's the deal: when realizing the cgroups for this unit, we need to first create all + * parents, but there's more actually: for the weight-based controllers we also need to make sure + * that all our siblings (i.e. units that are in the same slice as we are) have cgroups, too. On the + * other hand, when a controller is removed from realized set, it may become unnecessary in siblings + * and ancestors and they should be (de)realized too. + * + * This call will defer work on the siblings and derealized ancestors to the next event loop + * iteration and synchronously creates the parent cgroups (unit_realize_cgroup_now). */ - unit_add_siblings_to_cgroup_realize_queue(u); + if (UNIT_ISSET(u->slice)) + unit_add_family_to_cgroup_realize_queue(UNIT_DEREF(u->slice)); /* And realize this one now (and apply the values) */ return unit_realize_cgroup_now(u, manager_state(u->manager)); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index d6e170cdc6..be9632d8b6 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -212,7 +212,7 @@ CGroupMask unit_get_enable_mask(Unit *u); void unit_invalidate_cgroup_members_masks(Unit *u); -void unit_add_siblings_to_cgroup_realize_queue(Unit *u); +void unit_add_family_to_cgroup_realize_queue(Unit *u); const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask); char *unit_default_cgroup_path(const Unit *u); diff --git a/src/core/unit.c b/src/core/unit.c index 12465742fd..5d6854b91a 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -661,11 +661,10 @@ void unit_free(Unit *u) { for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++) bidi_set_free(u, u->dependencies[d]); - /* A unit is being dropped from the tree, make sure our siblings and ancestor slices are realized - * properly. Do this after we detach the unit from slice tree in order to eliminate its effect on - * controller masks. */ + /* A unit is being dropped from the tree, make sure our family is realized properly. Do this after we + * detach the unit from slice tree in order to eliminate its effect on controller masks. */ if (UNIT_ISSET(u->slice)) - unit_add_siblings_to_cgroup_realize_queue(u); + unit_add_family_to_cgroup_realize_queue(UNIT_DEREF(u->slice)); if (u->on_console) manager_unref_console(u->manager); From 12b975e06587e7e3c9c60fef74856574eba5c96f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 1 May 2020 14:00:42 +0200 Subject: [PATCH 7/8] cgroup: Reduce unit_get_ancestor_disable_mask use The usage in unit_get_own_mask is redundant, we only need apply disable_mask at the end befor application, i.e. calculating enable or target mask. (IOW, we allow all configurations, but disabling affects effective controls.) Modify tests accordingly and add testing of enable mask. This is intended as cleanup, with no effect but changing unit_dump output. --- src/core/cgroup.c | 2 +- src/test/test-cgroup-mask.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 143809068a..860f36052b 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1469,7 +1469,7 @@ CGroupMask unit_get_own_mask(Unit *u) { if (!c) return 0; - return (unit_get_cgroup_mask(u) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u); + return unit_get_cgroup_mask(u) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u); } CGroupMask unit_get_delegate_mask(Unit *u) { diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c index daafba4ef6..27bfcdc17f 100644 --- a/src/test/test-cgroup-mask.c +++ b/src/test/test-cgroup-mask.c @@ -85,7 +85,7 @@ static int test_cgroup_mask(void) { ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent_deep), CGROUP_MASK_MEMORY); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_parent), 0); - ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); + ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(root), 0); /* Verify aggregation of member masks */ @@ -94,7 +94,7 @@ static int test_cgroup_mask(void) { ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(grandchild), 0); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent_deep), 0); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY)); - ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); + ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_leaf), 0); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); @@ -105,7 +105,7 @@ static int test_cgroup_mask(void) { ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent_deep), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_parent), (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); - ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); + ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of target masks. */ @@ -118,6 +118,16 @@ static int test_cgroup_mask(void) { ASSERT_CGROUP_MASK(unit_get_target_mask(nomem_leaf), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported)); ASSERT_CGROUP_MASK(unit_get_target_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + /* Verify aggregation of enable masks. */ + ASSERT_CGROUP_MASK(unit_get_enable_mask(son), 0); + ASSERT_CGROUP_MASK(unit_get_enable_mask(daughter), 0); + ASSERT_CGROUP_MASK(unit_get_enable_mask(grandchild), 0); + ASSERT_CGROUP_MASK(unit_get_enable_mask(parent_deep), 0); + ASSERT_CGROUP_MASK(unit_get_enable_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + ASSERT_CGROUP_MASK(unit_get_enable_mask(nomem_parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported)); + ASSERT_CGROUP_MASK(unit_get_enable_mask(nomem_leaf), 0); + ASSERT_CGROUP_MASK(unit_get_enable_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + return 0; } From d9ef594454d41adf28b2cbc900cb7c781ce40160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 1 May 2020 14:00:42 +0200 Subject: [PATCH 8/8] cgroup: Cleanup function usage Some masks shouldn't be needed externally, so keep their functions in the module (others would fit there too but they're used in tests) to think twice if something would depend on them. Drop unused function cg_attach_many_everywhere. Use cgroup_realized instead of cgroup_path when we actually ask for realized. This should not cause any functional changes. --- src/core/cgroup.c | 20 ++++++++++---------- src/core/cgroup.h | 2 -- src/shared/cgroup-setup.c | 17 ----------------- src/shared/cgroup-setup.h | 1 - 4 files changed, 10 insertions(+), 30 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 860f36052b..a6a9e0c7d4 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1495,6 +1495,14 @@ CGroupMask unit_get_delegate_mask(Unit *u) { return CGROUP_MASK_EXTEND_JOINED(c->delegate_controllers); } +static CGroupMask unit_get_subtree_mask(Unit *u) { + + /* Returns the mask of this subtree, meaning of the group + * itself and its children. */ + + return unit_get_own_mask(u) | unit_get_members_mask(u); +} + CGroupMask unit_get_members_mask(Unit *u) { assert(u); @@ -1532,7 +1540,7 @@ CGroupMask unit_get_siblings_mask(Unit *u) { return unit_get_subtree_mask(u); /* we are the top-level slice */ } -CGroupMask unit_get_disable_mask(Unit *u) { +static CGroupMask unit_get_disable_mask(Unit *u) { CGroupContext *c; c = unit_get_cgroup_context(u); @@ -1557,14 +1565,6 @@ CGroupMask unit_get_ancestor_disable_mask(Unit *u) { return mask; } -CGroupMask unit_get_subtree_mask(Unit *u) { - - /* Returns the mask of this subtree, meaning of the group - * itself and its children. */ - - return unit_get_own_mask(u) | unit_get_members_mask(u); -} - CGroupMask unit_get_target_mask(Unit *u) { CGroupMask mask; @@ -2177,7 +2177,7 @@ static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) { /* The cgroup for this unit might not actually be fully * realised yet, in which case it isn't holding any controllers * open anyway. */ - if (!m->cgroup_path) + if (!m->cgroup_realized) continue; /* We must disable those below us first in order to release the diff --git a/src/core/cgroup.h b/src/core/cgroup.h index be9632d8b6..9ac5c8bfc0 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -203,8 +203,6 @@ CGroupMask unit_get_own_mask(Unit *u); CGroupMask unit_get_delegate_mask(Unit *u); CGroupMask unit_get_members_mask(Unit *u); CGroupMask unit_get_siblings_mask(Unit *u); -CGroupMask unit_get_subtree_mask(Unit *u); -CGroupMask unit_get_disable_mask(Unit *u); CGroupMask unit_get_ancestor_disable_mask(Unit *u); CGroupMask unit_get_target_mask(Unit *u); diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c index cd2ba6149e..856e776296 100644 --- a/src/shared/cgroup-setup.c +++ b/src/shared/cgroup-setup.c @@ -655,23 +655,6 @@ int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_m return 0; } -int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) { - Iterator i; - void *pidp; - int r = 0; - - SET_FOREACH(pidp, pids, i) { - pid_t pid = PTR_TO_PID(pidp); - int q; - - q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); - if (q < 0 && r >= 0) - r = q; - } - - return r; -} - int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) { CGroupController c; CGroupMask done; diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h index 72c2b6ee6a..43ce32e30d 100644 --- a/src/shared/cgroup-setup.h +++ b/src/shared/cgroup-setup.h @@ -28,7 +28,6 @@ int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const ch int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); -int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata); int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root);