From fdb3decaa7ca791906f87dafc9d641c4cee4ec26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 1 Aug 2019 13:14:45 +0200 Subject: [PATCH] util-lib: move some functions from basic/cgroup-util to shared/cgroup-setup This way less stuff needs to be in basic. Initially, I wanted to move all the parts of cgroup-utils.[ch] that depend on efivars.[ch] to shared, because efivars.[ch] is in shared/. Later on, I decide to split efivars.[ch], so the move done in this patch is not necessary anymore. Nevertheless, it is still valid on its own. If at some point we want to expose libbasic, it is better to to not have stuff that belong in libshared there. --- src/basic/cgroup-util.c | 846 ---------------------------------- src/basic/cgroup-util.h | 26 -- src/core/cgroup.c | 1 + src/core/execute.c | 1 + src/core/load-fragment.c | 2 +- src/core/mount-setup.c | 1 + src/core/unit.c | 1 + src/login/pam_systemd.c | 2 +- src/nspawn/nspawn-cgroup.c | 1 + src/shared/bus-unit-util.c | 1 + src/shared/cgroup-setup.c | 860 +++++++++++++++++++++++++++++++++++ src/shared/cgroup-setup.h | 34 ++ src/shared/meson.build | 2 + src/shutdown/shutdown.c | 1 + src/test/meson.build | 4 + src/test/test-cgroup-setup.c | 67 +++ src/test/test-cgroup-util.c | 52 --- src/test/test-cgroup.c | 1 + src/test/test-helper.c | 2 +- 19 files changed, 978 insertions(+), 927 deletions(-) create mode 100644 src/shared/cgroup-setup.c create mode 100644 src/shared/cgroup-setup.h create mode 100644 src/test/test-cgroup-setup.c diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 2865cd518e..6582b92d67 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -31,7 +31,6 @@ #include "mkdir.h" #include "parse-util.h" #include "path-util.h" -#include "proc-cmdline.h" #include "process-util.h" #include "set.h" #include "special.h" @@ -410,173 +409,6 @@ int cg_kill_recursive( return ret; } -int cg_migrate( - const char *cfrom, - const char *pfrom, - const char *cto, - const char *pto, - CGroupFlags flags) { - - bool done = false; - _cleanup_set_free_ Set *s = NULL; - int r, ret = 0; - pid_t my_pid; - - assert(cfrom); - assert(pfrom); - assert(cto); - assert(pto); - - s = set_new(NULL); - if (!s) - return -ENOMEM; - - my_pid = getpid_cached(); - - do { - _cleanup_fclose_ FILE *f = NULL; - pid_t pid = 0; - done = true; - - r = cg_enumerate_processes(cfrom, pfrom, &f); - if (r < 0) { - if (ret >= 0 && r != -ENOENT) - return r; - - return ret; - } - - while ((r = cg_read_pid(f, &pid)) > 0) { - - /* This might do weird stuff if we aren't a - * single-threaded program. However, we - * luckily know we are not */ - if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) - continue; - - if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) - continue; - - /* Ignore kernel threads. Since they can only - * exist in the root cgroup, we only check for - * them there. */ - if (cfrom && - empty_or_root(pfrom) && - is_kernel_thread(pid) > 0) - continue; - - r = cg_attach(cto, pto, pid); - if (r < 0) { - if (ret >= 0 && r != -ESRCH) - ret = r; - } else if (ret == 0) - ret = 1; - - done = false; - - r = set_put(s, PID_TO_PTR(pid)); - if (r < 0) { - if (ret >= 0) - return r; - - return ret; - } - } - - if (r < 0) { - if (ret >= 0) - return r; - - return ret; - } - } while (!done); - - return ret; -} - -int cg_migrate_recursive( - const char *cfrom, - const char *pfrom, - const char *cto, - const char *pto, - CGroupFlags flags) { - - _cleanup_closedir_ DIR *d = NULL; - int r, ret = 0; - char *fn; - - assert(cfrom); - assert(pfrom); - assert(cto); - assert(pto); - - ret = cg_migrate(cfrom, pfrom, cto, pto, flags); - - r = cg_enumerate_subgroups(cfrom, pfrom, &d); - if (r < 0) { - if (ret >= 0 && r != -ENOENT) - return r; - - return ret; - } - - while ((r = cg_read_subgroup(d, &fn)) > 0) { - _cleanup_free_ char *p = NULL; - - p = path_join(empty_to_root(pfrom), fn); - free(fn); - if (!p) - return -ENOMEM; - - r = cg_migrate_recursive(cfrom, p, cto, pto, flags); - if (r != 0 && ret >= 0) - ret = r; - } - - if (r < 0 && ret >= 0) - ret = r; - - if (flags & CGROUP_REMOVE) { - r = cg_rmdir(cfrom, pfrom); - if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) - return r; - } - - return ret; -} - -int cg_migrate_recursive_fallback( - const char *cfrom, - const char *pfrom, - const char *cto, - const char *pto, - CGroupFlags flags) { - - int r; - - assert(cfrom); - assert(pfrom); - assert(cto); - assert(pto); - - r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); - if (r < 0) { - char prefix[strlen(pto) + 1]; - - /* This didn't work? Then let's try all prefixes of the destination */ - - PATH_FOREACH_PREFIX(prefix, pto) { - int q; - - q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); - if (q >= 0) - return q; - } - } - - return r; -} - static const char *controller_to_dirname(const char *controller) { const char *e; @@ -742,253 +574,6 @@ int cg_get_path_and_check(const char *controller, const char *path, const char * return cg_get_path(controller, path, suffix, fs); } -static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { - assert(path); - assert(sb); - assert(ftwbuf); - - if (typeflag != FTW_DP) - return 0; - - if (ftwbuf->level < 1) - return 0; - - (void) rmdir(path); - return 0; -} - -int cg_trim(const char *controller, const char *path, bool delete_root) { - _cleanup_free_ char *fs = NULL; - int r = 0, q; - - assert(path); - - r = cg_get_path(controller, path, NULL, &fs); - if (r < 0) - return r; - - errno = 0; - if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) { - if (errno == ENOENT) - r = 0; - else - r = errno_or_else(EIO); - } - - if (delete_root) { - if (rmdir(fs) < 0 && errno != ENOENT) - return -errno; - } - - q = cg_hybrid_unified(); - if (q < 0) - return q; - if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); - if (q < 0) - log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); - } - - return r; -} - -/* Create a cgroup in the hierarchy of controller. - * Returns 0 if the group already existed, 1 on success, negative otherwise. - */ -int cg_create(const char *controller, const char *path) { - _cleanup_free_ char *fs = NULL; - int r; - - r = cg_get_path_and_check(controller, path, NULL, &fs); - if (r < 0) - return r; - - r = mkdir_parents(fs, 0755); - if (r < 0) - return r; - - r = mkdir_errno_wrapper(fs, 0755); - if (r == -EEXIST) - return 0; - if (r < 0) - return r; - - r = cg_hybrid_unified(); - if (r < 0) - return r; - - if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); - if (r < 0) - log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); - } - - return 1; -} - -int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { - int r, q; - - assert(pid >= 0); - - r = cg_create(controller, path); - if (r < 0) - return r; - - q = cg_attach(controller, path, pid); - if (q < 0) - return q; - - /* This does not remove the cgroup on failure */ - return r; -} - -int cg_attach(const char *controller, const char *path, pid_t pid) { - _cleanup_free_ char *fs = NULL; - char c[DECIMAL_STR_MAX(pid_t) + 2]; - int r; - - assert(path); - assert(pid >= 0); - - r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs); - if (r < 0) - return r; - - if (pid == 0) - pid = getpid_cached(); - - xsprintf(c, PID_FMT "\n", pid); - - r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER); - if (r < 0) - return r; - - r = cg_hybrid_unified(); - if (r < 0) - return r; - - if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); - if (r < 0) - log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path); - } - - return 0; -} - -int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { - int r; - - assert(controller); - assert(path); - assert(pid >= 0); - - r = cg_attach(controller, path, pid); - if (r < 0) { - char prefix[strlen(path) + 1]; - - /* This didn't work? Then let's try all prefixes of - * the destination */ - - PATH_FOREACH_PREFIX(prefix, path) { - int q; - - q = cg_attach(controller, prefix, pid); - if (q >= 0) - return q; - } - } - - return r; -} - -int cg_set_access( - const char *controller, - const char *path, - uid_t uid, - gid_t gid) { - - struct Attribute { - const char *name; - bool fatal; - }; - - /* cgroup v1, aka legacy/non-unified */ - static const struct Attribute legacy_attributes[] = { - { "cgroup.procs", true }, - { "tasks", false }, - { "cgroup.clone_children", false }, - {}, - }; - - /* cgroup v2, aka unified */ - static const struct Attribute unified_attributes[] = { - { "cgroup.procs", true }, - { "cgroup.subtree_control", true }, - { "cgroup.threads", false }, - {}, - }; - - static const struct Attribute* const attributes[] = { - [false] = legacy_attributes, - [true] = unified_attributes, - }; - - _cleanup_free_ char *fs = NULL; - const struct Attribute *i; - int r, unified; - - assert(path); - - if (uid == UID_INVALID && gid == GID_INVALID) - return 0; - - unified = cg_unified_controller(controller); - if (unified < 0) - return unified; - - /* Configure access to the cgroup itself */ - r = cg_get_path(controller, path, NULL, &fs); - if (r < 0) - return r; - - r = chmod_and_chown(fs, 0755, uid, gid); - if (r < 0) - return r; - - /* Configure access to the cgroup's attributes */ - for (i = attributes[unified]; i->name; i++) { - fs = mfree(fs); - - r = cg_get_path(controller, path, i->name, &fs); - if (r < 0) - return r; - - r = chmod_and_chown(fs, 0644, uid, gid); - if (r < 0) { - if (i->fatal) - return r; - - log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs); - } - } - - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - r = cg_hybrid_unified(); - if (r < 0) - return r; - if (r > 0) { - /* Always propagate access mode from unified to legacy controller */ - r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid); - if (r < 0) - log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path); - } - } - - return 0; -} - int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) { _cleanup_free_ char *fs = NULL; int r; @@ -2143,194 +1728,6 @@ fail: done: memcpy(ret_values, v, sizeof(char*) * n); return 0; - -} - -int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { - CGroupController c; - CGroupMask done; - bool created; - int r; - - /* This one will create a cgroup in our private tree, but also - * duplicate it in the trees specified in mask, and remove it - * in all others. - * - * Returns 0 if the group already existed in the systemd hierarchy, - * 1 on success, negative otherwise. - */ - - /* First create the cgroup in our own hierarchy. */ - r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path); - if (r < 0) - return r; - created = r; - - /* If we are in the unified hierarchy, we are done now */ - r = cg_all_unified(); - if (r < 0) - return r; - if (r > 0) - return created; - - supported &= CGROUP_MASK_V1; - mask = CGROUP_MASK_EXTEND_JOINED(mask); - done = 0; - - /* Otherwise, do the same in the other hierarchies */ - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *n; - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - n = cgroup_controller_to_string(c); - if (FLAGS_SET(mask, bit)) - (void) cg_create(n, path); - else - (void) cg_trim(n, path, true); - - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return created; -} - -int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { - CGroupController c; - CGroupMask done; - int r; - - r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); - if (r < 0) - return r; - - r = cg_all_unified(); - if (r < 0) - return r; - if (r > 0) - return 0; - - supported &= CGROUP_MASK_V1; - done = 0; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *p = NULL; - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - if (path_callback) - p = path_callback(bit, userdata); - if (!p) - p = path; - - (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid); - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return 0; -} - -int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) { - Iterator i; - void *pidp; - int r = 0; - - SET_FOREACH(pidp, pids, i) { - pid_t pid = PTR_TO_PID(pidp); - int q; - - q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); - if (q < 0 && r >= 0) - r = q; - } - - return r; -} - -int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { - CGroupController c; - CGroupMask done; - int r = 0, q; - - if (!path_equal(from, to)) { - r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); - if (r < 0) - return r; - } - - q = cg_all_unified(); - if (q < 0) - return q; - if (q > 0) - return r; - - supported &= CGROUP_MASK_V1; - done = 0; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *p = NULL; - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - if (to_callback) - p = to_callback(bit, userdata); - if (!p) - p = to; - - (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return r; -} - -int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { - CGroupController c; - CGroupMask done; - int r, q; - - r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); - if (r < 0) - return r; - - q = cg_all_unified(); - if (q < 0) - return q; - if (q > 0) - return r; - - supported &= CGROUP_MASK_V1; - done = 0; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - (void) cg_trim(cgroup_controller_to_string(c), path, delete_root); - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return r; } int cg_mask_to_string(CGroupMask mask, char **ret) { @@ -2626,209 +2023,6 @@ int cg_hybrid_unified(void) { return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232; } -int cg_enable_everywhere( - CGroupMask supported, - CGroupMask mask, - const char *p, - CGroupMask *ret_result_mask) { - - _cleanup_fclose_ FILE *f = NULL; - _cleanup_free_ char *fs = NULL; - CGroupController c; - CGroupMask ret = 0; - int r; - - assert(p); - - if (supported == 0) { - if (ret_result_mask) - *ret_result_mask = 0; - return 0; - } - - r = cg_all_unified(); - if (r < 0) - return r; - if (r == 0) { - /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim - * complete success right away. (If you wonder why we return the full mask here, rather than zero: the - * caller tends to use the returned mask later on to compare if all controllers where properly joined, - * and if not requeues realization. This use is the primary purpose of the return value, hence let's - * minimize surprises here and reduce triggers for re-realization by always saying we fully - * succeeded.) */ - if (ret_result_mask) - *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with - * CGROUP_MASK_V2: The 'supported' mask - * might contain pure-V1 or BPF - * controllers, and we never want to - * claim that we could enable those with - * cgroup.subtree_control */ - return 0; - } - - r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); - if (r < 0) - return r; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *n; - - if (!FLAGS_SET(CGROUP_MASK_V2, bit)) - continue; - - if (!FLAGS_SET(supported, bit)) - continue; - - n = cgroup_controller_to_string(c); - { - char s[1 + strlen(n) + 1]; - - s[0] = FLAGS_SET(mask, bit) ? '+' : '-'; - strcpy(s + 1, n); - - if (!f) { - f = fopen(fs, "we"); - if (!f) - return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p); - } - - r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER); - if (r < 0) { - log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m", - FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs); - clearerr(f); - - /* If we can't turn off a controller, leave it on in the reported resulting mask. This - * happens for example when we attempt to turn off a controller up in the tree that is - * used down in the tree. */ - if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY - * only here, and not follow the same logic - * for other errors such as EINVAL or - * EOPNOTSUPP or anything else. That's - * because EBUSY indicates that the - * controllers is currently enabled and - * cannot be disabled because something down - * the hierarchy is still using it. Any other - * error most likely means something like "I - * never heard of this controller" or - * similar. In the former case it's hence - * safe to assume the controller is still on - * after the failed operation, while in the - * latter case it's safer to assume the - * controller is unknown and hence certainly - * not enabled. */ - ret |= bit; - } else { - /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */ - if (FLAGS_SET(mask, bit)) - ret |= bit; - } - } - } - - /* Let's return the precise set of controllers now enabled for the cgroup. */ - if (ret_result_mask) - *ret_result_mask = ret; - - return 0; -} - -bool cg_is_unified_wanted(void) { - static thread_local int wanted = -1; - bool b; - const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; - _cleanup_free_ char *c = NULL; - int r; - - /* If we have a cached value, return that. */ - if (wanted >= 0) - return wanted; - - /* If the hierarchy is already mounted, then follow whatever - * was chosen for it. */ - r = cg_unified_cached(true); - if (r >= 0) - return (wanted = r >= CGROUP_UNIFIED_ALL); - - /* If we were explicitly passed systemd.unified_cgroup_hierarchy, - * respect that. */ - r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); - if (r > 0) - return (wanted = b); - - /* If we passed cgroup_no_v1=all with no other instructions, it seems - * highly unlikely that we want to use hybrid or legacy hierarchy. */ - r = proc_cmdline_get_key("cgroup_no_v1", 0, &c); - if (r > 0 && streq_ptr(c, "all")) - return (wanted = true); - - return (wanted = is_default); -} - -bool cg_is_legacy_wanted(void) { - static thread_local int wanted = -1; - - /* If we have a cached value, return that. */ - if (wanted >= 0) - return wanted; - - /* Check if we have cgroup v2 already mounted. */ - if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) - return (wanted = false); - - /* Otherwise, assume that at least partial legacy is wanted, - * since cgroup v2 should already be mounted at this point. */ - return (wanted = true); -} - -bool cg_is_hybrid_wanted(void) { - static thread_local int wanted = -1; - int r; - bool b; - const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; - /* We default to true if the default is "hybrid", obviously, - * but also when the default is "unified", because if we get - * called, it means that unified hierarchy was not mounted. */ - - /* If we have a cached value, return that. */ - if (wanted >= 0) - return wanted; - - /* If the hierarchy is already mounted, then follow whatever - * was chosen for it. */ - if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) - return (wanted = false); - - /* Otherwise, let's see what the kernel command line has to say. - * Since checking is expensive, cache a non-error result. */ - r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); - - /* The meaning of the kernel option is reversed wrt. to the return value - * of this function, hence the negation. */ - return (wanted = r > 0 ? !b : is_default); -} - -int cg_weight_parse(const char *s, uint64_t *ret) { - uint64_t u; - int r; - - if (isempty(s)) { - *ret = CGROUP_WEIGHT_INVALID; - return 0; - } - - r = safe_atou64(s, &u); - if (r < 0) - return r; - - if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX) - return -ERANGE; - - *ret = u; - return 0; -} - const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = { [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX, [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX, @@ -2845,46 +2039,6 @@ static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType); -int cg_cpu_shares_parse(const char *s, uint64_t *ret) { - uint64_t u; - int r; - - if (isempty(s)) { - *ret = CGROUP_CPU_SHARES_INVALID; - return 0; - } - - r = safe_atou64(s, &u); - if (r < 0) - return r; - - if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX) - return -ERANGE; - - *ret = u; - return 0; -} - -int cg_blkio_weight_parse(const char *s, uint64_t *ret) { - uint64_t u; - int r; - - if (isempty(s)) { - *ret = CGROUP_BLKIO_WEIGHT_INVALID; - return 0; - } - - r = safe_atou64(s, &u); - if (r < 0) - return r; - - if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX) - return -ERANGE; - - *ret = u; - return 0; -} - bool is_cgroup_fs(const struct statfs *s) { return is_fs_type(s, CGROUP_SUPER_MAGIC) || is_fs_type(s, CGROUP2_SUPER_MAGIC); diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index ba8df8139d..1472265204 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -172,10 +172,6 @@ typedef int (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata); int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); -int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); -int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); -int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); - int cg_split_spec(const char *spec, char **controller, char **path); int cg_mangle_path(const char *path, char **result); @@ -184,15 +180,8 @@ int cg_get_path_and_check(const char *controller, const char *path, const char * int cg_pid_get_path(const char *controller, pid_t pid, char **path); -int cg_trim(const char *controller, const char *path, bool delete_root); - int cg_rmdir(const char *controller, const char *path); -int cg_create(const char *controller, const char *path); -int cg_attach(const char *controller, const char *path, pid_t pid); -int cg_attach_fallback(const char *controller, const char *path, pid_t pid); -int cg_create_and_attach(const char *controller, const char *path, pid_t pid); - int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret); int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values); @@ -240,13 +229,6 @@ int cg_slice_to_path(const char *unit, char **ret); typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata); -int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); -int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); -int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); -int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); -int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); -int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); - int cg_mask_supported(CGroupMask *ret); int cg_mask_from_string(const char *s, CGroupMask *ret); int cg_mask_to_string(CGroupMask mask, char **ret); @@ -263,16 +245,8 @@ static inline int cg_unified(void) { return cg_unified_cached(true); } -bool cg_is_unified_wanted(void); -bool cg_is_legacy_wanted(void); -bool cg_is_hybrid_wanted(void); - const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; -int cg_weight_parse(const char *s, uint64_t *ret); -int cg_cpu_shares_parse(const char *s, uint64_t *ret); -int cg_blkio_weight_parse(const char *s, uint64_t *ret); - bool is_cgroup_fs(const struct statfs *s); bool fd_is_cgroup_fs(int fd); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 7b0a41fbc8..00690920cd 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -11,6 +11,7 @@ #include "bpf-firewall.h" #include "btrfs-util.h" #include "bus-error.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "cgroup.h" #include "fd-util.h" diff --git a/src/core/execute.c b/src/core/execute.c index 4c90007778..f769d02be4 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -48,6 +48,7 @@ #include "cap-list.h" #include "capability-util.h" #include "chown-recursive.h" +#include "cgroup-setup.h" #include "cpu-set-util.h" #include "def.h" #include "env-file.h" diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index a7951a5757..e517c8dfbf 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -24,7 +24,7 @@ #include "bus-util.h" #include "cap-list.h" #include "capability-util.h" -#include "cgroup.h" +#include "cgroup-setup.h" #include "conf-parser.h" #include "cpu-set-util.h" #include "env-util.h" diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 72cf5a2caa..790f1e234e 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -11,6 +11,7 @@ #include "bus-util.h" #include "cgroup-util.h" #include "conf-files.h" +#include "cgroup-setup.h" #include "dev-setup.h" #include "efivars.h" #include "fd-util.h" diff --git a/src/core/unit.c b/src/core/unit.c index 52a1acafab..004d787d8f 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -15,6 +15,7 @@ #include "bpf-firewall.h" #include "bus-common-errors.h" #include "bus-util.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "dbus-unit.h" #include "dbus.h" diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c index 3f762cbbc3..766d651c3f 100644 --- a/src/login/pam_systemd.c +++ b/src/login/pam_systemd.c @@ -20,7 +20,7 @@ #include "bus-error.h" #include "bus-internal.h" #include "bus-util.h" -#include "cgroup-util.h" +#include "cgroup-setup.h" #include "errno-util.h" #include "fd-util.h" #include "fileio.h" diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 0462b46413..f5048d9473 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -3,6 +3,7 @@ #include #include "alloc-util.h" +#include "cgroup-setup.h" #include "fd-util.h" #include "fileio.h" #include "format-util.h" diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index e53b9d5ea2..286ef89878 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -5,6 +5,7 @@ #include "bus-unit-util.h" #include "bus-util.h" #include "cap-list.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "condition.h" #include "cpu-set-util.h" diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c new file mode 100644 index 0000000000..ddcd156801 --- /dev/null +++ b/src/shared/cgroup-setup.c @@ -0,0 +1,860 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include +#include + +#include "cgroup-setup.h" +#include "cgroup-util.h" +#include "errno-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "stdio-util.h" +#include "string-util.h" +#include "fs-util.h" +#include "mkdir.h" +#include "process-util.h" +#include "fileio.h" +#include "user-util.h" +#include "fd-util.h" + +bool cg_is_unified_wanted(void) { + static thread_local int wanted = -1; + bool b; + const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; + _cleanup_free_ char *c = NULL; + int r; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever was chosen for it. */ + r = cg_unified_cached(true); + if (r >= 0) + return (wanted = r >= CGROUP_UNIFIED_ALL); + + /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */ + r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); + if (r > 0) + return (wanted = b); + + /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to + * use hybrid or legacy hierarchy. */ + r = proc_cmdline_get_key("cgroup_no_v1", 0, &c); + if (r > 0 && streq_ptr(c, "all")) + return (wanted = true); + + return (wanted = is_default); +} + +bool cg_is_legacy_wanted(void) { + static thread_local int wanted = -1; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* Check if we have cgroup v2 already mounted. */ + if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, assume that at least partial legacy is wanted, + * since cgroup v2 should already be mounted at this point. */ + return (wanted = true); +} + +bool cg_is_hybrid_wanted(void) { + static thread_local int wanted = -1; + int r; + bool b; + const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; + /* We default to true if the default is "hybrid", obviously, but also when the default is "unified", + * because if we get called, it means that unified hierarchy was not mounted. */ + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever was chosen for it. */ + if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache + * a non-error result. */ + r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); + + /* The meaning of the kernel option is reversed wrt. to the return value of this function, hence the + * negation. */ + return (wanted = r > 0 ? !b : is_default); +} + +int cg_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +int cg_cpu_shares_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_CPU_SHARES_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +int cg_blkio_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_BLKIO_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + + +static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { + assert(path); + assert(sb); + assert(ftwbuf); + + if (typeflag != FTW_DP) + return 0; + + if (ftwbuf->level < 1) + return 0; + + (void) rmdir(path); + return 0; +} + +int cg_trim(const char *controller, const char *path, bool delete_root) { + _cleanup_free_ char *fs = NULL; + int r = 0, q; + + assert(path); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + errno = 0; + if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) { + if (errno == ENOENT) + r = 0; + else + r = errno_or_else(EIO); + } + + if (delete_root) { + if (rmdir(fs) < 0 && errno != ENOENT) + return -errno; + } + + q = cg_hybrid_unified(); + if (q < 0) + return q; + if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); + if (q < 0) + log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); + } + + return r; +} + +/* Create a cgroup in the hierarchy of controller. + * Returns 0 if the group already existed, 1 on success, negative otherwise. + */ +int cg_create(const char *controller, const char *path) { + _cleanup_free_ char *fs = NULL; + int r; + + r = cg_get_path_and_check(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = mkdir_parents(fs, 0755); + if (r < 0) + return r; + + r = mkdir_errno_wrapper(fs, 0755); + if (r == -EEXIST) + return 0; + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); + } + + return 1; +} + +int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { + int r, q; + + assert(pid >= 0); + + r = cg_create(controller, path); + if (r < 0) + return r; + + q = cg_attach(controller, path, pid); + if (q < 0) + return q; + + /* This does not remove the cgroup on failure */ + return r; +} + +int cg_attach(const char *controller, const char *path, pid_t pid) { + _cleanup_free_ char *fs = NULL; + char c[DECIMAL_STR_MAX(pid_t) + 2]; + int r; + + assert(path); + assert(pid >= 0); + + r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs); + if (r < 0) + return r; + + if (pid == 0) + pid = getpid_cached(); + + xsprintf(c, PID_FMT "\n", pid); + + r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); + if (r < 0) + log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path); + } + + return 0; +} + +int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { + int r; + + assert(controller); + assert(path); + assert(pid >= 0); + + r = cg_attach(controller, path, pid); + if (r < 0) { + char prefix[strlen(path) + 1]; + + /* This didn't work? Then let's try all prefixes of + * the destination */ + + PATH_FOREACH_PREFIX(prefix, path) { + int q; + + q = cg_attach(controller, prefix, pid); + if (q >= 0) + return q; + } + } + + return r; +} + +int cg_set_access( + const char *controller, + const char *path, + uid_t uid, + gid_t gid) { + + struct Attribute { + const char *name; + bool fatal; + }; + + /* cgroup v1, aka legacy/non-unified */ + static const struct Attribute legacy_attributes[] = { + { "cgroup.procs", true }, + { "tasks", false }, + { "cgroup.clone_children", false }, + {}, + }; + + /* cgroup v2, aka unified */ + static const struct Attribute unified_attributes[] = { + { "cgroup.procs", true }, + { "cgroup.subtree_control", true }, + { "cgroup.threads", false }, + {}, + }; + + static const struct Attribute* const attributes[] = { + [false] = legacy_attributes, + [true] = unified_attributes, + }; + + _cleanup_free_ char *fs = NULL; + const struct Attribute *i; + int r, unified; + + assert(path); + + if (uid == UID_INVALID && gid == GID_INVALID) + return 0; + + unified = cg_unified_controller(controller); + if (unified < 0) + return unified; + + /* Configure access to the cgroup itself */ + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0755, uid, gid); + if (r < 0) + return r; + + /* Configure access to the cgroup's attributes */ + for (i = attributes[unified]; i->name; i++) { + fs = mfree(fs); + + r = cg_get_path(controller, path, i->name, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0644, uid, gid); + if (r < 0) { + if (i->fatal) + return r; + + log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs); + } + } + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_hybrid_unified(); + if (r < 0) + return r; + if (r > 0) { + /* Always propagate access mode from unified to legacy controller */ + r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid); + if (r < 0) + log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path); + } + } + + return 0; +} + +int cg_migrate( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + bool done = false; + _cleanup_set_free_ Set *s = NULL; + int r, ret = 0; + pid_t my_pid; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + s = set_new(NULL); + if (!s) + return -ENOMEM; + + my_pid = getpid_cached(); + + do { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid = 0; + done = true; + + r = cg_enumerate_processes(cfrom, pfrom, &f); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + /* This might do weird stuff if we aren't a + * single-threaded program. However, we + * luckily know we are not */ + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) + continue; + + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) + continue; + + /* Ignore kernel threads. Since they can only + * exist in the root cgroup, we only check for + * them there. */ + if (cfrom && + empty_or_root(pfrom) && + is_kernel_thread(pid) > 0) + continue; + + r = cg_attach(cto, pto, pid); + if (r < 0) { + if (ret >= 0 && r != -ESRCH) + ret = r; + } else if (ret == 0) + ret = 1; + + done = false; + + r = set_put(s, PID_TO_PTR(pid)); + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } + + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } while (!done); + + return ret; +} + +int cg_migrate_recursive( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + _cleanup_closedir_ DIR *d = NULL; + int r, ret = 0; + char *fn; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + ret = cg_migrate(cfrom, pfrom, cto, pto, flags); + + r = cg_enumerate_subgroups(cfrom, pfrom, &d); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = path_join(empty_to_root(pfrom), fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_migrate_recursive(cfrom, p, cto, pto, flags); + if (r != 0 && ret >= 0) + ret = r; + } + + if (r < 0 && ret >= 0) + ret = r; + + if (flags & CGROUP_REMOVE) { + r = cg_rmdir(cfrom, pfrom); + if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) + return r; + } + + return ret; +} + +int cg_migrate_recursive_fallback( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + int r; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); + if (r < 0) { + char prefix[strlen(pto) + 1]; + + /* This didn't work? Then let's try all prefixes of the destination */ + + PATH_FOREACH_PREFIX(prefix, pto) { + int q; + + q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); + if (q >= 0) + return q; + } + } + + return r; +} + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { + CGroupController c; + CGroupMask done; + bool created; + int r; + + /* This one will create a cgroup in our private tree, but also + * duplicate it in the trees specified in mask, and remove it + * in all others. + * + * Returns 0 if the group already existed in the systemd hierarchy, + * 1 on success, negative otherwise. + */ + + /* First create the cgroup in our own hierarchy. */ + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path); + if (r < 0) + return r; + created = r; + + /* If we are in the unified hierarchy, we are done now */ + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return created; + + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); + done = 0; + + /* Otherwise, do the same in the other hierarchies */ + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + n = cgroup_controller_to_string(c); + if (FLAGS_SET(mask, bit)) + (void) cg_create(n, path); + else + (void) cg_trim(n, path, true); + + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return created; +} + +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { + CGroupController c; + CGroupMask done; + int r; + + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); + if (r < 0) + return r; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return 0; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *p = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (path_callback) + p = path_callback(bit, userdata); + if (!p) + p = path; + + (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return 0; +} + +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) { + Iterator i; + void *pidp; + int r = 0; + + SET_FOREACH(pidp, pids, i) { + pid_t pid = PTR_TO_PID(pidp); + int q; + + q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); + if (q < 0 && r >= 0) + r = q; + } + + return r; +} + +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { + CGroupController c; + CGroupMask done; + int r = 0, q; + + if (!path_equal(from, to)) { + r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); + if (r < 0) + return r; + } + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + return r; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *p = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (to_callback) + p = to_callback(bit, userdata); + if (!p) + p = to; + + (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return r; +} + +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { + CGroupController c; + CGroupMask done; + int r, q; + + r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); + if (r < 0) + return r; + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + return r; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + (void) cg_trim(cgroup_controller_to_string(c), path, delete_root); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return r; +} + +int cg_enable_everywhere( + CGroupMask supported, + CGroupMask mask, + const char *p, + CGroupMask *ret_result_mask) { + + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *fs = NULL; + CGroupController c; + CGroupMask ret = 0; + int r; + + assert(p); + + if (supported == 0) { + if (ret_result_mask) + *ret_result_mask = 0; + return 0; + } + + r = cg_all_unified(); + if (r < 0) + return r; + if (r == 0) { + /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim + * complete success right away. (If you wonder why we return the full mask here, rather than zero: the + * caller tends to use the returned mask later on to compare if all controllers where properly joined, + * and if not requeues realization. This use is the primary purpose of the return value, hence let's + * minimize surprises here and reduce triggers for re-realization by always saying we fully + * succeeded.) */ + if (ret_result_mask) + *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with + * CGROUP_MASK_V2: The 'supported' mask + * might contain pure-V1 or BPF + * controllers, and we never want to + * claim that we could enable those with + * cgroup.subtree_control */ + return 0; + } + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); + if (r < 0) + return r; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(CGROUP_MASK_V2, bit)) + continue; + + if (!FLAGS_SET(supported, bit)) + continue; + + n = cgroup_controller_to_string(c); + { + char s[1 + strlen(n) + 1]; + + s[0] = FLAGS_SET(mask, bit) ? '+' : '-'; + strcpy(s + 1, n); + + if (!f) { + f = fopen(fs, "we"); + if (!f) + return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p); + } + + r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) { + log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m", + FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs); + clearerr(f); + + /* If we can't turn off a controller, leave it on in the reported resulting mask. This + * happens for example when we attempt to turn off a controller up in the tree that is + * used down in the tree. */ + if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY + * only here, and not follow the same logic + * for other errors such as EINVAL or + * EOPNOTSUPP or anything else. That's + * because EBUSY indicates that the + * controllers is currently enabled and + * cannot be disabled because something down + * the hierarchy is still using it. Any other + * error most likely means something like "I + * never heard of this controller" or + * similar. In the former case it's hence + * safe to assume the controller is still on + * after the failed operation, while in the + * latter case it's safer to assume the + * controller is unknown and hence certainly + * not enabled. */ + ret |= bit; + } else { + /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */ + if (FLAGS_SET(mask, bit)) + ret |= bit; + } + } + } + + /* Let's return the precise set of controllers now enabled for the cgroup. */ + if (ret_result_mask) + *ret_result_mask = ret; + + return 0; +} diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h new file mode 100644 index 0000000000..6e9b6857d8 --- /dev/null +++ b/src/shared/cgroup-setup.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include +#include +#include + +#include "cgroup-util.h" + +bool cg_is_unified_wanted(void); +bool cg_is_legacy_wanted(void); +bool cg_is_hybrid_wanted(void); + +int cg_weight_parse(const char *s, uint64_t *ret); +int cg_cpu_shares_parse(const char *s, uint64_t *ret); +int cg_blkio_weight_parse(const char *s, uint64_t *ret); + +int cg_trim(const char *controller, const char *path, bool delete_root); + +int cg_create(const char *controller, const char *path); +int cg_attach(const char *controller, const char *path, pid_t pid); +int cg_attach_fallback(const char *controller, const char *path, pid_t pid); +int cg_create_and_attach(const char *controller, const char *path, pid_t pid); + +int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); +int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); diff --git a/src/shared/meson.build b/src/shared/meson.build index e9005a30e3..63a3f88d50 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -33,6 +33,8 @@ shared_sources = files(''' bus-wait-for-units.h calendarspec.c calendarspec.h + cgroup-setup.c + cgroup-setup.h cgroup-show.c cgroup-show.h clean-ipc.c diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c index 0eb17989d0..08215fd3ee 100644 --- a/src/shutdown/shutdown.c +++ b/src/shutdown/shutdown.c @@ -17,6 +17,7 @@ #include "alloc-util.h" #include "async.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "def.h" #include "exec-util.h" diff --git a/src/test/meson.build b/src/test/meson.build index a8c3e59098..5132145b41 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -615,6 +615,10 @@ tests += [ [], []], + [['src/test/test-cgroup-setup.c'], + [], + []], + [['src/test/test-env-file.c'], [], []], diff --git a/src/test/test-cgroup-setup.c b/src/test/test-cgroup-setup.c new file mode 100644 index 0000000000..330631a910 --- /dev/null +++ b/src/test/test-cgroup-setup.c @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "alloc-util.h" +#include "build.h" +#include "cgroup-setup.h" +#include "log.h" +#include "proc-cmdline.h" +#include "string-util.h" +#include "tests.h" + +static void test_is_wanted_print(bool header) { + _cleanup_free_ char *cmdline = NULL; + + log_info("-- %s --", __func__); + assert_se(proc_cmdline(&cmdline) >= 0); + log_info("cmdline: %s", cmdline); + if (header) { + log_info(_CGROUP_HIERARCHY_); + (void) system("findmnt -n /sys/fs/cgroup"); + } + + log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted())); + log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted())); + log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted())); + log_info(" "); +} + +static void test_is_wanted(void) { + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0); + test_is_wanted_print(false); + + /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise + * explicitly specified. */ + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "cgroup_no_v1=all", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "cgroup_no_v1=all " + "systemd.unified_cgroup_hierarchy=0", 1) >= 0); + test_is_wanted_print(false); +} + +int main(void) { + test_setup_logging(LOG_DEBUG); + + test_is_wanted_print(true); + test_is_wanted_print(false); /* run twice to test caching */ + test_is_wanted(); + + return 0; +} diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index f45c5ff760..e4fbb9edce 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -333,55 +333,6 @@ static void test_fd_is_cgroup_fs(void) { fd = safe_close(fd); } -static void test_is_wanted_print(bool header) { - _cleanup_free_ char *cmdline = NULL; - - log_info("-- %s --", __func__); - assert_se(proc_cmdline(&cmdline) >= 0); - log_info("cmdline: %s", cmdline); - if (header) { - - log_info(_CGROUP_HIERARCHY_); - (void) system("findmnt -n /sys/fs/cgroup"); - } - - log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted())); - log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted())); - log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted())); - log_info(" "); -} - -static void test_is_wanted(void) { - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy=0", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy=0 " - "systemd.legacy_systemd_cgroup_controller", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy=0 " - "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0); - test_is_wanted_print(false); - - /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise - * explicitly specified. */ - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "cgroup_no_v1=all", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "cgroup_no_v1=all " - "systemd.unified_cgroup_hierarchy=0", 1) >= 0); - test_is_wanted_print(false); -} - static void test_cg_tests(void) { int all, hybrid, systemd, r; @@ -477,9 +428,6 @@ int main(void) { TEST_REQ_RUNNING_SYSTEMD(test_mask_supported()); TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs()); TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs()); - test_is_wanted_print(true); - test_is_wanted_print(false); /* run twice to test caching */ - test_is_wanted(); test_cg_tests(); test_cg_get_keyed_attribute(); diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c index 5cdfd2dc54..1891df0eb9 100644 --- a/src/test/test-cgroup.c +++ b/src/test/test-cgroup.c @@ -3,6 +3,7 @@ #include #include +#include "cgroup-setup.h" #include "cgroup-util.h" #include "path-util.h" #include "process-util.h" diff --git a/src/test/test-helper.c b/src/test/test-helper.c index 5b79d12f07..dc8c80a14b 100644 --- a/src/test/test-helper.c +++ b/src/test/test-helper.c @@ -3,7 +3,7 @@ #include "test-helper.h" #include "random-util.h" #include "alloc-util.h" -#include "cgroup-util.h" +#include "cgroup-setup.h" #include "string-util.h" int enter_cgroup_subroot(void) {