From fdaa23af94f0e785036c52d6a603a3e2c1aff81e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 20:07:45 +0100 Subject: [PATCH 1/8] cgtop: use automatic clean-up --- src/cgtop/cgtop.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 69a5fde732..cf2de7eb5a 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -98,6 +98,8 @@ static void group_hashmap_free(Hashmap *h) { hashmap_free(h); } +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, group_hashmap_free); + static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64_t t) { if (!is_valid) return "-"; @@ -924,13 +926,13 @@ static const char* counting_what(void) { } int main(int argc, char *argv[]) { - int r; - Hashmap *a = NULL, *b = NULL; + _cleanup_(group_hashmap_freep) Hashmap *a = NULL, *b = NULL; unsigned iteration = 0; usec_t last_refresh = 0; bool quit = false, immediate_refresh = false; _cleanup_free_ char *root = NULL; CGroupMask mask; + int r; log_parse_environment(); log_open(); @@ -1138,8 +1140,6 @@ int main(int argc, char *argv[]) { r = 0; finish: - group_hashmap_free(a); - group_hashmap_free(b); return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; } From 63a0cbbac3723c0684807fb39fc37f6c180d4a17 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 20:08:17 +0100 Subject: [PATCH 2/8] cgtop: use FOREACH_STRING() for fun and profit --- src/cgtop/cgtop.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index cf2de7eb5a..efbfd240a6 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -501,31 +501,14 @@ static int refresh_one( } static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration) { + const char *c; int r; - assert(a); - - r = refresh_one(SYSTEMD_CGROUP_CONTROLLER, root, a, b, iteration, 0, NULL); - if (r < 0) - return r; - r = refresh_one("cpu", root, a, b, iteration, 0, NULL); - if (r < 0) - return r; - r = refresh_one("cpuacct", root, a, b, iteration, 0, NULL); - if (r < 0) - return r; - r = refresh_one("memory", root, a, b, iteration, 0, NULL); - if (r < 0) - return r; - r = refresh_one("io", root, a, b, iteration, 0, NULL); - if (r < 0) - return r; - r = refresh_one("blkio", root, a, b, iteration, 0, NULL); - if (r < 0) - return r; - r = refresh_one("pids", root, a, b, iteration, 0, NULL); - if (r < 0) - return r; + FOREACH_STRING(c, SYSTEMD_CGROUP_CONTROLLER, "cpu", "cpuacct", "memory", "io", "blkio", "pids") { + r = refresh_one(c, root, a, b, iteration, 0, NULL); + if (r < 0) + return r; + } return 0; } From 8d33dca2ff77c7f58a15badb53a5bb313d9b79d4 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 20:09:03 +0100 Subject: [PATCH 3/8] core: fix capitalization of CPUShares= settings --- src/core/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index e62e05f0ff..baa356b32b 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -835,7 +835,7 @@ static void cgroup_context_apply( weight = cgroup_cpu_shares_to_weight(shares); - log_cgroup_compat(u, "Applying 
[Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s", + log_cgroup_compat(u, "Applying [Startup]CPUShares %" PRIu64 " as [Startup]CPUWeight %" PRIu64 " on %s", shares, weight, path); } else weight = CGROUP_WEIGHT_DEFAULT; @@ -849,7 +849,7 @@ static void cgroup_context_apply( shares = cgroup_cpu_weight_to_shares(weight); - log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s", + log_cgroup_compat(u, "Applying [Startup]CPUWeight %" PRIu64 " as [Startup]CPUShares %" PRIu64 " on %s", weight, shares, path); } else if (has_shares) shares = cgroup_context_cpu_shares(c, state); From 143fadf369a18449464956206226761e49be1928 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 21:07:43 +0100 Subject: [PATCH 4/8] core: remove JoinControllers= configuration setting This removes the ability to configure which cgroup controllers to mount together. Instead, we'll now hardcode that "cpu" and "cpuacct" are mounted together as well as "net_cls" and "net_prio". The concept of mounting controllers together has no future as it does not exist in cgroupsv2. Moreover, the current logic is systematically broken, as revealed by the discussions in #10507. Also, we surveyed Red Hat customers and couldn't find a single user of the concept (which isn't particularly surprising, as it is broken...) This reduces the (already way too complex) cgroup handling for us, since we now know, whenever we make a change to a cgroup for one controller, which other controllers it also applies to.
--- man/systemd-system.conf.xml | 23 --- src/core/main.c | 6 +- src/core/mount-setup.c | 146 +++++++++++--------- src/core/mount-setup.h | 2 +- src/core/system.conf.in | 1 - src/shared/conf-parser.c | 115 --------------- src/shared/conf-parser.h | 1 - src/test/test-conf-parser.c | 40 ------ test/fuzz/fuzz-unit-file/directives.service | 1 - 9 files changed, 80 insertions(+), 255 deletions(-) diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index 5ce2c6fb96..ea17111bc5 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -106,29 +106,6 @@ systemd.exec5. - - JoinControllers=cpu,cpuacct net_cls,netprio - - Configures controllers that shall be mounted - in a single hierarchy. By default, systemd will mount all - controllers which are enabled in the kernel in individual - hierarchies, with the exception of those listed in this - setting. Takes a space-separated list of comma-separated - controller names, in order to allow multiple joined - hierarchies. Defaults to 'cpu,cpuacct'. Pass an empty string - to ensure that systemd mounts all controllers in separate - hierarchies. - - Note that this option is only applied once, at very - early boot. If you use an initial RAM disk (initrd) that uses - systemd, it might hence be necessary to rebuild the initrd if - this option is changed, and make sure the new configuration - file is included in it. Otherwise, the initrd might mount the - controller hierarchies in a different configuration than - intended, and the main system cannot remount them - anymore. 
- - RuntimeWatchdogSec= ShutdownWatchdogSec= diff --git a/src/core/main.c b/src/core/main.c index 11537c8140..cabcb9ec16 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -100,7 +100,6 @@ static ShowStatus arg_show_status = _SHOW_STATUS_INVALID; static bool arg_switched_root = false; static PagerFlags arg_pager_flags = 0; static bool arg_service_watchdogs = true; -static char ***arg_join_controllers = NULL; static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL; static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC; @@ -667,7 +666,7 @@ static int parse_config_file(void) { { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot }, { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status }, { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL }, - { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers }, + { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL }, { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog }, { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog }, { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device }, @@ -1956,7 +1955,7 @@ static int initialize_runtime( install_crash_handler(); if (!skip_setup) { - r = mount_cgroup_controllers(arg_join_controllers); + r = mount_cgroup_controllers(); if (r < 0) { *ret_error_message = "Failed to mount cgroup hierarchies"; return r; @@ -2081,7 +2080,6 @@ static void free_arguments(void) { arg_default_unit = mfree(arg_default_unit); arg_confirm_spawn = mfree(arg_confirm_spawn); - arg_join_controllers = strv_free_free(arg_join_controllers); arg_default_environment = strv_free(arg_default_environment); arg_syscall_archs = set_free(arg_syscall_archs); } diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 16880e6157..e15d94d98a 100644 --- 
a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -229,76 +229,105 @@ int mount_setup_early(void) { return mount_points_setup(N_EARLY_MOUNT, false); } -int mount_cgroup_controllers(char ***join_controllers) { +static const char *join_with(const char *controller) { + + static const char* const pairs[] = { + "cpu", "cpuacct", + "net_cls", "net_prio", + NULL + }; + + const char *const *x, *const *y; + + assert(controller); + + /* This will lookup which controller to mount another controller with. Input is a controller name, and output + * is the other controller name. The function works both ways: you can input one and get the other, and input + * the other to get the one. */ + + STRV_FOREACH_PAIR(x, y, pairs) { + if (streq(controller, *x)) + return *y; + if (streq(controller, *y)) + return *x; + } + + return NULL; +} + +static int symlink_controller(const char *target, const char *alias) { + const char *a; + int r; + + assert(target); + assert(alias); + + a = strjoina("/sys/fs/cgroup/", alias); + + r = symlink_idempotent(target, a, false); + if (r < 0) + return log_error_errno(r, "Failed to create symlink %s: %m", a); + +#ifdef SMACK_RUN_LABEL + const char *p; + + p = strjoina("/sys/fs/cgroup/", target); + + r = mac_smack_copy(a, p); + if (r < 0 && r != -EOPNOTSUPP) + return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", p, a); +#endif + + return 0; +} + +int mount_cgroup_controllers(void) { _cleanup_set_free_free_ Set *controllers = NULL; - bool has_argument = !!join_controllers; int r; if (!cg_is_legacy_wanted()) return 0; /* Mount all available cgroup controllers that are built into the kernel. */ - - if (!has_argument) - /* The defaults: - * mount "cpu" + "cpuacct" together, and "net_cls" + "net_prio". - * - * We'd like to add "cpuset" to the mix, but "cpuset" doesn't really - * work for groups with no initialized attributes. 
- */ - join_controllers = (char**[]) { - STRV_MAKE("cpu", "cpuacct"), - STRV_MAKE("net_cls", "net_prio"), - NULL, - }; - r = cg_kernel_controllers(&controllers); if (r < 0) return log_error_errno(r, "Failed to enumerate cgroup controllers: %m"); for (;;) { _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL; + const char *other_controller; MountPoint p = { .what = "cgroup", .type = "cgroup", .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV, .mode = MNT_IN_CONTAINER, }; - char ***k = NULL; controller = set_steal_first(controllers); if (!controller) break; - for (k = join_controllers; *k; k++) - if (strv_find(*k, controller)) - break; + /* Check if we shall mount this together with another controller */ + other_controller = join_with(controller); + if (other_controller) { + _cleanup_free_ char *c = NULL; - if (*k) { - char **i, **j; + /* Check if the other controller is actually available in the kernel too */ + c = set_remove(controllers, other_controller); + if (c) { - for (i = *k, j = *k; *i; i++) { - - if (!streq(*i, controller)) { - _cleanup_free_ char *t; - - t = set_remove(controllers, *i); - if (!t) { - if (has_argument) - free(*i); - continue; - } - } - - *(j++) = *i; + /* Join the two controllers into one string, and maintain a stable ordering */ + if (strcmp(controller, other_controller) < 0) + options = strjoin(controller, ",", other_controller); + else + options = strjoin(other_controller, ",", controller); + if (!options) + return log_oom(); } + } - *j = NULL; - - options = strv_join(*k, ","); - if (!options) - return log_oom(); - } else + /* The simple case, where there's only one controller to mount together */ + if (!options) options = TAKE_PTR(controller); where = strappend("/sys/fs/cgroup/", options); @@ -312,35 +341,14 @@ int mount_cgroup_controllers(char ***join_controllers) { if (r < 0) return r; - if (r > 0 && *k) { - char **i; - - for (i = *k; *i; i++) { - _cleanup_free_ char *t = NULL; - - t = strappend("/sys/fs/cgroup/", *i); - if 
(!t) - return log_oom(); - - r = symlink(options, t); - if (r >= 0) { -#ifdef SMACK_RUN_LABEL - _cleanup_free_ char *src; - src = strappend("/sys/fs/cgroup/", options); - if (!src) - return log_oom(); - r = mac_smack_copy(t, src); - if (r < 0 && r != -EOPNOTSUPP) - return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", src, t); -#endif - } else if (errno != EEXIST) - return log_error_errno(errno, "Failed to create symlink %s: %m", t); - } - } + /* Create symlinks from the individual controller names, in case we have a joined mount */ + if (controller) + (void) symlink_controller(options, controller); + if (other_controller) + (void) symlink_controller(options, other_controller); } - /* Now that we mounted everything, let's make the tmpfs the - * cgroup file systems are mounted into read-only. */ + /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */ (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); return 0; diff --git a/src/core/mount-setup.h b/src/core/mount-setup.h index 43cd8908de..b4ca2cf4b4 100644 --- a/src/core/mount-setup.h +++ b/src/core/mount-setup.h @@ -6,7 +6,7 @@ int mount_setup_early(void); int mount_setup(bool loaded_policy); -int mount_cgroup_controllers(char ***join_controllers); +int mount_cgroup_controllers(void); bool mount_point_is_api(const char *path); bool mount_point_ignore(const char *path); diff --git a/src/core/system.conf.in b/src/core/system.conf.in index ef1bbbd948..0a58737b82 100644 --- a/src/core/system.conf.in +++ b/src/core/system.conf.in @@ -23,7 +23,6 @@ #CrashReboot=no #CtrlAltDelBurstAction=reboot-force #CPUAffinity=1 2 -#JoinControllers=cpu,cpuacct net_cls,net_prio #RuntimeWatchdogSec=0 #ShutdownWatchdogSec=10min #WatchdogDevice= diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c index 493ece9d09..b417ac2d6d 100644 --- a/src/shared/conf-parser.c +++ 
b/src/shared/conf-parser.c @@ -1013,121 +1013,6 @@ int config_parse_ip_port( return 0; } -int config_parse_join_controllers( - const char *unit, - const char *filename, - unsigned line, - const char *section, - unsigned section_line, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { - - char ****ret = data; - const char *whole_rvalue = rvalue; - unsigned n = 0; - _cleanup_(strv_free_freep) char ***controllers = NULL; - - assert(filename); - assert(lvalue); - assert(rvalue); - assert(ret); - - for (;;) { - _cleanup_free_ char *word = NULL; - char **l; - int r; - - r = extract_first_word(&rvalue, &word, NULL, EXTRACT_QUOTES); - if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, whole_rvalue); - return r; - } - if (r == 0) - break; - - l = strv_split(word, ","); - if (!l) - return log_oom(); - strv_uniq(l); - - if (strv_length(l) <= 1) { - strv_free(l); - continue; - } - - if (!controllers) { - controllers = new(char**, 2); - if (!controllers) { - strv_free(l); - return log_oom(); - } - - controllers[0] = l; - controllers[1] = NULL; - - n = 1; - } else { - char ***a; - char ***t; - - t = new0(char**, n+2); - if (!t) { - strv_free(l); - return log_oom(); - } - - n = 0; - - for (a = controllers; *a; a++) - if (strv_overlap(*a, l)) { - if (strv_extend_strv(&l, *a, false) < 0) { - strv_free(l); - strv_free_free(t); - return log_oom(); - } - - } else { - char **c; - - c = strv_copy(*a); - if (!c) { - strv_free(l); - strv_free_free(t); - return log_oom(); - } - - t[n++] = c; - } - - t[n++] = strv_uniq(l); - - strv_free_free(controllers); - controllers = t; - } - } - if (!isempty(rvalue)) - log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); - - /* As a special case, return a single empty strv, to override the default */ - if (!controllers) { - controllers = new(char**, 2); - if (!controllers) - return log_oom(); - controllers[0] = strv_new(NULL); - if 
(!controllers[0]) - return log_oom(); - controllers[1] = NULL; - } - - strv_free_free(*ret); - *ret = TAKE_PTR(controllers); - - return 0; -} - int config_parse_mtu( const char *unit, const char *filename, diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h index 16f042d894..865db4278b 100644 --- a/src/shared/conf-parser.h +++ b/src/shared/conf-parser.h @@ -137,7 +137,6 @@ CONFIG_PARSER_PROTOTYPE(config_parse_personality); CONFIG_PARSER_PROTOTYPE(config_parse_permille); CONFIG_PARSER_PROTOTYPE(config_parse_ifname); CONFIG_PARSER_PROTOTYPE(config_parse_ip_port); -CONFIG_PARSER_PROTOTYPE(config_parse_join_controllers); CONFIG_PARSER_PROTOTYPE(config_parse_mtu); CONFIG_PARSER_PROTOTYPE(config_parse_rlimit); diff --git a/src/test/test-conf-parser.c b/src/test/test-conf-parser.c index 368e02cb33..497becff73 100644 --- a/src/test/test-conf-parser.c +++ b/src/test/test-conf-parser.c @@ -210,45 +210,6 @@ static void test_config_parse_iec_uint64(void) { assert_se(config_parse_iec_uint64(NULL, "/this/file", 11, "Section", 22, "Size", 0, "4.5M", &offset, NULL) == 0); } -static void test_config_parse_join_controllers(void) { - int r; - _cleanup_(strv_free_freep) char ***c = NULL; - char ***c2; - - /* Test normal operation */ - r = config_parse_join_controllers(NULL, "example.conf", 11, "Section", 10, "JoinControllers", 0, "cpu,cpuacct net_cls,netprio", &c, NULL); - assert_se(r == 0); - assert_se(c); - assert_se(strv_length(c[0]) == 2); - assert_se(strv_equal(c[0], STRV_MAKE("cpu", "cpuacct"))); - assert_se(strv_length(c[1]) == 2); - assert_se(strv_equal(c[1], STRV_MAKE("net_cls", "netprio"))); - assert_se(c[2] == NULL); - - /* Test special case of no mounted controllers */ - r = config_parse_join_controllers(NULL, "example.conf", 12, "Section", 10, "JoinControllers", 0, "", &c, NULL); - assert_se(r == 0); - assert_se(c); - assert_se(strv_equal(c[0], STRV_MAKE_EMPTY)); - assert_se(c[1] == NULL); - - /* Test merging of overlapping lists */ - r = 
config_parse_join_controllers(NULL, "example.conf", 13, "Section", 10, "JoinControllers", 0, "a,b b,c", &c, NULL); - assert_se(r == 0); - assert_se(c); - assert_se(strv_length(c[0]) == 3); - assert_se(strv_contains(c[0], "a")); - assert_se(strv_contains(c[0], "b")); - assert_se(strv_contains(c[0], "c")); - assert_se(c[1] == NULL); - - /* Test ignoring of bad lines */ - c2 = c; - r = config_parse_join_controllers(NULL, "example.conf", 14, "Section", 10, "JoinControllers", 0, "a,\"b ", &c, NULL); - assert_se(r < 0); - assert_se(c == c2); -} - #define x10(x) x x x x x x x x x x #define x100(x) x10(x10(x)) #define x1000(x) x10(x100(x)) @@ -407,7 +368,6 @@ int main(int argc, char **argv) { test_config_parse_sec(); test_config_parse_nsec(); test_config_parse_iec_uint64(); - test_config_parse_join_controllers(); for (i = 0; i < ELEMENTSOF(config_file); i++) test_config_parse(i, config_file[i]); diff --git a/test/fuzz/fuzz-unit-file/directives.service b/test/fuzz/fuzz-unit-file/directives.service index fd830dde41..f454fd313e 100644 --- a/test/fuzz/fuzz-unit-file/directives.service +++ b/test/fuzz/fuzz-unit-file/directives.service @@ -690,7 +690,6 @@ HibernateMode= HibernateState= HybridSleepMode= HybridSleepState= -JoinControllers= LogColor= LogLevel= LogLocation= From c01ef321af6a66f9bd43ee75e23a6a433fa97c10 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 21:14:54 +0100 Subject: [PATCH 5/8] cgroup: add new helper that knows which controllers are mounted together --- src/basic/cgroup-util.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 9b410f1f58..463a8dc84a 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -60,6 +60,15 @@ typedef enum CGroupMask { _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1 } CGroupMask; +static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) { + /* We always mount "cpu" and "cpuacct" in the same 
hierarchy. Hence, when one bit is set also set the other */ + + if (mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT)) + mask |= (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT); + + return mask; +} + /* Special values for all weight knobs on unified hierarchy */ #define CGROUP_WEIGHT_INVALID ((uint64_t) -1) #define CGROUP_WEIGHT_MIN UINT64_C(1) From fae9bc298ac1dc37021d576c0793800c6e625f89 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 21:15:19 +0100 Subject: [PATCH 6/8] cgroup: when determining which controllers we need, always extend the mask according to cpu/cpuacct joint mounting Note that for cgroup_context_get_mask() this doesn't actually change much, but it does prepare the ground for #10507 later on. --- src/core/cgroup.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index baa356b32b..45a7581d45 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1175,13 +1175,15 @@ static void cgroup_context_apply( CGroupMask cgroup_context_get_mask(CGroupContext *c) { CGroupMask mask = 0; - /* Figure out which controllers we need */ + /* Figure out which controllers we need, based on the cgroup context object */ - if (c->cpu_accounting || - cgroup_context_has_cpu_weight(c) || + if (c->cpu_accounting) + mask |= CGROUP_MASK_CPUACCT; + + if (cgroup_context_has_cpu_weight(c) || cgroup_context_has_cpu_shares(c) || c->cpu_quota_per_sec_usec != USEC_INFINITY) - mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; + mask |= CGROUP_MASK_CPU; if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c)) mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; @@ -1199,12 +1201,15 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { c->tasks_max != CGROUP_LIMIT_MAX) mask |= CGROUP_MASK_PIDS; - return mask; + return CGROUP_MASK_EXTEND_JOINED(mask); } CGroupMask unit_get_bpf_mask(Unit *u) { CGroupMask mask = 0; + /* Figure out which controllers we need, based on the cgroup context, possibly 
taking into account children + * too. */ + if (unit_get_needs_bpf_firewall(u)) mask |= CGROUP_MASK_BPF_FIREWALL; @@ -1243,7 +1248,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) { } assert_se(c = unit_get_cgroup_context(u)); - return c->delegate_controllers; + return CGROUP_MASK_EXTEND_JOINED(c->delegate_controllers); } CGroupMask unit_get_members_mask(Unit *u) { From e353faa0d62a0730bbeaca1047f85decd7de65d3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 21:16:57 +0100 Subject: [PATCH 7/8] cgroup-util: when attaching/creating cgroups in multiple hierarchies, take jointly mounted controllers into account If we create a cgroup in one controller it might already have been created in another too, if we have jointly mounted controllers. Take that into consideration. --- src/basic/cgroup-util.c | 50 +++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 5c02f0cb35..5d0114d085 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2110,6 +2110,7 @@ done: int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { CGroupController c; + CGroupMask done; bool created; int r; @@ -2134,20 +2135,28 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path if (r > 0) return created; + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); + done = 0; + /* Otherwise, do the same in the other hierarchies */ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); const char *n; - if (!FLAGS_SET(CGROUP_MASK_V1, bit)) + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) continue; n = cgroup_controller_to_string(c); - if (FLAGS_SET(mask, bit)) (void) cg_create(n, path); - else if (FLAGS_SET(supported, bit)) + else (void) cg_trim(n, path, true); + + done |= CGROUP_MASK_EXTEND_JOINED(bit); } return created; @@
-2155,6 +2164,7 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { CGroupController c; + CGroupMask done; int r; r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); @@ -2167,23 +2177,26 @@ int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_m if (r > 0) return 0; + supported &= CGROUP_MASK_V1; + done = 0; + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); const char *p = NULL; - if (!FLAGS_SET(CGROUP_MASK_V1, bit)) + if (!FLAGS_SET(supported, bit)) continue; - if (!FLAGS_SET(supported, bit)) + if (FLAGS_SET(done, bit)) continue; if (path_callback) p = path_callback(bit, userdata); - if (!p) p = path; (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid); + done |= CGROUP_MASK_EXTEND_JOINED(bit); } return 0; @@ -2208,6 +2221,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { CGroupController c; + CGroupMask done; int r = 0, q; if (!path_equal(from, to)) { @@ -2222,30 +2236,34 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to if (q > 0) return r; + supported &= CGROUP_MASK_V1; + done = 0; + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); const char *p = NULL; - if (!FLAGS_SET(CGROUP_MASK_V1, bit)) + if (!FLAGS_SET(supported, bit)) continue; - if (!FLAGS_SET(supported, bit)) + if (FLAGS_SET(done, bit)) continue; if (to_callback) p = to_callback(bit, userdata); - if (!p) p = to; (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); + done |= CGROUP_MASK_EXTEND_JOINED(bit); } - return 0; + return r; } int 
cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { CGroupController c; + CGroupMask done; int r, q; r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); @@ -2258,19 +2276,23 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) if (q > 0) return r; + supported &= CGROUP_MASK_V1; + done = 0; + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - if (!FLAGS_SET(CGROUP_MASK_V1, bit)) - continue; - if (!FLAGS_SET(supported, bit)) continue; + if (FLAGS_SET(done, bit)) + continue; + (void) cg_trim(cgroup_controller_to_string(c), path, delete_root); + done |= CGROUP_MASK_EXTEND_JOINED(bit); } - return 0; + return r; } int cg_mask_to_string(CGroupMask mask, char **ret) { From f54353406a4fc29fd327fc21f13bb312675f0924 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Nov 2018 21:20:42 +0100 Subject: [PATCH 8/8] update TODO --- TODO | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO b/TODO index 86db6fcd47..161f81c63f 100644 --- a/TODO +++ b/TODO @@ -40,7 +40,7 @@ Features: * consider splitting out all temporary file creation APIs (we have so many in fileio.h and elsewhere!) into a new util file of its own. -* set memory.oom.group in cgroupsv2 for all leaf cgroups +* set memory.oom.group in cgroupsv2 for all leaf cgroups (kernel v4.19+) * drop umask() calls and suchlike from our generators, pid1 should set things up correctly anyway