diff --git a/TODO b/TODO index 383b45af54..5fb0c59d1a 100644 --- a/TODO +++ b/TODO @@ -59,15 +59,6 @@ Features: sd_id128_get_machine_app_specific(). After all on long-running systems both IDs have similar properties. -* emulate properties of the root cgroup on controllers that don't support such - properties natively on cpu/io/memory, the way we already do it for - "pids". Also, add the same logic to cgtop. - -* set TasksAccounting=1 on the root slice if we are running on the root cgroup, - and similar for the others, as soon as we emulate them properly. After all, - Linux keeps these system-wide stats anyway, and it costs nothing to expose - them. - * sd-bus: add vtable flag, that may be used to request client creds implicitly and asynchronously before dispatching the operation diff --git a/man/systemd-cgtop.xml b/man/systemd-cgtop.xml index d7ad08ec37..295f235196 100644 --- a/man/systemd-cgtop.xml +++ b/man/systemd-cgtop.xml @@ -228,6 +228,12 @@ indefinitely. + + + + A shortcut for . + + diff --git a/mkosi.build b/mkosi.build index 38cfe25025..0e644d54cb 100755 --- a/mkosi.build +++ b/mkosi.build @@ -27,43 +27,46 @@ set -ex export LC_CTYPE=en_US.UTF-8 -sysvinit_path=`realpath /etc/init.d` +if [ ! 
-f "$BUILDDIR"/build.ninja ] ; then + sysvinit_path=`realpath /etc/init.d` -nobody_user=`id -u -n 65534 2> /dev/null` -if [ "$nobody_user" != "" ] ; then - # Validate that we can translate forth and back - if [ "`id -u $nobody_user`" != 65534 ] ; then - nobody_user="" + nobody_user=`id -u -n 65534 2> /dev/null` + if [ "$nobody_user" != "" ] ; then + # Validate that we can translate forth and back + if [ "`id -u $nobody_user`" != 65534 ] ; then + nobody_user="" + fi fi -fi -if [ "$nobody_user" = "" ] ; then - if id -u nobody 2> /dev/null ; then - # The "nobody" user is defined already for something else, pick the Fedora name - nobody_user=nfsnobody - else - # The "nobody" user name is free, use it - nobody_user=nobody + if [ "$nobody_user" = "" ] ; then + if id -u nobody 2> /dev/null ; then + # The "nobody" user is defined already for something else, pick the Fedora name + nobody_user=nfsnobody + else + # The "nobody" user name is free, use it + nobody_user=nobody + fi fi + + nobody_group=`id -g -n 65534 2> /dev/null` + if [ "$nobody_group" != "" ] ; then + # Validate that we can translate forth and back + if [ "`id -g $nobody_group`" != 65534 ] ; then + nobody_group="" + fi + fi + if [ "$nobody_group" = "" ] ; then + if id -u nobody 2> /dev/null ; then + # The "nobody" group is defined already for something else, pick the Fedora name + nobody_group=nfsnobody + else + # The "nobody" group name is free, use it + nobody_group=nobody + fi + fi + + meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group" fi -nobody_group=`id -g -n 65534 2> /dev/null` -if [ "$nobody_group" != "" ] ; then - # Validate that we can translate forth and back - if [ "`id -g $nobody_group`" != 65534 ] ; then - nobody_group="" - fi -fi -if [ "$nobody_group" = "" ] ; then - if id -u nobody 2> /dev/null ; then - # The "nobody" group is defined already for something else, pick the Fedora name - 
nobody_group=nfsnobody - else - # The "nobody" group name is free, use it - nobody_group=nobody - fi -fi - -[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group" ninja -C "$BUILDDIR" all [ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" ) ninja -C "$BUILDDIR" install diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 52ae37e6b4..9a4dd72270 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2030,46 +2030,84 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri return read_one_line_file(p, ret); } -int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) { - _cleanup_free_ char *filename = NULL, *content = NULL; - char *line, *p; - int i, r; +int cg_get_keyed_attribute( + const char *controller, + const char *path, + const char *attribute, + char **keys, + char **ret_values) { - for (i = 0; keys[i]; i++) - values[i] = NULL; + _cleanup_free_ char *filename = NULL, *contents = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *p; + size_t n, i, n_done = 0; + char **v; + int r; + + /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with + * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of + * entries as 'keys'. On success each entry will be set to the value of the matching key. + * + * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. 
*/ r = cg_get_path(controller, path, attribute, &filename); if (r < 0) return r; - r = read_full_file(filename, &content, NULL); + r = read_full_file(filename, &contents, NULL); if (r < 0) return r; - p = content; - while ((line = strsep(&p, "\n"))) { - char *key; + n = strv_length(keys); + if (n == 0) /* No keys to retrieve? That's easy, we are done then */ + return 0; - key = strsep(&line, " "); + /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */ + v = newa0(char*, n); - for (i = 0; keys[i]; i++) { - if (streq(key, keys[i])) { - values[i] = strdup(line); - break; + for (p = contents; *p;) { + const char *w = NULL; + + for (i = 0; i < n; i++) + if (!v[i]) { + w = first_word(p, keys[i]); + if (w) + break; } - } + + if (w) { + size_t l; + + l = strcspn(w, NEWLINE); + v[i] = strndup(w, l); + if (!v[i]) { + r = -ENOMEM; + goto fail; + } + + n_done++; + if (n_done >= n) + goto done; + + p = w + l; + } else + p += strcspn(p, NEWLINE); + + p += strspn(p, NEWLINE); } - for (i = 0; keys[i]; i++) { - if (!values[i]) { - for (i = 0; keys[i]; i++) { - values[i] = mfree(values[i]); - } - return -ENOENT; - } - } + r = -ENXIO; +fail: + for (i = 0; i < n; i++) + free(v[i]); + + return r; + +done: + memcpy(ret_values, v, sizeof(char*) * n); return 0; + } int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 05c9f84505..068df102f7 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -186,7 +186,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid); int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret); -int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values); +int 
cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values); int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid); diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c index 9bb42cc7ba..6c2cfd2918 100644 --- a/src/basic/procfs-util.c +++ b/src/basic/procfs-util.c @@ -3,6 +3,8 @@ #include #include "alloc-util.h" +#include "def.h" +#include "fd-util.h" #include "fileio.h" #include "parse-util.h" #include "process-util.h" @@ -136,3 +138,131 @@ int procfs_tasks_get_current(uint64_t *ret) { return safe_atou64(nr, ret); } + +static uint64_t calc_gcd64(uint64_t a, uint64_t b) { + + while (b > 0) { + uint64_t t; + + t = a % b; + + a = b; + b = t; + } + + return a; +} + +int procfs_cpu_get_usage(nsec_t *ret) { + _cleanup_free_ char *first_line = NULL; + unsigned long user_ticks = 0, nice_ticks = 0, system_ticks = 0, + irq_ticks = 0, softirq_ticks = 0, + guest_ticks = 0, guest_nice_ticks = 0; + long ticks_per_second; + uint64_t sum, gcd, a, b; + const char *p; + int r; + + assert(ret); + + r = read_one_line_file("/proc/stat", &first_line); + if (r < 0) + return r; + + p = first_word(first_line, "cpu"); + if (!p) + return -EINVAL; + + if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu", + &user_ticks, + &nice_ticks, + &system_ticks, + &irq_ticks, + &softirq_ticks, + &guest_ticks, + &guest_nice_ticks) < 5) /* we only insist on the first five fields */ + return -EINVAL; + + ticks_per_second = sysconf(_SC_CLK_TCK); + if (ticks_per_second < 0) + return -errno; + assert(ticks_per_second > 0); + + sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks + + (uint64_t) irq_ticks + (uint64_t) softirq_ticks + + (uint64_t) guest_ticks + (uint64_t) guest_nice_ticks; + + /* Let's reduce this fraction before we apply it to avoid overflows when converting this to nsec */ + gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second); + + a = (uint64_t) NSEC_PER_SEC / gcd; + b = 
(uint64_t) ticks_per_second / gcd; + + *ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b); + return 0; +} + +int procfs_memory_get_current(uint64_t *ret) { + uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX; + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(ret); + + f = fopen("/proc/meminfo", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL; + uint64_t *v; + char *p, *e; + size_t n; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; /* EOF: Couldn't find one or both fields? */ + + p = first_word(line, "MemTotal:"); + if (p) + v = &mem_total; + else { + p = first_word(line, "MemFree:"); + if (p) + v = &mem_free; + else + continue; + } + + /* Determine length of numeric value */ + n = strspn(p, DIGITS); + if (n == 0) + return -EINVAL; + e = p + n; + + /* Ensure the line ends in " kB" */ + n = strspn(e, WHITESPACE); + if (n == 0) + return -EINVAL; + if (!streq(e + n, "kB")) + return -EINVAL; + + *e = 0; + r = safe_atou64(p, v); + if (r < 0) + return r; + if (*v == UINT64_MAX) + return -EINVAL; + + if (mem_total != UINT64_MAX && mem_free != UINT64_MAX) + break; + } + + if (mem_free > mem_total) + return -EINVAL; + + *ret = (mem_total - mem_free) * 1024U; + return 0; +} diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h index 7466acd7f3..f697ed92bc 100644 --- a/src/basic/procfs-util.h +++ b/src/basic/procfs-util.h @@ -3,6 +3,12 @@ #include +#include "time-util.h" + int procfs_tasks_get_limit(uint64_t *ret); int procfs_tasks_set_limit(uint64_t limit); int procfs_tasks_get_current(uint64_t *ret); + +int procfs_cpu_get_usage(nsec_t *ret); + +int procfs_memory_get_current(uint64_t *ret); diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 413946182c..4cab5a253c 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -46,6 +46,7 @@ #include "terminal-util.h" #include "unit-name.h" #include "util.h" +#include "virt.h" typedef struct Group { char *path; 
@@ -125,6 +126,30 @@ static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64 return format_bytes(buf, l, t); } +static bool is_root_cgroup(const char *path) { + + /* Returns true if the specified path belongs to the root cgroup. The root cgroup is special on cgroupsv2 as it + * carries only very few attributes in order not to export multiple truths about system state as most + * information is available elsewhere in /proc anyway. We need to be able to deal with that, and need to get + * our data from different sources in that case. + * + * There's one extra complication in all of this, though 😣: if the path to the cgroup indicates we are in the + * root cgroup this might actually not be the case, because cgroup namespacing might be in effect + * (CLONE_NEWCGROUP). Since there's no nice way to distinguish a real cgroup root from a fake namespaced one we + * do an explicit container check here, under the assumption that CLONE_NEWCGROUP is generally used when + * container managers are used too. + * + * Note that checking for a container environment is kinda ugly, since in theory people could use cgtop from + * inside a container where cgroup namespacing is turned off to watch the host system. However, that's mostly a + * theoretic usecase, and if people actually try all they'll lose is accounting for the top-level cgroup. Which + * isn't too bad. 
*/ + + if (detect_container() > 0) + return false; + + return isempty(path) || path_equal(path, "/"); +} + static int process( const char *controller, const char *path, @@ -172,7 +197,8 @@ static int process( } } - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) { + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && + IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) { _cleanup_fclose_ FILE *f = NULL; pid_t pid; @@ -196,7 +222,7 @@ static int process( } else if (streq(controller, "pids") && arg_count == COUNT_PIDS) { - if (isempty(path) || path_equal(path, "/")) { + if (is_root_cgroup(path)) { r = procfs_tasks_get_current(&g->n_tasks); if (r < 0) return r; @@ -226,15 +252,18 @@ static int process( uint64_t new_usage; nsec_t timestamp; - if (all_unified) { - const char *keys[] = { "usage_usec", NULL }; + if (is_root_cgroup(path)) { + r = procfs_cpu_get_usage(&new_usage); + if (r < 0) + return r; + } else if (all_unified) { _cleanup_free_ char *val = NULL; if (!streq(controller, "cpu")) return 0; - r = cg_get_keyed_attribute("cpu", path, "cpu.stat", keys, &val); - if (r == -ENOENT) + r = cg_get_keyed_attribute("cpu", path, "cpu.stat", STRV_MAKE("usage_usec"), &val); + if (IN_SET(r, -ENOENT, -ENXIO)) return 0; if (r < 0) return r; @@ -284,24 +313,31 @@ static int process( g->cpu_iteration = iteration; } else if (streq(controller, "memory")) { - _cleanup_free_ char *p = NULL, *v = NULL; - if (all_unified) - r = cg_get_path(controller, path, "memory.current", &p); - else - r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); - if (r < 0) - return r; + if (is_root_cgroup(path)) { + r = procfs_memory_get_current(&g->memory); + if (r < 0) + return r; + } else { + _cleanup_free_ char *p = NULL, *v = NULL; - r = read_one_line_file(p, &v); - if (r == -ENOENT) - return 0; - if (r < 0) - return r; + if (all_unified) + r = cg_get_path(controller, path, "memory.current", &p); + 
else + r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); + if (r < 0) + return r; - r = safe_atou64(v, &g->memory); - if (r < 0) - return r; + r = read_one_line_file(p, &v); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + r = safe_atou64(v, &g->memory); + if (r < 0) + return r; + } if (g->memory > 0) g->memory_valid = true; @@ -506,6 +542,10 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration) return 0; } +static const char *empty_to_slash(const char *p) { + return isempty(p) ? "/" : p; +} + static int group_compare(const void*a, const void *b) { const Group *x = *(Group**)a, *y = *(Group**)b; @@ -515,9 +555,9 @@ static int group_compare(const void*a, const void *b) { * recursive summing is off, since that is actually * not accumulative for all children. */ - if (path_startswith(y->path, x->path)) + if (path_startswith(empty_to_slash(y->path), empty_to_slash(x->path))) return -1; - if (path_startswith(x->path, y->path)) + if (path_startswith(empty_to_slash(x->path), empty_to_slash(y->path))) return 1; } @@ -666,7 +706,7 @@ static void display(Hashmap *a) { g = array[j]; - path = isempty(g->path) ? 
"/" : g->path; + path = empty_to_slash(g->path); ellipsized = ellipsize(path, path_columns, 33); printf("%-*s", path_columns, ellipsized ?: path); @@ -709,6 +749,7 @@ static void help(void) { " --recursive=BOOL Sum up process count recursively\n" " -d --delay=DELAY Delay between updates\n" " -n --iterations=N Run for N iterations before exiting\n" + " -1 Shortcut for --iterations=1\n" " -b --batch Run in batch mode, accepting no input\n" " --depth=DEPTH Maximum traversal depth (default: %u)\n" " -M --machine= Show container\n" @@ -745,7 +786,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argc >= 1); assert(argv); - while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:", options, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:1", options, NULL)) >= 0) switch (c) { @@ -773,17 +814,15 @@ static int parse_argv(int argc, char *argv[]) { case ARG_DEPTH: r = safe_atou(optarg, &arg_depth); - if (r < 0) { - log_error("Failed to parse depth parameter."); - return -EINVAL; - } + if (r < 0) + return log_error_errno(r, "Failed to parse depth parameter: %s", optarg); break; case 'd': r = parse_sec(optarg, &arg_delay); if (r < 0 || arg_delay <= 0) { - log_error("Failed to parse delay parameter."); + log_error("Failed to parse delay parameter: %s", optarg); return -EINVAL; } @@ -791,13 +830,15 @@ static int parse_argv(int argc, char *argv[]) { case 'n': r = safe_atou(optarg, &arg_iterations); - if (r < 0) { - log_error("Failed to parse iterations parameter."); - return -EINVAL; - } + if (r < 0) + return log_error_errno(r, "Failed to parse iterations parameter: %s", optarg); break; + case '1': + arg_iterations = 1; + break; + case 'b': arg_batch = true; break; @@ -853,10 +894,8 @@ static int parse_argv(int argc, char *argv[]) { case ARG_RECURSIVE: r = parse_boolean(optarg); - if (r < 0) { - log_error("Failed to parse --recursive= argument: %s", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to parse --recursive= argument: 
%s", optarg); arg_recursive = r; arg_recursive_unset = r == 0; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 52431ec12f..3c0ff09639 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -38,19 +38,34 @@ #include "stdio-util.h" #include "string-table.h" #include "string-util.h" +#include "virt.h" #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) +bool manager_owns_root_cgroup(Manager *m) { + assert(m); + + /* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the + * group root path equals "/" since that will also be the case if CLONE_NEWCGROUP is in the mix. Since there + * appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if + * we run in any kind of container virtualization. */ + + if (detect_container() > 0) + return false; + + return isempty(m->cgroup_root) || path_equal(m->cgroup_root, "/"); +} + bool unit_has_root_cgroup(Unit *u) { assert(u); - /* Returns whether this unit manages the root cgroup. Note that this is different from being named "-.slice", - * as inside of containers the root slice won't be identical to the root cgroup. */ + /* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and + * the manager manages the root cgroup. */ - if (!u->cgroup_path) + if (!manager_owns_root_cgroup(u->manager)) return false; - return isempty(u->cgroup_path) || path_equal(u->cgroup_path, "/"); + return unit_has_name(u, SPECIAL_ROOT_SLICE); } static void cgroup_compat_warn(void) { @@ -59,7 +74,9 @@ static void cgroup_compat_warn(void) { if (cgroup_compat_warned) return; - log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details."); + log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. 
" + "See cgroup-compat debug messages for details."); + cgroup_compat_warned = true; } @@ -2406,6 +2423,10 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { if (!u->cgroup_path) return -ENODATA; + /* The root cgroup doesn't expose this information, let's get it from /proc instead */ + if (unit_has_root_cgroup(u)) + return procfs_memory_get_current(ret); + if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) return -ENODATA; @@ -2437,13 +2458,13 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) { if (!u->cgroup_path) return -ENODATA; - if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0) - return -ENODATA; - /* The root cgroup doesn't expose this information, let's get it from /proc instead */ if (unit_has_root_cgroup(u)) return procfs_tasks_get_current(ret); + if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0) + return -ENODATA; + r = cg_get_attribute("pids", u->cgroup_path, "pids.current", &v); if (r == -ENOENT) return -ENODATA; @@ -2464,20 +2485,25 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; + /* The root cgroup doesn't expose this information, let's get it from /proc instead */ + if (unit_has_root_cgroup(u)) + return procfs_cpu_get_usage(ret); + r = cg_all_unified(); if (r < 0) return r; if (r > 0) { - const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; uint64_t us; if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0) return -ENODATA; - r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val); + r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val); if (r < 0) return r; + if (IN_SET(r, -ENOENT, -ENXIO)) + return -ENODATA; r = safe_atou64(val, &us); if (r < 0) diff --git a/src/core/cgroup.h b/src/core/cgroup.h index e2e875d1c7..ae5f1c7647 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -209,6 +209,7 @@ int unit_reset_ip_accounting(Unit *u); cc ? 
cc->name : false; \ }) +bool manager_owns_root_cgroup(Manager *m); bool unit_has_root_cgroup(Unit *u); int manager_notify_cgroup_empty(Manager *m, const char *group); diff --git a/src/core/slice.c b/src/core/slice.c index 2228669e00..1c4574b8bb 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -313,19 +313,18 @@ _pure_ static const char *slice_sub_state_to_string(Unit *u) { return slice_state_to_string(SLICE(u)->state); } -static void slice_enumerate_perpetual(Manager *m, const char *name) { +static int slice_make_perpetual(Manager *m, const char *name, Unit **ret) { Unit *u; int r; assert(m); + assert(name); u = manager_get_unit(m, name); if (!u) { r = unit_new_for_name(m, sizeof(Slice), name, &u); - if (r < 0) { - log_error_errno(r, "Failed to allocate the special %s unit: %m", name); - return; - } + if (r < 0) + return log_error_errno(r, "Failed to allocate the special %s unit: %m", name); } u->perpetual = true; @@ -333,15 +332,34 @@ static void slice_enumerate_perpetual(Manager *m, const char *name) { unit_add_to_load_queue(u); unit_add_to_dbus_queue(u); + + if (ret) + *ret = u; + + return 0; } static void slice_enumerate(Manager *m) { + Unit *u; + int r; + assert(m); - slice_enumerate_perpetual(m, SPECIAL_ROOT_SLICE); + r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u); + if (r >= 0 && manager_owns_root_cgroup(m)) { + Slice *s = SLICE(u); + + /* If we are managing the root cgroup then this means our root slice covers the whole system, which + * means the kernel will track CPU/tasks/memory for us anyway, and it is all available in /proc. Let's + * hence turn accounting on here, so that our APIs to query this data are available. 
*/ + + s->cgroup_context.cpu_accounting = true; + s->cgroup_context.tasks_accounting = true; + s->cgroup_context.memory_accounting = true; + } if (MANAGER_IS_SYSTEM(m)) - slice_enumerate_perpetual(m, SPECIAL_SYSTEM_SLICE); + (void) slice_make_perpetual(m, SPECIAL_SYSTEM_SLICE, NULL); } const UnitVTable slice_vtable = { diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index 2248a30635..c4163fc3a9 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -30,6 +30,7 @@ #include "special.h" #include "stat-util.h" #include "string-util.h" +#include "strv.h" #include "test-helper.h" #include "user-util.h" #include "util.h" @@ -404,6 +405,45 @@ static void test_cg_tests(void) { assert_se(!systemd); } +static void test_cg_get_keyed_attribute(void) { + _cleanup_free_ char *val = NULL; + char *vals3[3] = {}, *vals3a[3] = {}; + int i; + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "no_such_file", STRV_MAKE("no_such_attr"), &val) == -ENOENT); + assert_se(val == NULL); + + if (access("/sys/fs/cgroup/init.scope/cpu.stat", R_OK) < 0) { + log_info_errno(errno, "Skipping most of %s, /init.scope/cpu.stat not accessible: %m", __func__); + return; + } + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("no_such_attr"), &val) == -ENXIO); + assert_se(val == NULL); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec"), &val) == 0); + log_info("cpu /init.scope cpu.stat [usage_usec] → \"%s\"", val); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "no_such_attr"), vals3) == -ENXIO); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "usage_usec"), vals3) == -ENXIO); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", + STRV_MAKE("usage_usec", "user_usec", "system_usec"), vals3) == 0); + log_info("cpu /init.scope cpu.stat [usage_usec user_usec system_usec] → 
\"%s\", \"%s\", \"%s\"", + vals3[0], vals3[1], vals3[2]); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", + STRV_MAKE("system_usec", "user_usec", "usage_usec"), vals3a) == 0); + log_info("cpu /init.scope cpu.stat [system_usec user_usec usage_usec] → \"%s\", \"%s\", \"%s\"", + vals3a[0], vals3a[1], vals3a[2]); + + for (i = 0; i < 3; i++) { + free(vals3[i]); + free(vals3a[i]); + } +} + int main(void) { log_set_max_level(LOG_DEBUG); log_parse_environment(); @@ -429,6 +469,7 @@ int main(void) { test_is_wanted_print(false); /* run twice to test caching */ test_is_wanted(); test_cg_tests(); + test_cg_get_keyed_attribute(); return 0; } diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c index a253182517..10229de4e8 100644 --- a/src/test/test-procfs-util.c +++ b/src/test/test-procfs-util.c @@ -3,15 +3,24 @@ #include #include "log.h" +#include "parse-util.h" #include "procfs-util.h" int main(int argc, char *argv[]) { + char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)]; + nsec_t nsec; uint64_t v; int r; log_parse_environment(); log_open(); + assert_se(procfs_cpu_get_usage(&nsec) >= 0); + log_info("Current sytem CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1)); + + assert_se(procfs_memory_get_current(&v) >= 0); + log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v)); + assert_se(procfs_tasks_get_current(&v) >= 0); log_info("Current number of tasks: %" PRIu64, v);