From 9c6e3e1d3b9da444a16a7b484edf8be227f808e0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 16:51:03 +0100 Subject: [PATCH 01/13] cgtop: correctly order root cgroup always to the top Internally, we encode the root cgroup as empty string. However, path_compare() is allergic to comparing absolute and relative paths. Let's clean this up, by always uses "/" as path for the root cgroup when comparing. --- src/cgtop/cgtop.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 1a73fb099d..c68b56568b 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -506,6 +506,10 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration) return 0; } +static const char *empty_to_slash(const char *p) { + return isempty(p) ? "/" : p; +} + static int group_compare(const void*a, const void *b) { const Group *x = *(Group**)a, *y = *(Group**)b; @@ -515,9 +519,9 @@ static int group_compare(const void*a, const void *b) { * recursive summing is off, since that is actually * not accumulative for all children. */ - if (path_startswith(y->path, x->path)) + if (path_startswith(empty_to_slash(y->path), empty_to_slash(x->path))) return -1; - if (path_startswith(x->path, y->path)) + if (path_startswith(empty_to_slash(x->path), empty_to_slash(y->path))) return 1; } @@ -666,7 +670,7 @@ static void display(Hashmap *a) { g = array[j]; - path = isempty(g->path) ? "/" : g->path; + path = empty_to_slash(g->path); ellipsized = ellipsize(path, path_columns, 33); printf("%-*s", path_columns, ellipsized ?: path); From 4fc9ffab3bd61357ef695bd0d9d4603039fdea4a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 16:56:12 +0100 Subject: [PATCH 02/13] cgtop: add "-1" as shortcut for "--iterations=1" This is most likely the most useful use of --iterations, and such use for numeric parameters has precedents, let's make this work for cgtop too. --- man/systemd-cgtop.xml | 6 ++++++ src/cgtop/cgtop.c | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/man/systemd-cgtop.xml b/man/systemd-cgtop.xml index d7ad08ec37..295f235196 100644 --- a/man/systemd-cgtop.xml +++ b/man/systemd-cgtop.xml @@ -228,6 +228,12 @@ indefinitely. + + + + A shortcut for . + + diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index c68b56568b..1575cd51ff 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -713,6 +713,7 @@ static void help(void) { " --recursive=BOOL Sum up process count recursively\n" " -d --delay=DELAY Delay between updates\n" " -n --iterations=N Run for N iterations before exiting\n" + " -1 Shortcut for --iterations=1\n" " -b --batch Run in batch mode, accepting no input\n" " --depth=DEPTH Maximum traversal depth (default: %u)\n" " -M --machine= Show container\n" @@ -749,7 +750,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argc >= 1); assert(argv); - while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:", options, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:1", options, NULL)) >= 0) switch (c) { @@ -802,6 +803,10 @@ static int parse_argv(int argc, char *argv[]) { break; + case '1': + arg_iterations = 1; + break; + case 'b': arg_batch = true; break; From a7e6de218b255a14229953a1f6656915ece9db1b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 16:58:35 +0100 Subject: [PATCH 03/13] cgtop: add helper for checking if we are operating on the root cgroup --- src/cgtop/cgtop.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 1575cd51ff..229aadd269 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -125,6 +125,10 @@ static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64 return format_bytes(buf, l, t); } +static bool is_root_cgroup(const char *path) { + return isempty(path) || path_equal(path, "/"); +} + static int process( const char *controller, const char *path, @@ -196,7 +200,7 @@ static int process( } else if (streq(controller, "pids") && arg_count == COUNT_PIDS) { - if (isempty(path) || path_equal(path, "/")) { + if (is_root_cgroup(path)) { r = procfs_tasks_get_current(&g->n_tasks); if (r < 0) return r; From ad078b4181d4b62de4b841fcb39b5707542bd38e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 16:59:27 +0100 Subject: [PATCH 04/13] cgtop: command line parsing improvements Always output the string we were unable to parse and use log_error_errno()'s return logic to shorten our code a bit. --- src/cgtop/cgtop.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 229aadd269..4e406e4ad2 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -782,17 +782,15 @@ static int parse_argv(int argc, char *argv[]) { case ARG_DEPTH: r = safe_atou(optarg, &arg_depth); - if (r < 0) { - log_error("Failed to parse depth parameter."); - return -EINVAL; - } + if (r < 0) + return log_error_errno(r, "Failed to parse depth parameter: %s", optarg); break; case 'd': r = parse_sec(optarg, &arg_delay); if (r < 0 || arg_delay <= 0) { - log_error("Failed to parse delay parameter."); + log_error("Failed to parse delay parameter: %s", optarg); return -EINVAL; } @@ -800,10 +798,8 @@ static int parse_argv(int argc, char *argv[]) { case 'n': r = safe_atou(optarg, &arg_iterations); - if (r < 0) { - log_error("Failed to parse iterations parameter."); - return -EINVAL; - } + if (r < 0) + return log_error_errno(r, "Failed to parse iterations parameter: %s", optarg); break; @@ -866,10 +862,8 @@ static int parse_argv(int argc, char *argv[]) { case ARG_RECURSIVE: r = parse_boolean(optarg); - if (r < 0) { - log_error("Failed to parse --recursive= argument: %s", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to parse --recursive= argument: %s", optarg); arg_recursive = r; arg_recursive_unset = r == 0; From ba4b1544f2a69c6786295d437b2f970f5ed1f68f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 17:25:30 +0100 Subject: [PATCH 05/13] cgtop: tweak root cgroup detection a bit Inside a cgroup-namespaced container we shouldn't assume that "/" is really the root cgroup, because it generally is not. --- src/cgtop/cgtop.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 4e406e4ad2..27e713fd93 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -46,6 +46,7 @@ #include "terminal-util.h" #include "unit-name.h" #include "util.h" +#include "virt.h" typedef struct Group { char *path; @@ -126,6 +127,26 @@ static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64 } static bool is_root_cgroup(const char *path) { + + /* Returns true if the specified path belongs to the root cgroup. The root cgroup is special on cgroupsv2 as it + * carries only very few attributes in order not to export multiple truth about system state as most + * information is available elsewhere in /proc anyway. We need to be able to deal with that, and need to get + * our data from different sources in that case. + * + * There's one extra complication in all of this, though 😣: if the path to the cgroup indicates we are in the + * root cgroup this might actually not be the case, because cgroup namespacing might be in effect + * (CLONE_NEWCGROUP). Since there's no nice way to distuingish a real cgroup root from a fake namespaced one we + * do an explicit container check here, under the assumption that CLONE_NEWCGROUP is generally used when + * container managers are used too. + * + * Note that checking for a container environment is kinda ugly, since in theory people could use cgtop from + * inside a container where cgroup namespacing is turned off to watch the host system. However, that's mostly a + * theoretic usecase, and if people actually try all they'll lose is accounting for the top-level cgroup. Which + * isn't too bad. */ + + if (detect_container() > 0) + return false; + return isempty(path) || path_equal(path, "/"); } @@ -176,7 +197,8 @@ static int process( } } - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) { + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && + IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) { _cleanup_fclose_ FILE *f = NULL; pid_t pid; From a04fcf17badf8ee2ed549bf99c4e5a37e0f7aecf Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 17:32:26 +0100 Subject: [PATCH 06/13] procfs-util: add APIs to get consumed CPU time and used memory from /proc This is preparation for emulating the "usage_usec" keyed attribute of the "cpu.stat" property of the root cgroup from data in /proc. Similar, for emulating the "memory.current" attribute. --- src/basic/procfs-util.c | 130 ++++++++++++++++++++++++++++++++++++ src/basic/procfs-util.h | 6 ++ src/test/test-procfs-util.c | 9 +++ 3 files changed, 145 insertions(+) diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c index 9bb42cc7ba..6c2cfd2918 100644 --- a/src/basic/procfs-util.c +++ b/src/basic/procfs-util.c @@ -3,6 +3,8 @@ #include #include "alloc-util.h" +#include "def.h" +#include "fd-util.h" #include "fileio.h" #include "parse-util.h" #include "process-util.h" @@ -136,3 +138,131 @@ int procfs_tasks_get_current(uint64_t *ret) { return safe_atou64(nr, ret); } + +static uint64_t calc_gcd64(uint64_t a, uint64_t b) { + + while (b > 0) { + uint64_t t; + + t = a % b; + + a = b; + b = t; + } + + return a; +} + +int procfs_cpu_get_usage(nsec_t *ret) { + _cleanup_free_ char *first_line = NULL; + unsigned long user_ticks = 0, nice_ticks = 0, system_ticks = 0, + irq_ticks = 0, softirq_ticks = 0, + guest_ticks = 0, guest_nice_ticks = 0; + long ticks_per_second; + uint64_t sum, gcd, a, b; + const char *p; + int r; + + assert(ret); + + r = read_one_line_file("/proc/stat", &first_line); + if (r < 0) + return r; + + p = first_word(first_line, "cpu"); + if (!p) + return -EINVAL; + + if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu", + &user_ticks, + &nice_ticks, + &system_ticks, + &irq_ticks, + &softirq_ticks, + &guest_ticks, + &guest_nice_ticks) < 5) /* we only insist on the first five fields */ + return -EINVAL; + + ticks_per_second = sysconf(_SC_CLK_TCK); + if (ticks_per_second < 0) + return -errno; + assert(ticks_per_second > 0); + + sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks + + (uint64_t) irq_ticks + (uint64_t) softirq_ticks + + (uint64_t) guest_ticks + (uint64_t) guest_nice_ticks; + + /* Let's reduce this fraction before we apply it to avoid overflows when converting this to µsec */ + gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second); + + a = (uint64_t) NSEC_PER_SEC / gcd; + b = (uint64_t) ticks_per_second / gcd; + + *ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b); + return 0; +} + +int procfs_memory_get_current(uint64_t *ret) { + uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX; + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(ret); + + f = fopen("/proc/meminfo", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL; + uint64_t *v; + char *p, *e; + size_t n; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; /* EOF: Couldn't find one or both fields? */ + + p = first_word(line, "MemTotal:"); + if (p) + v = &mem_total; + else { + p = first_word(line, "MemFree:"); + if (p) + v = &mem_free; + else + continue; + } + + /* Determine length of numeric value */ + n = strspn(p, DIGITS); + if (n == 0) + return -EINVAL; + e = p + n; + + /* Ensure the line ends in " kB" */ + n = strspn(e, WHITESPACE); + if (n == 0) + return -EINVAL; + if (!streq(e + n, "kB")) + return -EINVAL; + + *e = 0; + r = safe_atou64(p, v); + if (r < 0) + return r; + if (*v == UINT64_MAX) + return -EINVAL; + + if (mem_total != UINT64_MAX && mem_free != UINT64_MAX) + break; + } + + if (mem_free > mem_total) + return -EINVAL; + + *ret = (mem_total - mem_free) * 1024U; + return 0; +} diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h index 7466acd7f3..f697ed92bc 100644 --- a/src/basic/procfs-util.h +++ b/src/basic/procfs-util.h @@ -3,6 +3,12 @@ #include +#include "time-util.h" + int procfs_tasks_get_limit(uint64_t *ret); int procfs_tasks_set_limit(uint64_t limit); int procfs_tasks_get_current(uint64_t *ret); + +int procfs_cpu_get_usage(nsec_t *ret); + +int procfs_memory_get_current(uint64_t *ret); diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c index a253182517..10229de4e8 100644 --- a/src/test/test-procfs-util.c +++ b/src/test/test-procfs-util.c @@ -3,15 +3,24 @@ #include #include "log.h" +#include "parse-util.h" #include "procfs-util.h" int main(int argc, char *argv[]) { + char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)]; + nsec_t nsec; uint64_t v; int r; log_parse_environment(); log_open(); + assert_se(procfs_cpu_get_usage(&nsec) >= 0); + log_info("Current sytem CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1)); + + assert_se(procfs_memory_get_current(&v) >= 0); + log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v)); + assert_se(procfs_tasks_get_current(&v) >= 0); log_info("Current number of tasks: %" PRIu64, v); From 744c39ff7e2646bcf90b9710b89e4769f8820025 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 17:34:31 +0100 Subject: [PATCH 07/13] cgtop: hook up new /proc based emulation code for root cgroup memory/cpu stats Let's make this work. --- src/cgtop/cgtop.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 27e713fd93..ccd5fef0af 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -252,7 +252,11 @@ static int process( uint64_t new_usage; nsec_t timestamp; - if (all_unified) { + if (is_root_cgroup(path)) { + r = procfs_cpu_get_usage(&new_usage); + if (r < 0) + return r; + } else if (all_unified) { const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; @@ -310,24 +314,31 @@ static int process( g->cpu_iteration = iteration; } else if (streq(controller, "memory")) { - _cleanup_free_ char *p = NULL, *v = NULL; - if (all_unified) - r = cg_get_path(controller, path, "memory.current", &p); - else - r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); - if (r < 0) - return r; + if (is_root_cgroup(path)) { + r = procfs_memory_get_current(&g->memory); + if (r < 0) + return r; + } else { + _cleanup_free_ char *p = NULL, *v = NULL; - r = read_one_line_file(p, &v); - if (r == -ENOENT) - return 0; - if (r < 0) - return r; + if (all_unified) + r = cg_get_path(controller, path, "memory.current", &p); + else + r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); + if (r < 0) + return r; - r = safe_atou64(v, &g->memory); - if (r < 0) - return r; + r = read_one_line_file(p, &v); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + r = safe_atou64(v, &g->memory); + if (r < 0) + return r; + } if (g->memory > 0) g->memory_valid = true; From b734a4ff1433e3804a0cbf0801fb81a059f97a79 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 18:35:52 +0100 Subject: [PATCH 08/13] cgroup-util: rework cg_get_keyed_attribute() a bit Let's make sure we don't clobber the return parameter on failure, to follow our coding style. Also, break the loop early if we have all attributes we need. This also changes the keys parameter to a simple char**, so that we can use STRV_MAKE() for passing the list of attributes to read. This also makes it possible to distuingish the case when the whole attribute file doesn't exist from one key in it missing. In the former case we return -ENOENT, in the latter we now return -ENXIO. --- src/basic/cgroup-util.c | 91 +++++++++++++++++++++++++++++++---------- src/basic/cgroup-util.h | 2 +- src/cgtop/cgtop.c | 5 +-- src/core/cgroup.c | 5 ++- 4 files changed, 76 insertions(+), 27 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 7c0ba92110..7944785bbf 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2032,46 +2032,95 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri return read_one_line_file(p, ret); } -int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) { - _cleanup_free_ char *filename = NULL, *content = NULL; - char *line, *p; - int i, r; +int cg_get_keyed_attribute( + const char *controller, + const char *path, + const char *attribute, + char **keys, + char **ret_values) { - for (i = 0; keys[i]; i++) - values[i] = NULL; + _cleanup_free_ char *filename = NULL, *contents = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *p; + size_t n, i; + char **v; + int r; + + /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with + * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of + * entries as 'keys'. On success each entry will be set to the value of the matching key. + * + * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */ r = cg_get_path(controller, path, attribute, &filename); if (r < 0) return r; - r = read_full_file(filename, &content, NULL); + r = read_full_file(filename, &contents, NULL); if (r < 0) return r; - p = content; - while ((line = strsep(&p, "\n"))) { - char *key; + n = strv_length(keys); + if (n == 0) /* No keys to retrieve? That's easy, we are done then */ + return 0; - key = strsep(&line, " "); + /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */ + v = newa0(char*, n); - for (i = 0; keys[i]; i++) { - if (streq(key, keys[i])) { - values[i] = strdup(line); - break; + for (p = contents; *p;) { + const char *w = NULL; + size_t n_done = 0; + + for (i = 0; i < n; i++) { + if (v[i]) + n_done ++; + else { + w = first_word(p, keys[i]); + if (w) + break; } } - } - for (i = 0; keys[i]; i++) { - if (!values[i]) { - for (i = 0; keys[i]; i++) { - values[i] = mfree(values[i]); + if (w) { + char *c; + size_t l; + + l = strcspn(w, NEWLINE); + c = strndup(w, l); + if (!c) { + r = -ENOMEM; + goto fail; } - return -ENOENT; + + v[i] = c; + n_done++; + + if (n_done >= n) + goto done; + + p = w + l; + } else { + if (n_done >= n) + goto done; + + p += strcspn(p, NEWLINE); } + + p += strspn(p, NEWLINE); } + r = -ENXIO; + +fail: + for (i = 0; i < n; i++) + free(v[i]); + + return r; + +done: + memcpy(ret_values, v, sizeof(char*) * n); return 0; + } int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 05c9f84505..068df102f7 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -186,7 +186,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid); int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret); -int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values); +int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values); int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid); diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index ccd5fef0af..d7708dab2d 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -257,14 +257,13 @@ static int process( if (r < 0) return r; } else if (all_unified) { - const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; if (!streq(controller, "cpu")) return 0; - r = cg_get_keyed_attribute("cpu", path, "cpu.stat", keys, &val); - if (r == -ENOENT) + r = cg_get_keyed_attribute("cpu", path, "cpu.stat", STRV_MAKE("usage_usec"), &val); + if (IN_SET(r, -ENOENT, -ENXIO)) return 0; if (r < 0) return r; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 97b3756567..8b760a9888 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2358,16 +2358,17 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (r < 0) return r; if (r > 0) { - const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; uint64_t us; if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0) return -ENODATA; - r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val); + r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val); if (r < 0) return r; + if (IN_SET(r, -ENOENT, -ENXIO)) + return -ENODATA; r = safe_atou64(val, &us); if (r < 0) From 1f73aa0021417c8ba70c78d16d6f68d3032db4ac Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 19:05:59 +0100 Subject: [PATCH 09/13] core: hook up /proc queries for the root slice, too Do what we already prepped in cgtop for the root slice in PID 1 too: consult /proc for the data we need. --- src/core/cgroup.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 8b760a9888..924285de82 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2296,6 +2296,10 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { if (!u->cgroup_path) return -ENODATA; + /* The root cgroup doesn't expose this information, let's get it from /proc instead */ + if (unit_has_root_cgroup(u)) + return procfs_memory_get_current(ret); + if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) return -ENODATA; @@ -2327,13 +2331,13 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) { if (!u->cgroup_path) return -ENODATA; - if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0) - return -ENODATA; - /* The root cgroup doesn't expose this information, let's get it from /proc instead */ if (unit_has_root_cgroup(u)) return procfs_tasks_get_current(ret); + if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0) + return -ENODATA; + r = cg_get_attribute("pids", u->cgroup_path, "pids.current", &v); if (r == -ENOENT) return -ENODATA; @@ -2354,6 +2358,10 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; + /* The root cgroup doesn't expose this information, let's get it from /proc instead */ + if (unit_has_root_cgroup(u)) + return procfs_cpu_get_usage(ret); + r = cg_all_unified(); if (r < 0) return r; From cc6271f17d3c5c200e8a391f10d0afb71403fc28 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 19:07:01 +0100 Subject: [PATCH 10/13] core: turn on memory/cpu/tasks accounting by default for the root slice The kernel exposes the necessary data in /proc anyway, let's expose it hence by default. With this in place "systemctl status -- -.slice" will show accounting data out-of-the-box now. --- src/core/cgroup.c | 27 ++++++++++++++++++++++----- src/core/cgroup.h | 1 + src/core/slice.c | 32 +++++++++++++++++++++++++------- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 924285de82..0527996c28 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -37,19 +37,34 @@ #include "stdio-util.h" #include "string-table.h" #include "string-util.h" +#include "virt.h" #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) +bool manager_owns_root_cgroup(Manager *m) { + assert(m); + + /* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the + * group root path equals "/" since that will also be the case if CLONE_NEWCGROUP is in the mix. Since there's + * appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if + * we run in any kind of container virtualization. */ + + if (detect_container() > 0) + return false; + + return isempty(m->cgroup_root) || path_equal(m->cgroup_root, "/"); +} + bool unit_has_root_cgroup(Unit *u) { assert(u); - /* Returns whether this unit manages the root cgroup. Note that this is different from being named "-.slice", - * as inside of containers the root slice won't be identical to the root cgroup. */ + /* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and + * the manager manages the root cgroup. */ - if (!u->cgroup_path) + if (!manager_owns_root_cgroup(u->manager)) return false; - return isempty(u->cgroup_path) || path_equal(u->cgroup_path, "/"); + return unit_has_name(u, SPECIAL_ROOT_SLICE); } static void cgroup_compat_warn(void) { @@ -58,7 +73,9 @@ static void cgroup_compat_warn(void) { if (cgroup_compat_warned) return; - log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details."); + log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. " + "See cgroup-compat debug messages for details."); + cgroup_compat_warned = true; } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 1f50441412..bc8a6951c9 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -208,6 +208,7 @@ int unit_reset_ip_accounting(Unit *u); cc ? cc->name : false; \ }) +bool manager_owns_root_cgroup(Manager *m); bool unit_has_root_cgroup(Unit *u); int manager_notify_cgroup_empty(Manager *m, const char *group); diff --git a/src/core/slice.c b/src/core/slice.c index ef2177279a..9cb828cae1 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -313,19 +313,18 @@ _pure_ static const char *slice_sub_state_to_string(Unit *u) { return slice_state_to_string(SLICE(u)->state); } -static void slice_enumerate_perpetual(Manager *m, const char *name) { +static int slice_make_perpetual(Manager *m, const char *name, Unit **ret) { Unit *u; int r; assert(m); + assert(name); u = manager_get_unit(m, name); if (!u) { r = unit_new_for_name(m, sizeof(Slice), name, &u); - if (r < 0) { - log_error_errno(r, "Failed to allocate the special %s unit: %m", name); - return; - } + if (r < 0) + return log_error_errno(r, "Failed to allocate the special %s unit: %m", name); } u->perpetual = true; @@ -333,15 +332,34 @@ static void slice_enumerate_perpetual(Manager *m, const char *name) { unit_add_to_load_queue(u); unit_add_to_dbus_queue(u); + + if (ret) + *ret = u; + + return 0; } static void slice_enumerate(Manager *m) { + Unit *u; + int r; + assert(m); - slice_enumerate_perpetual(m, SPECIAL_ROOT_SLICE); + r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u); + if (r >= 0 && manager_owns_root_cgroup(m)) { + Slice *s = SLICE(u); + + /* If we are managing the root cgroup then this means our root slice covers the whole system, which + * means the kernel will track CPU/tasks/memory for us anyway, and it is all available in /proc. Let's + * hence turn accounting on here, so that our APIs to query this data are available. */ + + s->cgroup_context.cpu_accounting = true; + s->cgroup_context.tasks_accounting = true; + s->cgroup_context.memory_accounting = true; + } if (MANAGER_IS_SYSTEM(m)) - slice_enumerate_perpetual(m, SPECIAL_SYSTEM_SLICE); + (void) slice_make_perpetual(m, SPECIAL_SYSTEM_SLICE, NULL); } const UnitVTable slice_vtable = { From c81a2569143455880f0177c918a477c1b1207fe5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 19:22:40 +0100 Subject: [PATCH 11/13] mkosi: no need to determine meson parameters if we don't run meson Small optimization. --- mkosi.build | 67 ++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/mkosi.build b/mkosi.build index 38cfe25025..0e644d54cb 100755 --- a/mkosi.build +++ b/mkosi.build @@ -27,43 +27,46 @@ set -ex export LC_CTYPE=en_US.UTF-8 -sysvinit_path=`realpath /etc/init.d` +if [ ! -f "$BUILDDIR"/build.ninja ] ; then + sysvinit_path=`realpath /etc/init.d` -nobody_user=`id -u -n 65534 2> /dev/null` -if [ "$nobody_user" != "" ] ; then - # Validate that we can translate forth and back - if [ "`id -u $nobody_user`" != 65534 ] ; then - nobody_user="" + nobody_user=`id -u -n 65534 2> /dev/null` + if [ "$nobody_user" != "" ] ; then + # Validate that we can translate forth and back + if [ "`id -u $nobody_user`" != 65534 ] ; then + nobody_user="" + fi fi -fi -if [ "$nobody_user" = "" ] ; then - if id -u nobody 2> /dev/null ; then - # The "nobody" user is defined already for something else, pick the Fedora name - nobody_user=nfsnobody - else - # The "nobody" user name is free, use it - nobody_user=nobody + if [ "$nobody_user" = "" ] ; then + if id -u nobody 2> /dev/null ; then + # The "nobody" user is defined already for something else, pick the Fedora name + nobody_user=nfsnobody + else + # The "nobody" user name is free, use it + nobody_user=nobody + fi fi + + nobody_group=`id -g -n 65534 2> /dev/null` + if [ "$nobody_group" != "" ] ; then + # Validate that we can translate forth and back + if [ "`id -g $nobody_group`" != 65534 ] ; then + nobody_group="" + fi + fi + if [ "$nobody_group" = "" ] ; then + if id -u nobody 2> /dev/null ; then + # The "nobody" group is defined already for something else, pick the Fedora name + nobody_group=nfsnobody + else + # The "nobody" group name is free, use it + nobody_group=nobody + fi + fi + + meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group" fi -nobody_group=`id -g -n 65534 2> /dev/null` -if [ "$nobody_group" != "" ] ; then - # Validate that we can translate forth and back - if [ "`id -g $nobody_group`" != 65534 ] ; then - nobody_group="" - fi -fi -if [ "$nobody_group" = "" ] ; then - if id -u nobody 2> /dev/null ; then - # The "nobody" group is defined already for something else, pick the Fedora name - nobody_group=nfsnobody - else - # The "nobody" group name is free, use it - nobody_group=nobody - fi -fi - -[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group" ninja -C "$BUILDDIR" all [ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" ) ninja -C "$BUILDDIR" install From 972d4398bd5e60e34e753041d280d744729308cc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Feb 2018 19:43:35 +0100 Subject: [PATCH 12/13] update TODO --- TODO | 9 --------- 1 file changed, 9 deletions(-) diff --git a/TODO b/TODO index a6cdae84a0..058cd2baf8 100644 --- a/TODO +++ b/TODO @@ -45,15 +45,6 @@ Features: sd_id128_get_machine_app_specific(). After all on long-running systems both IDs have similar properties. -* emulate properties of the root cgroup on controllers that don't support such - properties natively on cpu/io/memory, the way we already do it for - "pids". Also, add the same logic to cgtop. - -* set TasksAccounting=1 on the root slice if we are running on the root cgroup, - and similar for the others, as soon as we emulate them properly. After all, - Linux keeps these system-wide stats anyway, and it costs nothing to expose - them. - * sd-bus: add vtable flag, that may be used to request client creds implicitly and asynchronously before dispatching the operation From 9177fa9f2b59502fbd433af88f1173a705b5fb27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 1 Mar 2018 09:30:55 +0100 Subject: [PATCH 13/13] basic/cgroup-util: simplify cg_get_keyed_attribute(), add test I didn't like the nested loop where we'd count what we have acquired already, since we should always know that. --- src/basic/cgroup-util.c | 23 ++++++--------------- src/test/test-cgroup-util.c | 41 +++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 7944785bbf..9c3905e91e 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2042,7 +2042,7 @@ int cg_get_keyed_attribute( _cleanup_free_ char *filename = NULL, *contents = NULL; _cleanup_fclose_ FILE *f = NULL; const char *p; - size_t n, i; + size_t n, i, n_done = 0; char **v; int r; @@ -2069,42 +2069,31 @@ int cg_get_keyed_attribute( for (p = contents; *p;) { const char *w = NULL; - size_t n_done = 0; - for (i = 0; i < n; i++) { - if (v[i]) - n_done ++; - else { + for (i = 0; i < n; i++) + if (!v[i]) { w = first_word(p, keys[i]); if (w) break; } - } if (w) { - char *c; size_t l; l = strcspn(w, NEWLINE); - c = strndup(w, l); - if (!c) { + v[i] = strndup(w, l); + if (!v[i]) { r = -ENOMEM; goto fail; } - v[i] = c; n_done++; - if (n_done >= n) goto done; p = w + l; - } else { - if (n_done >= n) - goto done; - + } else p += strcspn(p, NEWLINE); - } p += strspn(p, NEWLINE); } diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index 2248a30635..c4163fc3a9 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -30,6 +30,7 @@ #include "special.h" #include "stat-util.h" #include "string-util.h" +#include "strv.h" #include "test-helper.h" #include "user-util.h" #include "util.h" @@ -404,6 +405,45 @@ static void test_cg_tests(void) { assert_se(!systemd); } +static void test_cg_get_keyed_attribute(void) { + _cleanup_free_ char *val = NULL; + char *vals3[3] = {}, *vals3a[3] = {}; + int i; + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "no_such_file", STRV_MAKE("no_such_attr"), &val) == -ENOENT); + assert_se(val == NULL); + + if (access("/sys/fs/cgroup/init.scope/cpu.stat", R_OK) < 0) { + log_info_errno(errno, "Skipping most of %s, /init.scope/cpu.stat not accessible: %m", __func__); + return; + } + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("no_such_attr"), &val) == -ENXIO); + assert_se(val == NULL); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec"), &val) == 0); + log_info("cpu /init.scope cpu.stat [usage_usec] → \"%s\"", val); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "no_such_attr"), vals3) == -ENXIO); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "usage_usec"), vals3) == -ENXIO); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", + STRV_MAKE("usage_usec", "user_usec", "system_usec"), vals3) == 0); + log_info("cpu /init.scope cpu.stat [usage_usec user_usec system_usec] → \"%s\", \"%s\", \"%s\"", + vals3[0], vals3[1], vals3[2]); + + assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", + STRV_MAKE("system_usec", "user_usec", "usage_usec"), vals3a) == 0); + log_info("cpu /init.scope cpu.stat [system_usec user_usec usage_usec] → \"%s\", \"%s\", \"%s\"", + vals3a[0], vals3a[1], vals3a[2]); + + for (i = 0; i < 3; i++) { + free(vals3[i]); + free(vals3a[i]); + } +} + int main(void) { log_set_max_level(LOG_DEBUG); log_parse_environment(); @@ -429,6 +469,7 @@ int main(void) { test_is_wanted_print(false); /* run twice to test caching */ test_is_wanted(); test_cg_tests(); + test_cg_get_keyed_attribute(); return 0; }