diff --git a/TODO b/TODO
index 383b45af54..5fb0c59d1a 100644
--- a/TODO
+++ b/TODO
@@ -59,15 +59,6 @@ Features:
sd_id128_get_machine_app_specific(). After all on long-running systems both
IDs have similar properties.
-* emulate properties of the root cgroup on controllers that don't support such
- properties natively on cpu/io/memory, the way we already do it for
- "pids". Also, add the same logic to cgtop.
-
-* set TasksAccounting=1 on the root slice if we are running on the root cgroup,
- and similar for the others, as soon as we emulate them properly. After all,
- Linux keeps these system-wide stats anyway, and it costs nothing to expose
- them.
-
* sd-bus: add vtable flag, that may be used to request client creds implicitly
and asynchronously before dispatching the operation
diff --git a/man/systemd-cgtop.xml b/man/systemd-cgtop.xml
index d7ad08ec37..295f235196 100644
--- a/man/systemd-cgtop.xml
+++ b/man/systemd-cgtop.xml
@@ -228,6 +228,12 @@
indefinitely.
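+ <varlistentry>
+ <term><option>-1</option></term>
+
+ <listitem><para>A shortcut for <option>--iterations=1</option>.</para></listitem>
+ </varlistentry>
+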
diff --git a/mkosi.build b/mkosi.build
index 38cfe25025..0e644d54cb 100755
--- a/mkosi.build
+++ b/mkosi.build
@@ -27,43 +27,46 @@ set -ex
export LC_CTYPE=en_US.UTF-8
-sysvinit_path=`realpath /etc/init.d`
+if [ ! -f "$BUILDDIR"/build.ninja ] ; then
+ sysvinit_path=`realpath /etc/init.d`
-nobody_user=`id -u -n 65534 2> /dev/null`
-if [ "$nobody_user" != "" ] ; then
- # Validate that we can translate forth and back
- if [ "`id -u $nobody_user`" != 65534 ] ; then
- nobody_user=""
+ nobody_user=`id -u -n 65534 2> /dev/null`
+ if [ "$nobody_user" != "" ] ; then
+ # Validate that we can translate forth and back
+ if [ "`id -u $nobody_user`" != 65534 ] ; then
+ nobody_user=""
+ fi
fi
-fi
-if [ "$nobody_user" = "" ] ; then
- if id -u nobody 2> /dev/null ; then
- # The "nobody" user is defined already for something else, pick the Fedora name
- nobody_user=nfsnobody
- else
- # The "nobody" user name is free, use it
- nobody_user=nobody
+ if [ "$nobody_user" = "" ] ; then
+ if id -u nobody 2> /dev/null ; then
+ # The "nobody" user is defined already for something else, pick the Fedora name
+ nobody_user=nfsnobody
+ else
+ # The "nobody" user name is free, use it
+ nobody_user=nobody
+ fi
fi
+
+ nobody_group=`id -g -n 65534 2> /dev/null`
+ if [ "$nobody_group" != "" ] ; then
+ # Validate that we can translate forth and back
+ if [ "`id -g $nobody_group`" != 65534 ] ; then
+ nobody_group=""
+ fi
+ fi
+ if [ "$nobody_group" = "" ] ; then
+ if id -u nobody 2> /dev/null ; then
+ # The "nobody" group is defined already for something else, pick the Fedora name
+ nobody_group=nfsnobody
+ else
+ # The "nobody" group name is free, use it
+ nobody_group=nobody
+ fi
+ fi
+
+ meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group"
fi
-nobody_group=`id -g -n 65534 2> /dev/null`
-if [ "$nobody_group" != "" ] ; then
- # Validate that we can translate forth and back
- if [ "`id -g $nobody_group`" != 65534 ] ; then
- nobody_group=""
- fi
-fi
-if [ "$nobody_group" = "" ] ; then
- if id -u nobody 2> /dev/null ; then
- # The "nobody" group is defined already for something else, pick the Fedora name
- nobody_group=nfsnobody
- else
- # The "nobody" group name is free, use it
- nobody_group=nobody
- fi
-fi
-
-[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR" -D "sysvinit-path=$sysvinit_path" -D default-hierarchy=unified -D man=false -D "nobody-user=$nobody_user" -D "nobody-group=$nobody_group"
ninja -C "$BUILDDIR" all
[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
ninja -C "$BUILDDIR" install
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 52ae37e6b4..9a4dd72270 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -2030,46 +2030,84 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri
return read_one_line_file(p, ret);
}
-int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
- _cleanup_free_ char *filename = NULL, *content = NULL;
- char *line, *p;
- int i, r;
+int cg_get_keyed_attribute(
+ const char *controller,
+ const char *path,
+ const char *attribute,
+ char **keys,
+ char **ret_values) {
- for (i = 0; keys[i]; i++)
- values[i] = NULL;
+ _cleanup_free_ char *filename = NULL, *contents = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ size_t n, i, n_done = 0;
+ char **v;
+ int r;
+
+ /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
+ * all keys to retrieve. The 'ret_values' parameter should be passed as string array with the same number of
+ * entries as 'keys'. On success each entry will be set to the value of the matching key.
+ *
+ * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
r = cg_get_path(controller, path, attribute, &filename);
if (r < 0)
return r;
- r = read_full_file(filename, &content, NULL);
+ r = read_full_file(filename, &contents, NULL);
if (r < 0)
return r;
- p = content;
- while ((line = strsep(&p, "\n"))) {
- char *key;
+ n = strv_length(keys);
+ if (n == 0) /* No keys to retrieve? That's easy, we are done then */
+ return 0;
- key = strsep(&line, " ");
+ /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
+ v = newa0(char*, n);
- for (i = 0; keys[i]; i++) {
- if (streq(key, keys[i])) {
- values[i] = strdup(line);
- break;
+ for (p = contents; *p;) {
+ const char *w = NULL;
+
+ for (i = 0; i < n; i++)
+ if (!v[i]) {
+ w = first_word(p, keys[i]);
+ if (w)
+ break;
}
- }
+
+ if (w) {
+ size_t l;
+
+ l = strcspn(w, NEWLINE);
+ v[i] = strndup(w, l);
+ if (!v[i]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n_done++;
+ if (n_done >= n)
+ goto done;
+
+ p = w + l;
+ } else
+ p += strcspn(p, NEWLINE);
+
+ p += strspn(p, NEWLINE);
}
- for (i = 0; keys[i]; i++) {
- if (!values[i]) {
- for (i = 0; keys[i]; i++) {
- values[i] = mfree(values[i]);
- }
- return -ENOENT;
- }
- }
+ r = -ENXIO;
+fail:
+ for (i = 0; i < n; i++)
+ free(v[i]);
+
+ return r;
+
+done:
+ memcpy(ret_values, v, sizeof(char*) * n);
return 0;
+
}
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 05c9f84505..068df102f7 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -186,7 +186,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
-int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values);
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values);
int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
index 9bb42cc7ba..6c2cfd2918 100644
--- a/src/basic/procfs-util.c
+++ b/src/basic/procfs-util.c
@@ -3,6 +3,8 @@
#include
#include "alloc-util.h"
+#include "def.h"
+#include "fd-util.h"
#include "fileio.h"
#include "parse-util.h"
#include "process-util.h"
@@ -136,3 +138,131 @@ int procfs_tasks_get_current(uint64_t *ret) {
return safe_atou64(nr, ret);
}
+
+static uint64_t calc_gcd64(uint64_t a, uint64_t b) {
+
+ while (b > 0) {
+ uint64_t t;
+
+ t = a % b;
+
+ a = b;
+ b = t;
+ }
+
+ return a;
+}
+
+int procfs_cpu_get_usage(nsec_t *ret) {
+ _cleanup_free_ char *first_line = NULL;
+ unsigned long user_ticks = 0, nice_ticks = 0, system_ticks = 0,
+ irq_ticks = 0, softirq_ticks = 0,
+ guest_ticks = 0, guest_nice_ticks = 0;
+ long ticks_per_second;
+ uint64_t sum, gcd, a, b;
+ const char *p;
+ int r;
+
+ assert(ret);
+
+ r = read_one_line_file("/proc/stat", &first_line);
+ if (r < 0)
+ return r;
+
+ p = first_word(first_line, "cpu");
+ if (!p)
+ return -EINVAL;
+
+ if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu",
+ &user_ticks,
+ &nice_ticks,
+ &system_ticks,
+ &irq_ticks,
+ &softirq_ticks,
+ &guest_ticks,
+ &guest_nice_ticks) < 5) /* we only insist on the first five fields */
+ return -EINVAL;
+
+ ticks_per_second = sysconf(_SC_CLK_TCK);
+ if (ticks_per_second < 0)
+ return -errno;
+ assert(ticks_per_second > 0);
+
+ sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks +
+ (uint64_t) irq_ticks + (uint64_t) softirq_ticks +
+ (uint64_t) guest_ticks + (uint64_t) guest_nice_ticks;
+
+ /* Let's reduce this fraction before we apply it to avoid overflows when converting this to nsec */
+ gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second);
+
+ a = (uint64_t) NSEC_PER_SEC / gcd;
+ b = (uint64_t) ticks_per_second / gcd;
+
+ *ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b);
+ return 0;
+}
+
+int procfs_memory_get_current(uint64_t *ret) {
+ uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(ret);
+
+ f = fopen("/proc/meminfo", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uint64_t *v;
+ char *p, *e;
+ size_t n;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL; /* EOF: Couldn't find one or both fields? */
+
+ p = first_word(line, "MemTotal:");
+ if (p)
+ v = &mem_total;
+ else {
+ p = first_word(line, "MemFree:");
+ if (p)
+ v = &mem_free;
+ else
+ continue;
+ }
+
+ /* Determine length of numeric value */
+ n = strspn(p, DIGITS);
+ if (n == 0)
+ return -EINVAL;
+ e = p + n;
+
+ /* Ensure the line ends in " kB" */
+ n = strspn(e, WHITESPACE);
+ if (n == 0)
+ return -EINVAL;
+ if (!streq(e + n, "kB"))
+ return -EINVAL;
+
+ *e = 0;
+ r = safe_atou64(p, v);
+ if (r < 0)
+ return r;
+ if (*v == UINT64_MAX)
+ return -EINVAL;
+
+ if (mem_total != UINT64_MAX && mem_free != UINT64_MAX)
+ break;
+ }
+
+ if (mem_free > mem_total)
+ return -EINVAL;
+
+ *ret = (mem_total - mem_free) * 1024U;
+ return 0;
+}
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
index 7466acd7f3..f697ed92bc 100644
--- a/src/basic/procfs-util.h
+++ b/src/basic/procfs-util.h
@@ -3,6 +3,12 @@
#include
+#include "time-util.h"
+
int procfs_tasks_get_limit(uint64_t *ret);
int procfs_tasks_set_limit(uint64_t limit);
int procfs_tasks_get_current(uint64_t *ret);
+
+int procfs_cpu_get_usage(nsec_t *ret);
+
+int procfs_memory_get_current(uint64_t *ret);
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index 413946182c..4cab5a253c 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -46,6 +46,7 @@
#include "terminal-util.h"
#include "unit-name.h"
#include "util.h"
+#include "virt.h"
typedef struct Group {
char *path;
@@ -125,6 +126,30 @@ static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64
return format_bytes(buf, l, t);
}
+static bool is_root_cgroup(const char *path) {
+
+ /* Returns true if the specified path belongs to the root cgroup. The root cgroup is special on cgroupsv2 as it
+ * carries only very few attributes in order not to export multiple truths about system state as most
+ * information is available elsewhere in /proc anyway. We need to be able to deal with that, and need to get
+ * our data from different sources in that case.
+ *
+ * There's one extra complication in all of this, though 😣: if the path to the cgroup indicates we are in the
+ * root cgroup this might actually not be the case, because cgroup namespacing might be in effect
+ * (CLONE_NEWCGROUP). Since there's no nice way to distinguish a real cgroup root from a fake namespaced one we
+ * do an explicit container check here, under the assumption that CLONE_NEWCGROUP is generally used when
+ * container managers are used too.
+ *
+ * Note that checking for a container environment is kinda ugly, since in theory people could use cgtop from
+ * inside a container where cgroup namespacing is turned off to watch the host system. However, that's mostly a
+ * theoretic usecase, and if people actually try all they'll lose is accounting for the top-level cgroup. Which
+ * isn't too bad. */
+
+ if (detect_container() > 0)
+ return false;
+
+ return isempty(path) || path_equal(path, "/");
+}
+
static int process(
const char *controller,
const char *path,
@@ -172,7 +197,8 @@ static int process(
}
}
- if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) {
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) &&
+ IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) {
_cleanup_fclose_ FILE *f = NULL;
pid_t pid;
@@ -196,7 +222,7 @@ static int process(
} else if (streq(controller, "pids") && arg_count == COUNT_PIDS) {
- if (isempty(path) || path_equal(path, "/")) {
+ if (is_root_cgroup(path)) {
r = procfs_tasks_get_current(&g->n_tasks);
if (r < 0)
return r;
@@ -226,15 +252,18 @@ static int process(
uint64_t new_usage;
nsec_t timestamp;
- if (all_unified) {
- const char *keys[] = { "usage_usec", NULL };
+ if (is_root_cgroup(path)) {
+ r = procfs_cpu_get_usage(&new_usage);
+ if (r < 0)
+ return r;
+ } else if (all_unified) {
_cleanup_free_ char *val = NULL;
if (!streq(controller, "cpu"))
return 0;
- r = cg_get_keyed_attribute("cpu", path, "cpu.stat", keys, &val);
- if (r == -ENOENT)
+ r = cg_get_keyed_attribute("cpu", path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+ if (IN_SET(r, -ENOENT, -ENXIO))
return 0;
if (r < 0)
return r;
@@ -284,24 +313,31 @@ static int process(
g->cpu_iteration = iteration;
} else if (streq(controller, "memory")) {
- _cleanup_free_ char *p = NULL, *v = NULL;
- if (all_unified)
- r = cg_get_path(controller, path, "memory.current", &p);
- else
- r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
- if (r < 0)
- return r;
+ if (is_root_cgroup(path)) {
+ r = procfs_memory_get_current(&g->memory);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *p = NULL, *v = NULL;
- r = read_one_line_file(p, &v);
- if (r == -ENOENT)
- return 0;
- if (r < 0)
- return r;
+ if (all_unified)
+ r = cg_get_path(controller, path, "memory.current", &p);
+ else
+ r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
+ if (r < 0)
+ return r;
- r = safe_atou64(v, &g->memory);
- if (r < 0)
- return r;
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &g->memory);
+ if (r < 0)
+ return r;
+ }
if (g->memory > 0)
g->memory_valid = true;
@@ -506,6 +542,10 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration)
return 0;
}
+static const char *empty_to_slash(const char *p) {
+ return isempty(p) ? "/" : p;
+}
+
static int group_compare(const void*a, const void *b) {
const Group *x = *(Group**)a, *y = *(Group**)b;
@@ -515,9 +555,9 @@ static int group_compare(const void*a, const void *b) {
* recursive summing is off, since that is actually
* not accumulative for all children. */
- if (path_startswith(y->path, x->path))
+ if (path_startswith(empty_to_slash(y->path), empty_to_slash(x->path)))
return -1;
- if (path_startswith(x->path, y->path))
+ if (path_startswith(empty_to_slash(x->path), empty_to_slash(y->path)))
return 1;
}
@@ -666,7 +706,7 @@ static void display(Hashmap *a) {
g = array[j];
- path = isempty(g->path) ? "/" : g->path;
+ path = empty_to_slash(g->path);
ellipsized = ellipsize(path, path_columns, 33);
printf("%-*s", path_columns, ellipsized ?: path);
@@ -709,6 +749,7 @@ static void help(void) {
" --recursive=BOOL Sum up process count recursively\n"
" -d --delay=DELAY Delay between updates\n"
" -n --iterations=N Run for N iterations before exiting\n"
+ " -1 Shortcut for --iterations=1\n"
" -b --batch Run in batch mode, accepting no input\n"
" --depth=DEPTH Maximum traversal depth (default: %u)\n"
" -M --machine= Show container\n"
@@ -745,7 +786,7 @@ static int parse_argv(int argc, char *argv[]) {
assert(argc >= 1);
assert(argv);
- while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:", options, NULL)) >= 0)
+ while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:1", options, NULL)) >= 0)
switch (c) {
@@ -773,17 +814,15 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_DEPTH:
r = safe_atou(optarg, &arg_depth);
- if (r < 0) {
- log_error("Failed to parse depth parameter.");
- return -EINVAL;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse depth parameter: %s", optarg);
break;
case 'd':
r = parse_sec(optarg, &arg_delay);
if (r < 0 || arg_delay <= 0) {
- log_error("Failed to parse delay parameter.");
+ log_error("Failed to parse delay parameter: %s", optarg);
return -EINVAL;
}
@@ -791,13 +830,15 @@ static int parse_argv(int argc, char *argv[]) {
case 'n':
r = safe_atou(optarg, &arg_iterations);
- if (r < 0) {
- log_error("Failed to parse iterations parameter.");
- return -EINVAL;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse iterations parameter: %s", optarg);
break;
+ case '1':
+ arg_iterations = 1;
+ break;
+
case 'b':
arg_batch = true;
break;
@@ -853,10 +894,8 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_RECURSIVE:
r = parse_boolean(optarg);
- if (r < 0) {
- log_error("Failed to parse --recursive= argument: %s", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --recursive= argument: %s", optarg);
arg_recursive = r;
arg_recursive_unset = r == 0;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 52431ec12f..3c0ff09639 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -38,19 +38,34 @@
#include "stdio-util.h"
#include "string-table.h"
#include "string-util.h"
+#include "virt.h"
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
+bool manager_owns_root_cgroup(Manager *m) {
+ assert(m);
+
+ /* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the
+ * group root path equals "/" since that will also be the case if CLONE_NEWCGROUP is in the mix. Since there
+ * appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if
+ * we run in any kind of container virtualization. */
+
+ if (detect_container() > 0)
+ return false;
+
+ return isempty(m->cgroup_root) || path_equal(m->cgroup_root, "/");
+}
+
bool unit_has_root_cgroup(Unit *u) {
assert(u);
- /* Returns whether this unit manages the root cgroup. Note that this is different from being named "-.slice",
- * as inside of containers the root slice won't be identical to the root cgroup. */
+ /* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and
+ * the manager manages the root cgroup. */
- if (!u->cgroup_path)
+ if (!manager_owns_root_cgroup(u->manager))
return false;
- return isempty(u->cgroup_path) || path_equal(u->cgroup_path, "/");
+ return unit_has_name(u, SPECIAL_ROOT_SLICE);
}
static void cgroup_compat_warn(void) {
@@ -59,7 +74,9 @@ static void cgroup_compat_warn(void) {
if (cgroup_compat_warned)
return;
- log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details.");
+ log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. "
+ "See cgroup-compat debug messages for details.");
+
cgroup_compat_warned = true;
}
@@ -2406,6 +2423,10 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_root_cgroup(u))
+ return procfs_memory_get_current(ret);
+
if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
return -ENODATA;
@@ -2437,13 +2458,13 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
- if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
- return -ENODATA;
-
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
if (unit_has_root_cgroup(u))
return procfs_tasks_get_current(ret);
+ if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
+ return -ENODATA;
+
r = cg_get_attribute("pids", u->cgroup_path, "pids.current", &v);
if (r == -ENOENT)
return -ENODATA;
@@ -2464,20 +2485,25 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_root_cgroup(u))
+ return procfs_cpu_get_usage(ret);
+
r = cg_all_unified();
if (r < 0)
return r;
if (r > 0) {
- const char *keys[] = { "usage_usec", NULL };
_cleanup_free_ char *val = NULL;
uint64_t us;
if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
return -ENODATA;
- r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+ r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+ if (IN_SET(r, -ENOENT, -ENXIO)) /* missing file or key means "no data"; must be tested before the generic r < 0 return or it is unreachable */
+ return -ENODATA;
if (r < 0)
return r;
r = safe_atou64(val, &us);
if (r < 0)
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index e2e875d1c7..ae5f1c7647 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -209,6 +209,7 @@ int unit_reset_ip_accounting(Unit *u);
cc ? cc->name : false; \
})
+bool manager_owns_root_cgroup(Manager *m);
bool unit_has_root_cgroup(Unit *u);
int manager_notify_cgroup_empty(Manager *m, const char *group);
diff --git a/src/core/slice.c b/src/core/slice.c
index 2228669e00..1c4574b8bb 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -313,19 +313,18 @@ _pure_ static const char *slice_sub_state_to_string(Unit *u) {
return slice_state_to_string(SLICE(u)->state);
}
-static void slice_enumerate_perpetual(Manager *m, const char *name) {
+static int slice_make_perpetual(Manager *m, const char *name, Unit **ret) {
Unit *u;
int r;
assert(m);
+ assert(name);
u = manager_get_unit(m, name);
if (!u) {
r = unit_new_for_name(m, sizeof(Slice), name, &u);
- if (r < 0) {
- log_error_errno(r, "Failed to allocate the special %s unit: %m", name);
- return;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate the special %s unit: %m", name);
}
u->perpetual = true;
@@ -333,15 +332,34 @@ static void slice_enumerate_perpetual(Manager *m, const char *name) {
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
+
+ if (ret)
+ *ret = u;
+
+ return 0;
}
static void slice_enumerate(Manager *m) {
+ Unit *u;
+ int r;
+
assert(m);
- slice_enumerate_perpetual(m, SPECIAL_ROOT_SLICE);
+ r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u);
+ if (r >= 0 && manager_owns_root_cgroup(m)) {
+ Slice *s = SLICE(u);
+
+ /* If we are managing the root cgroup then this means our root slice covers the whole system, which
+ * means the kernel will track CPU/tasks/memory for us anyway, and it is all available in /proc. Let's
+ * hence turn accounting on here, so that our APIs to query this data are available. */
+
+ s->cgroup_context.cpu_accounting = true;
+ s->cgroup_context.tasks_accounting = true;
+ s->cgroup_context.memory_accounting = true;
+ }
if (MANAGER_IS_SYSTEM(m))
- slice_enumerate_perpetual(m, SPECIAL_SYSTEM_SLICE);
+ (void) slice_make_perpetual(m, SPECIAL_SYSTEM_SLICE, NULL);
}
const UnitVTable slice_vtable = {
diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c
index 2248a30635..c4163fc3a9 100644
--- a/src/test/test-cgroup-util.c
+++ b/src/test/test-cgroup-util.c
@@ -30,6 +30,7 @@
#include "special.h"
#include "stat-util.h"
#include "string-util.h"
+#include "strv.h"
#include "test-helper.h"
#include "user-util.h"
#include "util.h"
@@ -404,6 +405,45 @@ static void test_cg_tests(void) {
assert_se(!systemd);
}
+static void test_cg_get_keyed_attribute(void) {
+ _cleanup_free_ char *val = NULL;
+ char *vals3[3] = {}, *vals3a[3] = {};
+ int i;
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "no_such_file", STRV_MAKE("no_such_attr"), &val) == -ENOENT);
+ assert_se(val == NULL);
+
+ if (access("/sys/fs/cgroup/init.scope/cpu.stat", R_OK) < 0) {
+ log_info_errno(errno, "Skipping most of %s, /init.scope/cpu.stat not accessible: %m", __func__);
+ return;
+ }
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("no_such_attr"), &val) == -ENXIO);
+ assert_se(val == NULL);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec"), &val) == 0);
+ log_info("cpu /init.scope cpu.stat [usage_usec] → \"%s\"", val);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "no_such_attr"), vals3) == -ENXIO);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "usage_usec"), vals3) == -ENXIO);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("usage_usec", "user_usec", "system_usec"), vals3) == 0);
+ log_info("cpu /init.scope cpu.stat [usage_usec user_usec system_usec] → \"%s\", \"%s\", \"%s\"",
+ vals3[0], vals3[1], vals3[2]);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("system_usec", "user_usec", "usage_usec"), vals3a) == 0);
+ log_info("cpu /init.scope cpu.stat [system_usec user_usec usage_usec] → \"%s\", \"%s\", \"%s\"",
+ vals3a[0], vals3a[1], vals3a[2]);
+
+ for (i = 0; i < 3; i++) {
+ free(vals3[i]);
+ free(vals3a[i]);
+ }
+}
+
int main(void) {
log_set_max_level(LOG_DEBUG);
log_parse_environment();
@@ -429,6 +469,7 @@ int main(void) {
test_is_wanted_print(false); /* run twice to test caching */
test_is_wanted();
test_cg_tests();
+ test_cg_get_keyed_attribute();
return 0;
}
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
index a253182517..10229de4e8 100644
--- a/src/test/test-procfs-util.c
+++ b/src/test/test-procfs-util.c
@@ -3,15 +3,24 @@
#include
#include "log.h"
+#include "parse-util.h"
#include "procfs-util.h"
int main(int argc, char *argv[]) {
+ char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)];
+ nsec_t nsec;
uint64_t v;
int r;
log_parse_environment();
log_open();
+ assert_se(procfs_cpu_get_usage(&nsec) >= 0);
+ log_info("Current sytem CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1));
+
+ assert_se(procfs_memory_get_current(&v) >= 0);
+ log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v));
+
assert_se(procfs_tasks_get_current(&v) >= 0);
log_info("Current number of tasks: %" PRIu64, v);