cgtop: major modernizations

In preparation for unified cgroup support, let's clean up cgtop:

a) Rework the time code to be based on "nsec_t" rather than "struct timespec"

b) Introduce long option --order= for selecting ordering

c) Count the number of processes only in the main hierarchy; don't bother
   with the controller hierarchies. We don't allow orthogonal
   hierarchies in systemd anymore, hence there's no point in checking the
   other hierarchies.

d) Deal with non-monotonic cpuacct values (see #749)

e) When sorting groups, don't do a prefix comparison when ordering by number
   of tasks, since the task count is not cumulative across children.

f) Actually make --cpu without parameter work

g) Don't output control characters when we get them as input.

Fixes #749.
This commit is contained in:
Lennart Poettering 2015-08-28 02:04:33 +02:00
parent 21b735e798
commit 45d7a8bb6c
4 changed files with 203 additions and 142 deletions

View File

@ -1,4 +1,4 @@
<?xml version='1.0'?> <!--*-nxml-*-->
<?xml version='1.0'?> <!--*- Mode: nxml; nxml-child-indent: 2; indent-tabs-mode: nil -*-->
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
@ -64,10 +64,10 @@
regular intervals (by default every 1s), similar in style to
<citerefentry project='man-pages'><refentrytitle>top</refentrytitle><manvolnum>1</manvolnum></citerefentry>.</para>
<para>If <command>systemd-cgtop</command> is not connected to a tty, no
column headers are printed and the default is to only run one iteration.
The <varname>--iterations</varname> argument, if given, is still honored.
This mode is suitable for scripting.</para>
<para>If <command>systemd-cgtop</command> is not connected to a
tty, no column headers are printed and the default is to only run
one iteration. The <varname>--iterations=</varname> argument, if
given, is honored. This mode is suitable for scripting.</para>
<para>Resource usage is only accounted for control groups in the
relevant hierarchy, i.e. CPU usage is only accounted for control
@ -104,6 +104,7 @@
<variablelist>
<varlistentry>
<term><option>-p</option></term>
<term><option>--order=path</option></term>
<listitem><para>Order by control group
path name.</para></listitem>
@ -111,25 +112,28 @@
<varlistentry>
<term><option>-t</option></term>
<term><option>--order=tasks</option></term>
<listitem><para>Order by number of tasks in control group
(i.e. threads and processes).</para></listitem>
<listitem><para>Order by number of processes in control group.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>-c</option></term>
<term><option>--order=cpu</option></term>
<listitem><para>Order by CPU load.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>-m</option></term>
<term><option>--order=memory</option></term>
<listitem><para>Order by memory usage.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>-i</option></term>
<term><option>--order=io</option></term>
<listitem><para>Order by disk I/O load.</para></listitem>
</varlistentry>
@ -140,7 +144,7 @@
<listitem><para>Run in "batch" mode: do not accept input and
run until the iteration limit set with
<option>--iterations</option> is exhausted or until killed.
<option>--iterations=</option> is exhausted or until killed.
This mode could be useful for sending output from
<command>systemd-cgtop</command> to other programs or to a
file.</para></listitem>
@ -155,6 +159,15 @@
numbers.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--cpu=percentage</option></term>
<term><option>--cpu=time</option></term>
<listitem><para>Controls whether the CPU usage is shown as
percentage or time. By default the CPU usage is shown as
percentage.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>-n</option></term>
<term><option>--iterations=</option></term>

View File

@ -36,6 +36,14 @@ usec_t now(clockid_t clock_id) {
return timespec_load(&ts);
}
/* Reads the clock identified by clock_id via clock_gettime() and returns the
 * result converted by timespec_load_nsec(), i.e. as an nsec_t. This is the
 * nsec_t counterpart of now(), which goes through timespec_load() instead. */
nsec_t now_nsec(clockid_t clock_id) {
struct timespec ts;
/* No error is returned to the caller: the call is wrapped in assert_se(),
 * which presumably aborts if clock_gettime() fails — confirm against the
 * macro's definition. */
assert_se(clock_gettime(clock_id, &ts) == 0);
return timespec_load_nsec(&ts);
}
dual_timestamp* dual_timestamp_get(dual_timestamp *ts) {
assert(ts);
@ -129,6 +137,18 @@ usec_t timespec_load(const struct timespec *ts) {
(usec_t) ts->tv_nsec / NSEC_PER_USEC;
}
nsec_t timespec_load_nsec(const struct timespec *ts) {
assert(ts);
if (ts->tv_sec == (time_t) -1 &&
ts->tv_nsec == (long) -1)
return NSEC_INFINITY;
return
(nsec_t) ts->tv_sec * NSEC_PER_SEC +
(nsec_t) ts->tv_nsec;
}
struct timespec *timespec_store(struct timespec *ts, usec_t u) {
assert(ts);

View File

@ -70,6 +70,7 @@ typedef struct dual_timestamp {
#define DUAL_TIMESTAMP_NULL ((struct dual_timestamp) { 0ULL, 0ULL })
usec_t now(clockid_t clock);
nsec_t now_nsec(clockid_t clock);
dual_timestamp* dual_timestamp_get(dual_timestamp *ts);
dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u);
@ -87,6 +88,8 @@ struct timespec *timespec_store(struct timespec *ts, usec_t u);
usec_t timeval_load(const struct timeval *tv) _pure_;
struct timeval *timeval_store(struct timeval *tv, usec_t u);
nsec_t timespec_load_nsec(const struct timespec *ts) _pure_;
char *format_timestamp(char *buf, size_t l, usec_t t);
char *format_timestamp_utc(char *buf, size_t l, usec_t t);
char *format_timestamp_us(char *buf, size_t l, usec_t t);

View File

@ -19,7 +19,6 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#define __STDC_FORMAT_MACROS
#include <errno.h>
#include <string.h>
#include <stdlib.h>
@ -48,20 +47,20 @@ typedef struct Group {
unsigned n_tasks;
unsigned cpu_iteration;
uint64_t cpu_usage;
struct timespec cpu_timestamp;
nsec_t cpu_usage;
nsec_t cpu_timestamp;
double cpu_fraction;
uint64_t memory;
unsigned io_iteration;
uint64_t io_input, io_output;
struct timespec io_timestamp;
nsec_t io_timestamp;
uint64_t io_input_bps, io_output_bps;
} Group;
static unsigned arg_depth = 3;
static unsigned arg_iterations = (unsigned)-1;
static unsigned arg_iterations = (unsigned) -1;
static bool arg_batch = false;
static bool arg_raw = false;
static usec_t arg_delay = 1*USEC_PER_SEC;
@ -111,9 +110,6 @@ static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, off_t
static int process(const char *controller, const char *path, Hashmap *a, Hashmap *b, unsigned iteration) {
Group *g;
int r;
FILE *f = NULL;
pid_t pid;
unsigned n;
assert(controller);
assert(path);
@ -142,84 +138,81 @@ static int process(const char *controller, const char *path, Hashmap *a, Hashmap
r = hashmap_move_one(a, b, path);
if (r < 0)
return r;
g->cpu_valid = g->memory_valid = g->io_valid = g->n_tasks_valid = false;
}
}
/* Regardless which controller, let's find the maximum number
* of processes in any of it */
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
_cleanup_fclose_ FILE *f = NULL;
pid_t pid;
r = cg_enumerate_processes(controller, path, &f);
if (r < 0)
return r;
r = cg_enumerate_processes(controller, path, &f);
if (r == -ENOENT)
return 0;
if (r < 0)
return r;
n = 0;
while (cg_read_pid(f, &pid) > 0)
n++;
fclose(f);
g->n_tasks = 0;
while (cg_read_pid(f, &pid) > 0)
g->n_tasks++;
if (n > 0) {
if (g->n_tasks_valid)
g->n_tasks = MAX(g->n_tasks, n);
else
g->n_tasks = n;
if (g->n_tasks > 0)
g->n_tasks_valid = true;
g->n_tasks_valid = true;
}
if (streq(controller, "cpuacct")) {
} else if (streq(controller, "cpuacct")) {
_cleanup_free_ char *p = NULL, *v = NULL;
uint64_t new_usage;
char *p, *v;
struct timespec ts;
nsec_t timestamp;
r = cg_get_path(controller, path, "cpuacct.usage", &p);
if (r < 0)
return r;
r = read_one_line_file(p, &v);
free(p);
if (r == -ENOENT)
return 0;
if (r < 0)
return r;
r = safe_atou64(v, &new_usage);
free(v);
if (r < 0)
return r;
assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
timestamp = now_nsec(CLOCK_MONOTONIC);
if (g->cpu_iteration == iteration - 1) {
uint64_t x, y;
if (g->cpu_iteration == iteration - 1 &&
(nsec_t) new_usage > g->cpu_usage) {
x = ((uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec) -
((uint64_t) g->cpu_timestamp.tv_sec * 1000000000ULL + (uint64_t) g->cpu_timestamp.tv_nsec);
nsec_t x, y;
y = new_usage - g->cpu_usage;
x = timestamp - g->cpu_timestamp;
if (x < 1)
x = 1;
if (y > 0) {
g->cpu_fraction = (double) y / (double) x;
g->cpu_valid = true;
}
y = (nsec_t) new_usage - g->cpu_usage;
g->cpu_fraction = (double) y / (double) x;
g->cpu_valid = true;
}
g->cpu_usage = new_usage;
g->cpu_timestamp = ts;
g->cpu_usage = (nsec_t) new_usage;
g->cpu_timestamp = timestamp;
g->cpu_iteration = iteration;
} else if (streq(controller, "memory")) {
char *p, *v;
_cleanup_free_ char *p = NULL, *v = NULL;
r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
if (r < 0)
return r;
r = read_one_line_file(p, &v);
free(p);
if (r == -ENOENT)
return 0;
if (r < 0)
return r;
r = safe_atou64(v, &g->memory);
free(v);
if (r < 0)
return r;
@ -227,19 +220,21 @@ static int process(const char *controller, const char *path, Hashmap *a, Hashmap
g->memory_valid = true;
} else if (streq(controller, "blkio")) {
char *p;
_cleanup_fclose_ FILE *f = NULL;
_cleanup_free_ char *p = NULL;
uint64_t wr = 0, rd = 0;
struct timespec ts;
nsec_t timestamp;
r = cg_get_path(controller, path, "blkio.io_service_bytes", &p);
if (r < 0)
return r;
f = fopen(p, "re");
free(p);
if (!f)
if (!f) {
if (errno == ENOENT)
return 0;
return -errno;
}
for (;;) {
char line[LINE_MAX], *l;
@ -269,20 +264,26 @@ static int process(const char *controller, const char *path, Hashmap *a, Hashmap
*q += k;
}
fclose(f);
assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
timestamp = now_nsec(CLOCK_MONOTONIC);
if (g->io_iteration == iteration - 1) {
uint64_t x, yr, yw;
x = ((uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec) -
((uint64_t) g->io_timestamp.tv_sec * 1000000000ULL + (uint64_t) g->io_timestamp.tv_nsec);
x = (uint64_t) (timestamp - g->io_timestamp);
if (x < 1)
x = 1;
yr = rd - g->io_input;
yw = wr - g->io_output;
if (rd > g->io_input)
yr = rd - g->io_input;
else
yr = 0;
if (g->io_input > 0 || g->io_output > 0) {
if (wr > g->io_output)
yw = wr - g->io_output;
else
yw = 0;
if (yr > 0 || yw > 0) {
g->io_input_bps = (yr * 1000000000ULL) / x;
g->io_output_bps = (yw * 1000000000ULL) / x;
g->io_valid = true;
@ -291,7 +292,7 @@ static int process(const char *controller, const char *path, Hashmap *a, Hashmap
g->io_input = rd;
g->io_output = wr;
g->io_timestamp = ts;
g->io_timestamp = timestamp;
g->io_iteration = iteration;
}
@ -306,7 +307,7 @@ static int refresh_one(
unsigned iteration,
unsigned depth) {
DIR *d = NULL;
_cleanup_closedir_ DIR *d = NULL;
int r;
assert(controller);
@ -321,41 +322,29 @@ static int refresh_one(
return r;
r = cg_enumerate_subgroups(controller, path, &d);
if (r < 0) {
if (r == -ENOENT)
return 0;
if (r == -ENOENT)
return 0;
if (r < 0)
return r;
}
for (;;) {
char *fn, *p;
_cleanup_free_ char *fn = NULL, *p = NULL;
r = cg_read_subgroup(d, &fn);
if (r <= 0)
goto finish;
return r;
p = strjoin(path, "/", fn, NULL);
free(fn);
if (!p) {
r = -ENOMEM;
goto finish;
}
if (!p)
return -ENOMEM;
path_kill_slashes(p);
r = refresh_one(controller, p, a, b, iteration, depth + 1);
free(p);
if (r < 0)
goto finish;
return r;
}
finish:
if (d)
closedir(d);
return r;
}
@ -364,35 +353,43 @@ static int refresh(Hashmap *a, Hashmap *b, unsigned iteration) {
assert(a);
r = refresh_one("name=systemd", "/", a, b, iteration, 0);
r = refresh_one(SYSTEMD_CGROUP_CONTROLLER, "/", a, b, iteration, 0);
if (r < 0)
if (r != -ENOENT)
return r;
return r;
r = refresh_one("cpuacct", "/", a, b, iteration, 0);
if (r < 0)
if (r != -ENOENT)
return r;
return r;
r = refresh_one("memory", "/", a, b, iteration, 0);
if (r < 0)
if (r != -ENOENT)
return r;
return r;
r = refresh_one("blkio", "/", a, b, iteration, 0);
if (r < 0)
if (r != -ENOENT)
return r;
return r;
return 0;
}
static int group_compare(const void*a, const void *b) {
const Group *x = *(Group**)a, *y = *(Group**)b;
if (path_startswith(y->path, x->path))
return -1;
if (path_startswith(x->path, y->path))
return 1;
if (arg_order != ORDER_TASKS) {
/* Let's make sure that the parent is always before
* the child. Except when ordering by tasks, since
* that is actually not accumulative for all
* children. */
if (arg_order == ORDER_CPU) {
if (path_startswith(y->path, x->path))
return -1;
if (path_startswith(x->path, y->path))
return 1;
}
switch (arg_order) {
case ORDER_PATH:
break;
case ORDER_CPU:
if (arg_cpu_type == CPU_PERCENT) {
if (x->cpu_valid && y->cpu_valid) {
if (x->cpu_fraction > y->cpu_fraction)
@ -409,10 +406,10 @@ static int group_compare(const void*a, const void *b) {
else if (x->cpu_usage < y->cpu_usage)
return 1;
}
}
if (arg_order == ORDER_TASKS) {
break;
case ORDER_TASKS:
if (x->n_tasks_valid && y->n_tasks_valid) {
if (x->n_tasks > y->n_tasks)
return -1;
@ -422,9 +419,10 @@ static int group_compare(const void*a, const void *b) {
return -1;
else if (y->n_tasks_valid)
return 1;
}
if (arg_order == ORDER_MEMORY) {
break;
case ORDER_MEMORY:
if (x->memory_valid && y->memory_valid) {
if (x->memory > y->memory)
return -1;
@ -434,9 +432,10 @@ static int group_compare(const void*a, const void *b) {
return -1;
else if (y->memory_valid)
return 1;
}
if (arg_order == ORDER_IO) {
break;
case ORDER_IO:
if (x->io_valid && y->io_valid) {
if (x->io_input_bps + x->io_output_bps > y->io_input_bps + y->io_output_bps)
return -1;
@ -448,7 +447,7 @@ static int group_compare(const void*a, const void *b) {
return 1;
}
return strcmp(x->path, y->path);
return path_compare(x->path, y->path);
}
#define ON ANSI_HIGHLIGHT_ON
@ -481,9 +480,10 @@ static int display(Hashmap *a) {
for (j = 0; j < n; j++) {
unsigned cputlen, pathtlen;
format_timespan(buffer, sizeof(buffer), (nsec_t) (array[j]->cpu_usage / NSEC_PER_USEC), 0);
format_timespan(buffer, sizeof(buffer), (usec_t) (array[j]->cpu_usage / NSEC_PER_USEC), 0);
cputlen = strlen(buffer);
maxtcpu = MAX(maxtcpu, cputlen);
pathtlen = strlen(array[j]->path);
maxtpath = MAX(maxtpath, pathtlen);
}
@ -503,7 +503,7 @@ static int display(Hashmap *a) {
path_columns = 10;
printf("%s%-*s%s %s%7s%s %s%s%s %s%8s%s %s%8s%s %s%8s%s\n\n",
arg_order == ORDER_PATH ? ON : "", path_columns, "Path",
arg_order == ORDER_PATH ? ON : "", path_columns, "Control Group",
arg_order == ORDER_PATH ? OFF : "",
arg_order == ORDER_TASKS ? ON : "", "Tasks",
arg_order == ORDER_TASKS ? OFF : "",
@ -519,7 +519,7 @@ static int display(Hashmap *a) {
path_columns = maxtpath;
for (j = 0; j < n; j++) {
char *p;
_cleanup_free_ char *p = NULL;
if (on_tty() && j + 5 > rows)
break;
@ -527,8 +527,7 @@ static int display(Hashmap *a) {
g = array[j];
p = ellipsize(g->path, path_columns, 33);
printf("%-*s", path_columns, p ? p : g->path);
free(p);
printf("%-*s", path_columns, p ?: g->path);
if (g->n_tasks_valid)
printf(" %7u", g->n_tasks);
@ -541,7 +540,7 @@ static int display(Hashmap *a) {
else
fputs(" -", stdout);
} else
printf(" %*s", maxtcpu, format_timespan(buffer, sizeof(buffer), (nsec_t) (g->cpu_usage / NSEC_PER_USEC), 0));
printf(" %*s", maxtcpu, format_timespan(buffer, sizeof(buffer), (usec_t) (g->cpu_usage / NSEC_PER_USEC), 0));
printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->memory_valid, g->memory));
printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->io_valid, g->io_input_bps));
@ -557,14 +556,15 @@ static void help(void) {
printf("%s [OPTIONS...]\n\n"
"Show top control groups by their resource usage.\n\n"
" -h --help Show this help\n"
" --version Print version and exit\n"
" -p Order by path\n"
" -t Order by number of tasks\n"
" -c Order by CPU load\n"
" -m Order by memory load\n"
" -i Order by IO load\n"
" --version Show package version\n"
" -p --order=path Order by path\n"
" -t --order=tasks Order by number of tasks\n"
" -c --order=cpu Order by CPU load (default)\n"
" -m --order=memory Order by memory load\n"
" -i --order=io Order by IO load\n"
" -r --raw Provide raw (not human-readable) numbers\n"
" --cpu[=TYPE] Show CPU usage as time or percentage (default)\n"
" --cpu=percentage Show CPU usage as percentage (default)\n"
" --cpu=time Show CPU usage as time\n"
" -d --delay=DELAY Delay between updates\n"
" -n --iterations=N Run for N iterations before exiting\n"
" -b --batch Run in batch mode, accepting no input\n"
@ -577,18 +577,20 @@ static int parse_argv(int argc, char *argv[]) {
enum {
ARG_VERSION = 0x100,
ARG_DEPTH,
ARG_CPU_TYPE
ARG_CPU_TYPE,
ARG_ORDER,
};
static const struct option options[] = {
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, ARG_VERSION },
{ "delay", required_argument, NULL, 'd' },
{ "iterations", required_argument, NULL, 'n' },
{ "batch", no_argument, NULL, 'b' },
{ "raw", no_argument, NULL, 'r' },
{ "depth", required_argument, NULL, ARG_DEPTH },
{ "cpu", optional_argument, NULL, ARG_CPU_TYPE},
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, ARG_VERSION },
{ "delay", required_argument, NULL, 'd' },
{ "iterations", required_argument, NULL, 'n' },
{ "batch", no_argument, NULL, 'b' },
{ "raw", no_argument, NULL, 'r' },
{ "depth", required_argument, NULL, ARG_DEPTH },
{ "cpu", optional_argument, NULL, ARG_CPU_TYPE },
{ "order", required_argument, NULL, ARG_ORDER },
{}
};
@ -613,13 +615,17 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_CPU_TYPE:
if (optarg) {
if (strcmp(optarg, "time") == 0)
if (streq(optarg, "time"))
arg_cpu_type = CPU_TIME;
else if (strcmp(optarg, "percentage") == 0)
else if (streq(optarg, "percentage"))
arg_cpu_type = CPU_PERCENT;
else
else {
log_error("Unknown argument to --cpu=: %s", optarg);
return -EINVAL;
}
}
} else
arg_cpu_type = CPU_TIME;
break;
case ARG_DEPTH:
@ -677,6 +683,23 @@ static int parse_argv(int argc, char *argv[]) {
arg_order = ORDER_IO;
break;
case ARG_ORDER:
if (streq(optarg, "path"))
arg_order = ORDER_PATH;
else if (streq(optarg, "tasks"))
arg_order = ORDER_TASKS;
else if (streq(optarg, "cpu"))
arg_order = ORDER_CPU;
else if (streq(optarg, "memory"))
arg_order = ORDER_MEMORY;
else if (streq(optarg, "io"))
arg_order = ORDER_IO;
else {
log_error("Invalid argument to --order=: %s", optarg);
return -EINVAL;
}
break;
case '?':
return -EINVAL;
@ -715,7 +738,7 @@ int main(int argc, char *argv[]) {
signal(SIGWINCH, columns_lines_cache_reset);
if (arg_iterations == (unsigned)-1)
if (arg_iterations == (unsigned) -1)
arg_iterations = on_tty() ? 0 : 1;
while (!quit) {
@ -753,11 +776,10 @@ int main(int argc, char *argv[]) {
fputs("\n", stdout);
fflush(stdout);
if (arg_batch) {
if (arg_batch)
usleep(last_refresh + arg_delay - t);
} else {
r = read_one_char(stdin, &key,
last_refresh + arg_delay - t, NULL);
else {
r = read_one_char(stdin, &key, last_refresh + arg_delay - t, NULL);
if (r == -ETIMEDOUT)
continue;
if (r < 0) {
@ -843,7 +865,10 @@ int main(int argc, char *argv[]) {
break;
default:
fprintf(stdout, "\nUnknown key '%c'. Ignoring.", key);
if (key < ' ')
fprintf(stdout, "\nUnknown key '\\x%x'. Ignoring.", key);
else
fprintf(stdout, "\nUnknown key '%c'. Ignoring.", key);
fflush(stdout);
sleep(1);
break;