Systemd/src/bootchart/store.c
Karel Zak c358d728e7 bootchart: don't parse /proc/uptime, use CLOCK_BOOTTIME
* systemd-bootchart always parses /proc/uptime, although the
  information is unnecessary when --rel is specified

* using /proc/uptime is overkill; since Linux 2.6.39 we have
  clock_gettime(CLOCK_BOOTTIME, ...). The backend on the kernel side is
  get_monotonic_boottime() in both cases.

* main() uses "if (graph_start <= 0.0)" to detect that /proc is
  available.

  This is a fragile solution, as graph_start is always smaller than zero
  on all systems after suspend/resume (e.g. laptops), because in this
  case the system uptime includes the suspend time, so the uptime is always
  a greater number than the monotonic time. For example, right now the
  difference between uptime and monotonic time is 37 hours on my laptop.

  Note that main() calls log_uptime() (to parse /proc/uptime) for each
  sample when it believes that /proc is not available. So on my laptop
  systemd-bootchart spends its whole life on /proc/uptime parsing +
  nanosleep(); try

    strace  /usr/lib/systemd/systemd-bootchart

  to see the never ending loop.

  This patch uses access("/proc/vmstat", F_OK) to detect procfs.
2014-08-03 01:12:53 -04:00

504 lines
17 KiB
C

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright (C) 2009-2013 Intel Corporation
Authors:
Auke Kok <auke-jan.h.kok@intel.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <unistd.h>
#include <stdlib.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <string.h>
#include <dirent.h>
#include <fcntl.h>
#include <time.h>
#include "util.h"
#include "strxcpyx.h"
#include "store.h"
#include "bootchart.h"
#include "cgroup-util.h"
/*
* Alloc a static 4k buffer for stdio - primarily used to increase
* PSS buffering from the default 1k stdin buffer to reduce
* read() overhead.
*/
/* Buffer handed to setvbuf() for each per-process smaps stream opened in log_sample(). */
static char smaps_buf[4096];
/* smaps layout selector: 0 until log_sample() has probed an smaps file, then 1 or 2. */
static int skip = 0;
/* Directory stream for /proc; opened on the first log_sample() call and rewound on later ones. */
DIR *proc;
/* Descriptor of the /proc directory (dirfd(proc)), used as the base for openat(); -1 until opened. */
int procfd = -1;
/* Return the current CLOCK_MONOTONIC reading as fractional seconds. */
double gettime_ns(void) {
        struct timespec now;
        double fraction;

        clock_gettime(CLOCK_MONOTONIC, &now);

        /* nanoseconds -> fraction of a second */
        fraction = now.tv_nsec / 1000000000.0;
        return now.tv_sec + fraction;
}
/* Return the current CLOCK_BOOTTIME reading as fractional seconds. */
static double gettime_up(void) {
        struct timespec since_boot;
        double fraction;

        clock_gettime(CLOCK_BOOTTIME, &since_boot);

        /* nanoseconds -> fraction of a second */
        fraction = since_boot.tv_nsec / 1000000000.0;
        return since_boot.tv_sec + fraction;
}
/*
 * Record the reference timestamps for the chart.
 *
 * log_start always receives the current monotonic time. In relative mode
 * (arg_relative) graph_start is set to the same value; otherwise it is
 * moved back by the CLOCK_BOOTTIME reading.
 */
void log_uptime(void) {
        double since_boot;

        if (arg_relative) {
                log_start = gettime_ns();
                graph_start = log_start;
                return;
        }

        /* sample the boot clock first, then the monotonic clock */
        since_boot = gettime_up();
        log_start = gettime_ns();
        graph_start = log_start - since_boot;
}
/*
 * Return a pointer to the character following the first '\n' in buf,
 * or NULL when buf is NULL or contains no newline.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (buf == NULL)
                return NULL;

        newline = strchr(buf, '\n');
        return newline ? newline + 1 : NULL;
}
/*
 * Copy the command line of process `pid` into `buffer`.
 *
 * The data is read from "<pid>/cmdline" relative to the global procfd.
 * At most buf_len - 1 bytes are read; every NUL byte in the data read is
 * replaced by a space and the result is NUL-terminated. When nothing is
 * read, `buffer` is left untouched so its previous contents survive.
 *
 * Returns 0 on success (including the "nothing read" case), -EINVAL when
 * there is no buffer to write to, or a negative errno value when opening
 * or reading the file fails.
 */
static int pid_cmdline_strscpy(char *buffer, size_t buf_len, int pid) {
        char filename[PATH_MAX];
        _cleanup_close_ int fd = -1;
        ssize_t n;
        ssize_t i;

        /* buf_len - 1 below would wrap around to SIZE_MAX for an empty buffer */
        if (!buffer || buf_len == 0)
                return -EINVAL;

        snprintf(filename, sizeof(filename), "%d/cmdline", pid);
        fd = openat(procfd, filename, O_RDONLY);
        if (fd < 0)
                return -errno;

        n = read(fd, buffer, buf_len - 1);
        if (n < 0)
                /* report the failure instead of claiming success */
                return -errno;

        if (n > 0) {
                for (i = 0; i < n; i++)
                        if (buffer[i] == '\0')
                                buffer[i] = ' ';
                buffer[n] = '\0';
        }

        return 0;
}
/*
 * Close the cached per-process descriptors of `ps` and mark them as not
 * open (0), so later samples reopen the files instead of reading from or
 * closing a stale descriptor number.
 */
static void ps_close_fds(struct ps_struct *ps) {
        if (ps->schedstat > 0)
                close(ps->schedstat);
        ps->schedstat = 0;

        if (ps->sched > 0)
                close(ps->sched);
        ps->sched = 0;
}

/*
 * Take one sample of system-wide and per-process statistics from /proc.
 *
 * sample: not used by this function.
 * ptr:    points at the sample record (*ptr) that receives the system-wide
 *         values and that every per-process sample taken here refers to.
 *
 * Steps, in order:
 *   1. open (first call) or rewind the /proc directory stream;
 *   2. read pgpgin/pgpgout from vmstat into sampledata->blockstat;
 *   3. read per-CPU run/wait time from schedstat;
 *   4. optionally (arg_entropy) read entropy_avail;
 *   5. walk all numeric /proc entries: create a ps_struct for PIDs not seen
 *      before (name, start time, optional cgroup, ppid, parent/child
 *      links), then append a new ps_sched_struct with run/wait time and,
 *      with arg_pss, the summed Pss; periodically re-read the process name.
 *
 * The function returns early, without sampling processes, when /proc cannot
 * be opened or vmstat/schedstat cannot be read. It exits the program when
 * vmstat/schedstat cannot be opened or an allocation fails.
 */
void log_sample(int sample, struct list_sample_data **ptr) {
        /* descriptors kept open across calls; -1 means "not open" */
        static int vmstat = -1;
        static int schedstat = -1;
        static int e_fd = -1;
        char buf[4096];
        char key[256];
        char val[256];
        /* initialised so atoll() below never reads indeterminate bytes */
        char rt[256] = "";
        char wt[256] = "";
        char *m;
        int c;
        int p;
        int mod;
        ssize_t s;
        ssize_t n;
        struct dirent *ent;
        int fd;
        struct list_sample_data *sampledata;
        struct ps_sched_struct *ps_prev = NULL;

        sampledata = *ptr;

        /* all the per-process stuff goes here */
        if (!proc) {
                /* find all processes */
                proc = opendir("/proc");
                if (!proc)
                        return;
                procfd = dirfd(proc);
        } else {
                rewinddir(proc);
        }

        if (vmstat < 0) {
                /* block stuff */
                vmstat = openat(procfd, "vmstat", O_RDONLY);
                if (vmstat < 0) {
                        log_error("Failed to open /proc/vmstat: %m");
                        exit(EXIT_FAILURE);
                }
        }

        n = pread(vmstat, buf, sizeof(buf) - 1, 0);
        if (n <= 0) {
                /* forget the descriptor so the next call reopens the file */
                close(vmstat);
                vmstat = -1;
                return;
        }
        buf[n] = '\0';

        for (m = buf; m; m = bufgetline(m)) {
                /* field widths keep the tokens inside key[] / val[] */
                if (sscanf(m, "%255s %255s", key, val) < 2)
                        continue;
                if (streq(key, "pgpgin"))
                        sampledata->blockstat.bi = atoi(val);
                if (streq(key, "pgpgout")) {
                        sampledata->blockstat.bo = atoi(val);
                        break;
                }
        }

        if (schedstat < 0) {
                /* overall CPU utilization */
                schedstat = openat(procfd, "schedstat", O_RDONLY);
                if (schedstat < 0) {
                        log_error("Failed to open /proc/schedstat: %m");
                        exit(EXIT_FAILURE);
                }
        }

        n = pread(schedstat, buf, sizeof(buf) - 1, 0);
        if (n <= 0) {
                /* forget the descriptor so the next call reopens the file */
                close(schedstat);
                schedstat = -1;
                return;
        }
        buf[n] = '\0';

        for (m = buf; m; m = bufgetline(m)) {
                if (sscanf(m, "%255s %*s %*s %*s %*s %*s %*s %255s %255s", key, rt, wt) < 3)
                        continue;

                if (strstr(key, "cpu")) {
                        c = atoi((const char*)(key+3));
                        if (c < 0 || c >= MAXCPUS)
                                /* Oops, we only have room for MAXCPUS data
                                 * (valid indices are 0 .. MAXCPUS - 1) */
                                break;
                        sampledata->runtime[c] = atoll(rt);
                        sampledata->waittime[c] = atoll(wt);

                        if (c == cpus)
                                cpus = c + 1;
                }
        }

        if (arg_entropy) {
                if (e_fd < 0)
                        e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY);

                if (e_fd >= 0) {
                        n = pread(e_fd, buf, sizeof(buf) - 1, 0);
                        if (n > 0) {
                                buf[n] = '\0';
                                sampledata->entropy_avail = atoi(buf);
                        }
                }
        }

        while ((ent = readdir(proc)) != NULL) {
                char filename[PATH_MAX];
                int pid;
                struct ps_struct *ps;

                /* only numeric entries are processes */
                if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
                        continue;

                pid = atoi(ent->d_name);

                if (pid >= MAXPIDS)
                        continue;

                /* look the pid up in the list of known processes */
                ps = ps_first;
                while (ps->next_ps) {
                        ps = ps->next_ps;
                        if (ps->pid == pid)
                                break;
                }

                /* end of our LL? then append a new record */
                if (ps->pid != pid) {
                        _cleanup_fclose_ FILE *st = NULL;
                        char t[32];
                        struct ps_struct *parent;

                        ps->next_ps = new0(struct ps_struct, 1);
                        if (!ps->next_ps) {
                                log_oom();
                                exit (EXIT_FAILURE);
                        }
                        ps = ps->next_ps;
                        ps->pid = pid;

                        ps->sample = new0(struct ps_sched_struct, 1);
                        if (!ps->sample) {
                                log_oom();
                                exit (EXIT_FAILURE);
                        }
                        ps->sample->sampledata = sampledata;

                        pscount++;

                        /* mark our first sample */
                        ps->first = ps->last = ps->sample;
                        /* NOTE(review): rt/wt still hold whatever was parsed
                         * last (the system-wide schedstat or a previously
                         * visited process), not values of this pid - confirm
                         * this is intended. */
                        ps->sample->runtime = atoll(rt);
                        ps->sample->waittime = atoll(wt);

                        /* get name, start time */
                        if (!ps->sched) {
                                sprintf(filename, "%d/sched", pid);
                                ps->sched = openat(procfd, filename, O_RDONLY);
                                if (ps->sched == -1) {
                                        /* keep "not open" so a later sample retries */
                                        ps->sched = 0;
                                        continue;
                                }
                        }

                        s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
                        if (s <= 0) {
                                ps_close_fds(ps);
                                continue;
                        }
                        buf[s] = '\0';

                        /* sscanf() returns EOF (-1) on empty input, so test
                         * for a successful conversion explicitly */
                        if (sscanf(buf, "%255s %*s %*s", key) < 1)
                                continue;

                        strscpy(ps->name, sizeof(ps->name), key);

                        /* cmdline */
                        if (arg_show_cmdline)
                                pid_cmdline_strscpy(ps->name, sizeof(ps->name), pid);

                        /* discard line 2 */
                        m = bufgetline(buf);
                        if (!m)
                                continue;

                        m = bufgetline(m);
                        if (!m)
                                continue;

                        if (sscanf(m, "%*s %*s %31s", t) < 1)
                                continue;

                        ps->starttime = strtod(t, NULL) / 1000.0;

                        if (arg_show_cgroup)
                                /* if this fails, that's OK */
                                cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
                                                ps->pid, &ps->cgroup);

                        /* ppid */
                        sprintf(filename, "%d/stat", pid);
                        fd = openat(procfd, filename, O_RDONLY);
                        if (fd < 0)
                                continue;
                        st = fdopen(fd, "r");
                        if (!st) {
                                /* fdopen() did not take ownership of fd */
                                close(fd);
                                continue;
                        }
                        if (fscanf(st, "%*s %*s %*s %i", &p) != 1)
                                continue;
                        ps->ppid = p;

                        /*
                         * setup child pointers
                         *
                         * these are used to paint the tree coherently later
                         * each parent has a LL of children, and a LL of siblings
                         */
                        if (pid == 1)
                                continue; /* nothing to do for init atm */

                        /* kthreadd has ppid=0, which breaks our tree ordering */
                        if (ps->ppid == 0)
                                ps->ppid = 1;

                        parent = ps_first;
                        while ((parent->next_ps && parent->pid != ps->ppid))
                                parent = parent->next_ps;

                        if (parent->pid != ps->ppid) {
                                /* orphan */
                                ps->ppid = 1;
                                parent = ps_first->next_ps;
                        }

                        ps->parent = parent;

                        if (!parent->children) {
                                /* it's the first child */
                                parent->children = ps;
                        } else {
                                /* walk all children and append */
                                struct ps_struct *children;
                                children = parent->children;
                                while (children->next)
                                        children = children->next;
                                children->next = ps;
                        }
                }

                /* else -> found pid, append data in ps */

                /* below here is all continuous logging parts - we get here on every
                 * iteration */

                /* rt, wt */
                if (!ps->schedstat) {
                        sprintf(filename, "%d/schedstat", pid);
                        ps->schedstat = openat(procfd, filename, O_RDONLY);
                        if (ps->schedstat == -1) {
                                /* keep "not open" so a later sample retries */
                                ps->schedstat = 0;
                                continue;
                        }
                }
                s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
                if (s <= 0) {
                        /* clean up our file descriptors - assume that the process exited */
                        ps_close_fds(ps);
                        //if (ps->smaps)
                        //        fclose(ps->smaps);
                        continue;
                }
                buf[s] = '\0';

                /* both fields are needed; a partial match would leave wt stale */
                if (sscanf(buf, "%255s %255s %*s", rt, wt) < 2)
                        continue;

                ps->sample->next = new0(struct ps_sched_struct, 1);
                if (!ps->sample->next) {
                        log_oom();
                        exit(EXIT_FAILURE);
                }
                ps->sample->next->prev = ps->sample;
                ps->sample = ps->sample->next;
                ps->last = ps->sample;
                ps->sample->runtime = atoll(rt);
                ps->sample->waittime = atoll(wt);
                ps->sample->sampledata = sampledata;
                ps->sample->ps_new = ps;
                /* chain the samples taken during this call across processes */
                if (ps_prev) {
                        ps_prev->cross = ps->sample;
                }
                ps_prev = ps->sample;
                ps->total = (ps->last->runtime - ps->first->runtime)
                            / 1000000000.0;

                if (!arg_pss)
                        goto catch_rename;

                /* Pss */
                if (!ps->smaps) {
                        sprintf(filename, "%d/smaps", pid);
                        fd = openat(procfd, filename, O_RDONLY);
                        if (fd < 0)
                                continue;
                        ps->smaps = fdopen(fd, "r");
                        if (!ps->smaps) {
                                /* fdopen() did not take ownership of fd */
                                close(fd);
                                continue;
                        }
                        /* NOTE(review): every smaps stream is given the same
                         * static buffer; this looks safe only because each
                         * stream is rewound and read to completion before the
                         * next one is touched - confirm. */
                        setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
                } else {
                        rewind(ps->smaps);
                }

                /* test to see if we need to skip another field */
                if (skip == 0) {
                        if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
                                continue;
                        }
                        if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
                                continue;
                        }
                        /* byte 392 is the first character of the 15th 28-char line */
                        if (buf[392] == 'V') {
                                skip = 2;
                        } else {
                                skip = 1;
                        }
                        rewind(ps->smaps);
                }

                while (1) {
                        int pss_kb;

                        /* skip one line, this contains the object mapped. */
                        if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
                                break;
                        }
                        /* then there's a 28 char 14 line block */
                        if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
                                break;
                        }
                        /* fread() does not terminate the data; do it before atoi() */
                        buf[28 * 14] = '\0';
                        pss_kb = atoi(&buf[61]);
                        ps->sample->pss += pss_kb;

                        /* skip one more line if this is a newer kernel */
                        if (skip == 2) {
                                if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
                                        break;
                        }
                }
                if (ps->sample->pss > ps->pss_max)
                        ps->pss_max = ps->sample->pss;

catch_rename:
                /* catch process rename, try to randomize time */
                mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
                if (((samples - ps->pid) + pid) % (int)(mod) == 0) {

                        /* re-fetch name */
                        /* get name, start time */
                        if (!ps->sched) {
                                sprintf(filename, "%d/sched", pid);
                                ps->sched = openat(procfd, filename, O_RDONLY);
                                if (ps->sched == -1) {
                                        /* keep "not open" so a later sample retries */
                                        ps->sched = 0;
                                        continue;
                                }
                        }
                        s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
                        if (s <= 0) {
                                /* clean up file descriptors */
                                ps_close_fds(ps);
                                //if (ps->smaps)
                                //        fclose(ps->smaps);
                                continue;
                        }
                        buf[s] = '\0';

                        if (sscanf(buf, "%255s %*s %*s", key) < 1)
                                continue;

                        strscpy(ps->name, sizeof(ps->name), key);

                        /* cmdline */
                        if (arg_show_cmdline)
                                pid_cmdline_strscpy(ps->name, sizeof(ps->name), pid);
                }
        }
}