cgroup: optionally mount a specific cgroup controllers together, and add cpu+cpuacct to the default

This commit is contained in:
Lennart Poettering 2011-08-23 00:37:35 +02:00
parent c5f0532ff1
commit 0c85a4f3ef
14 changed files with 307 additions and 29 deletions

9
TODO
View File

@ -24,8 +24,6 @@ Features:
* hide PAM/TCPWrap options in fragment parser when compile time disabled
* make arbitrary cgroups attributes settable
* when we automatically restart a service, ensure we retsart its rdeps, too.
* allow Type=simple with PIDFile=
@ -46,8 +44,6 @@ Features:
* logind: use sysfs path in device hash table instead of sysname, as soon as fb driver is fixed
* timedated: implement NTP calls
* implement Register= switch in .socket units to enable registration
in Avahi, RPC and other socket registration services.
@ -84,10 +80,6 @@ Features:
* GC unreferenced jobs (such as .device jobs)
* add JoinControllers= to system.conf to mount certain cgroup
controllers together in order to guarantee atomic creation/addition
of cgroups
* avoid DefaultStandardOutput=syslog to have any effect on StandardInput=socket services
* cgroup_notify_empty(): recursively check groups up the tree, too
@ -102,6 +94,7 @@ Features:
- cgroup best pratices to avoid stepping on each others toes
- how to pass throw-away units to systemd, or dynamically change properties of existing units
- how to integrate cgconfig and suchlike with systemd
- security properties
* allow port=0 in .socket units

View File

@ -129,6 +129,26 @@
touch any hierarchies but its
own.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>JoinControllers=cpu,cpuacct</varname></term>
<listitem><para>Configures controllers
that shall be mounted in a single
hierarchy. By default systemd will
mount all controllers which are
enabled in the kernel in individual
hierachies, with the exception of
those listed in this setting. Takes a
space separated list of comma
separated controller names, in order
to allow multiple joined
hierarchies. Defaults to
'cpu,cpuacct'. Pass an empty string to
ensure that systemd mounts all
controllers in separate
hierarchies.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -611,7 +611,7 @@ static int automount_start(Unit *u) {
assert(a->state == AUTOMOUNT_DEAD || a->state == AUTOMOUNT_FAILED);
if (path_is_mount_point(a->where)) {
if (path_is_mount_point(a->where, false)) {
log_error("Path %s is already a mount point, refusing start for %s", a->where, u->meta.id);
return -EEXIST;
}

View File

@ -513,7 +513,7 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch
if (_unlikely_(!good)) {
int r;
r = path_is_mount_point("/sys/fs/cgroup");
r = path_is_mount_point("/sys/fs/cgroup", false);
if (r <= 0)
return r < 0 ? r : -ENOENT;

View File

@ -270,7 +270,7 @@ int manager_setup_cgroup(Manager *m) {
assert(m);
/* 0. Be nice to Ingo Molnar #628004 */
if (path_is_mount_point("/sys/fs/cgroup/systemd") <= 0) {
if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
log_warning("No control group support available, not creating root group.");
return 0;
}

View File

@ -75,6 +75,7 @@ static bool arg_sysv_console = true;
static bool arg_mount_auto = true;
static bool arg_swap_auto = true;
static char **arg_default_controllers = NULL;
static char ***arg_join_controllers = NULL;
static ExecOutput arg_default_std_output = EXEC_OUTPUT_INHERIT;
static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
@ -494,6 +495,124 @@ static int config_parse_cpu_affinity2(
return 0;
}
static void strv_free_free(char ***l) {
char ***i;
if (!l)
return;
for (i = l; *i; i++)
strv_free(*i);
free(l);
}
static void free_join_controllers(void) {
if (!arg_join_controllers)
return;
strv_free_free(arg_join_controllers);
arg_join_controllers = NULL;
}
static int config_parse_join_controllers(
const char *filename,
unsigned line,
const char *section,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
unsigned n = 0;
char *state, *w;
size_t length;
assert(filename);
assert(lvalue);
assert(rvalue);
free_join_controllers();
FOREACH_WORD_QUOTED(w, length, rvalue, state) {
char *s, **l;
s = strndup(w, length);
if (!s)
return -ENOMEM;
l = strv_split(s, ",");
free(s);
strv_uniq(l);
if (strv_length(l) <= 1) {
strv_free(l);
continue;
}
if (!arg_join_controllers) {
arg_join_controllers = new(char**, 2);
if (!arg_join_controllers) {
strv_free(l);
return -ENOMEM;
}
arg_join_controllers[0] = l;
arg_join_controllers[1] = NULL;
n = 1;
} else {
char ***a;
char ***t;
t = new0(char**, n+2);
if (!t) {
strv_free(l);
return -ENOMEM;
}
n = 0;
for (a = arg_join_controllers; *a; a++) {
if (strv_overlap(*a, l)) {
char **c;
c = strv_merge(*a, l);
if (!c) {
strv_free(l);
strv_free_free(t);
return -ENOMEM;
}
strv_free(l);
l = c;
} else {
char **c;
c = strv_copy(*a);
if (!c) {
strv_free(l);
strv_free_free(t);
return -ENOMEM;
}
t[n++] = c;
}
}
t[n++] = strv_uniq(l);
strv_free_free(arg_join_controllers);
arg_join_controllers = t;
}
}
return 0;
}
static int parse_config_file(void) {
const ConfigTableItem items[] = {
@ -514,6 +633,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultControllers", config_parse_strv, 0, &arg_default_controllers },
{ "Manager", "DefaultStandardOutput", config_parse_output, 0, &arg_default_std_output },
{ "Manager", "DefaultStandardError", config_parse_output, 0, &arg_default_std_error },
{ "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
{ NULL, NULL, NULL, 0, NULL }
};
@ -1084,14 +1204,28 @@ int main(int argc, char *argv[]) {
log_open();
}
/* Initialize default unit */
if (set_default_unit(SPECIAL_DEFAULT_TARGET) < 0)
goto finish;
/* By default, mount "cpu" and "cpuacct" together */
arg_join_controllers = new(char**, 2);
if (!arg_join_controllers)
goto finish;
arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
arg_join_controllers[1] = NULL;
if (!arg_join_controllers[0])
goto finish;
/* Mount /proc, /sys and friends, so that /proc/cmdline and
* /proc/$PID/fd is available. */
if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS"))
if (mount_setup(loaded_policy) < 0)
if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) {
r = mount_setup(loaded_policy);
if (r < 0)
goto finish;
}
/* Reset all signal handlers. */
assert_se(reset_all_signal_handlers() == 0);
@ -1199,6 +1333,12 @@ int main(int argc, char *argv[]) {
if (getpid() == 1)
install_crash_handler();
if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) {
r = mount_cgroup_controllers(arg_join_controllers);
if (r < 0)
goto finish;
}
log_full(arg_running_as == MANAGER_SYSTEM ? LOG_INFO : LOG_DEBUG,
PACKAGE_STRING " running in %s mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")", manager_running_as_to_string(arg_running_as));
@ -1368,6 +1508,7 @@ finish:
free(arg_default_unit);
strv_free(arg_default_controllers);
free_join_controllers();
dbus_shutdown();

View File

@ -34,6 +34,8 @@
#include "macro.h"
#include "util.h"
#include "label.h"
#include "set.h"
#include "strv.h"
#ifndef TTY_GID
#define TTY_GID 5
@ -104,7 +106,7 @@ static int mount_one(const MountPoint *p, bool relabel) {
if (relabel)
label_fix(p->where, true);
if ((r = path_is_mount_point(p->where)) < 0)
if ((r = path_is_mount_point(p->where, true)) < 0)
return r;
if (r > 0)
@ -133,7 +135,7 @@ static int mount_one(const MountPoint *p, bool relabel) {
if (relabel)
label_fix(p->where, false);
return 0;
return 1;
}
int mount_setup_early(void) {
@ -155,25 +157,33 @@ int mount_setup_early(void) {
return r;
}
static int mount_cgroup_controllers(void) {
int mount_cgroup_controllers(char ***join_controllers) {
int r;
FILE *f;
char buf[LINE_MAX];
Set *controllers;
/* Mount all available cgroup controllers that are built into the kernel. */
if (!(f = fopen("/proc/cgroups", "re"))) {
f = fopen("/proc/cgroups", "re");
if (!f) {
log_error("Failed to enumerate cgroup controllers: %m");
return 0;
}
controllers = set_new(string_hash_func, string_compare_func);
if (!controllers) {
r = -ENOMEM;
log_error("Failed to allocate controller set.");
goto finish;
}
/* Ignore the header line */
(void) fgets(buf, sizeof(buf), f);
for (;;) {
MountPoint p;
char *controller, *where;
int enabled = false;
char *controller;
int enabled = 0;
if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
@ -190,8 +200,66 @@ static int mount_cgroup_controllers(void) {
continue;
}
if (asprintf(&where, "/sys/fs/cgroup/%s", controller) < 0) {
r = set_put(controllers, controller);
if (r < 0) {
log_error("Failed to add controller to set.");
free(controller);
goto finish;
}
}
for (;;) {
MountPoint p;
char *controller, *where, *options;
char ***k = NULL;
controller = set_steal_first(controllers);
if (!controller)
break;
if (join_controllers)
for (k = join_controllers; *k; k++)
if (strv_find(*k, controller))
break;
if (k && *k) {
char **i, **j;
for (i = *k, j = *k; *i; i++) {
if (!streq(*i, controller)) {
char *t;
t = set_remove(controllers, *i);
if (!t) {
free(*i);
continue;
}
free(t);
}
*(j++) = *i;
}
*j = NULL;
options = strv_join(*k, ",");
if (!options) {
log_error("Failed to join options");
free(controller);
r = -ENOMEM;
goto finish;
}
} else {
options = controller;
controller = NULL;
}
where = strappend("/sys/fs/cgroup/", options);
if (!where) {
log_error("Failed to build path");
free(options);
r = -ENOMEM;
goto finish;
}
@ -200,7 +268,7 @@ static int mount_cgroup_controllers(void) {
p.what = "cgroup";
p.where = where;
p.type = "cgroup";
p.options = controller;
p.options = options;
p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
p.fatal = false;
@ -208,13 +276,45 @@ static int mount_cgroup_controllers(void) {
free(controller);
free(where);
if (r < 0)
if (r < 0) {
free(options);
goto finish;
}
if (r > 0 && k && *k) {
char **i;
for (i = *k; *i; i++) {
char *t;
t = strappend("/sys/fs/cgroup/", *i);
if (!t) {
log_error("Failed to build path");
r = -ENOMEM;
free(options);
goto finish;
}
r = symlink(options, t);
free(t);
if (r < 0 && errno != EEXIST) {
log_error("Failed to create symlink: %m");
r = -errno;
free(options);
goto finish;
}
}
}
free(options);
}
r = 0;
finish:
set_free_free(controllers);
fclose(f);
return r;
@ -301,5 +401,5 @@ int mount_setup(bool loaded_policy) {
mkdir("/run/systemd", 0755);
mkdir("/run/systemd/system", 0755);
return mount_cgroup_controllers();
return 0;
}

View File

@ -28,6 +28,8 @@ int mount_setup_early(void);
int mount_setup(bool loaded_policy);
int mount_cgroup_controllers(char ***join_controllers);
bool mount_point_is_api(const char *path);
bool mount_point_ignore(const char *path);

View File

@ -167,7 +167,7 @@ static int mount_all(const char *dest) {
break;
}
if ((t = path_is_mount_point(where)) < 0) {
if ((t = path_is_mount_point(where, false)) < 0) {
log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
free(where);

View File

@ -660,3 +660,16 @@ char **strv_parse_nulstr(const char *s, size_t l) {
return v;
}
bool strv_overlap(char **a, char **b) {
char **i, **j;
STRV_FOREACH(i, a) {
STRV_FOREACH(j, b) {
if (streq(*i, *j))
return true;
}
}
return false;
}

View File

@ -68,6 +68,8 @@ char **strv_env_clean(char **l);
char **strv_parse_nulstr(const char *s, size_t l);
bool strv_overlap(char **a, char **b);
#define STRV_FOREACH(s, l) \
for ((s) = (l); (s) && *(s); (s)++)

View File

@ -23,3 +23,4 @@
#DefaultControllers=cpu
#DefaultStandardOutput=inherit
#DefaultStandardError=inherit
#JoinControllers=cpu,cpuacct

View File

@ -2895,19 +2895,25 @@ ssize_t loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
return n;
}
int path_is_mount_point(const char *t) {
int path_is_mount_point(const char *t, bool allow_symlink) {
struct stat a, b;
char *parent;
int r;
if (lstat(t, &a) < 0) {
if (allow_symlink)
r = stat(t, &a);
else
r = lstat(t, &a);
if (r < 0) {
if (errno == ENOENT)
return 0;
return -errno;
}
if ((r = parent_of_path(t, &parent)) < 0)
r = parent_of_path(t, &parent);
if (r < 0)
return r;
r = lstat(parent, &b);

View File

@ -341,7 +341,7 @@ int fopen_temporary(const char *path, FILE **_f, char **_temp_path);
ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll);
ssize_t loop_write(int fd, const void *buf, size_t nbytes, bool do_poll);
int path_is_mount_point(const char *path);
int path_is_mount_point(const char *path, bool allow_symlink);
bool is_device_path(const char *path);