2010-06-21 23:27:18 +02:00
|
|
|
/***
|
|
|
|
This file is part of systemd.
|
|
|
|
|
|
|
|
Copyright 2010 Lennart Poettering
|
|
|
|
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
2012-04-12 00:20:58 +02:00
|
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
2010-06-21 23:27:18 +02:00
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
2012-04-12 00:20:58 +02:00
|
|
|
Lesser General Public License for more details.
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2012-04-12 00:20:58 +02:00
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
2010-06-21 23:27:18 +02:00
|
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***/
|
|
|
|
|
2015-10-22 20:12:31 +02:00
|
|
|
#include <dirent.h>
|
2010-06-21 23:27:18 +02:00
|
|
|
#include <errno.h>
|
2015-10-22 20:12:31 +02:00
|
|
|
#include <ftw.h>
|
2015-11-30 21:43:37 +01:00
|
|
|
#include <limits.h>
|
2010-06-21 23:27:18 +02:00
|
|
|
#include <signal.h>
|
2015-11-30 21:43:37 +01:00
|
|
|
#include <stddef.h>
|
2010-06-21 23:27:18 +02:00
|
|
|
#include <stdlib.h>
|
2015-10-22 20:12:31 +02:00
|
|
|
#include <string.h>
|
2010-07-21 04:32:44 +02:00
|
|
|
#include <sys/stat.h>
|
2015-11-30 21:43:37 +01:00
|
|
|
#include <sys/statfs.h>
|
2010-07-21 04:32:44 +02:00
|
|
|
#include <sys/types.h>
|
2016-08-30 23:18:46 +02:00
|
|
|
#include <sys/xattr.h>
|
2015-10-22 20:12:31 +02:00
|
|
|
#include <unistd.h>
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2015-10-27 03:01:06 +01:00
|
|
|
#include "alloc-util.h"
|
2015-10-25 13:14:12 +01:00
|
|
|
#include "cgroup-util.h"
|
2015-12-01 23:22:03 +01:00
|
|
|
#include "def.h"
|
2015-10-26 20:07:55 +01:00
|
|
|
#include "dirent-util.h"
|
2015-10-22 20:12:31 +02:00
|
|
|
#include "extract-word.h"
|
2015-10-25 13:14:12 +01:00
|
|
|
#include "fd-util.h"
|
2015-10-22 20:12:31 +02:00
|
|
|
#include "fileio.h"
|
2016-11-07 16:14:59 +01:00
|
|
|
#include "format-util.h"
|
2015-10-26 21:16:26 +01:00
|
|
|
#include "fs-util.h"
|
2015-12-01 23:22:03 +01:00
|
|
|
#include "log.h"
|
2015-10-22 20:12:31 +02:00
|
|
|
#include "login-util.h"
|
|
|
|
#include "macro.h"
|
2015-12-01 23:22:03 +01:00
|
|
|
#include "missing.h"
|
2015-10-22 20:12:31 +02:00
|
|
|
#include "mkdir.h"
|
2015-10-26 16:18:16 +01:00
|
|
|
#include "parse-util.h"
|
2012-05-07 21:36:12 +02:00
|
|
|
#include "path-util.h"
|
2015-10-27 01:26:52 +01:00
|
|
|
#include "proc-cmdline.h"
|
2015-10-22 20:12:31 +02:00
|
|
|
#include "process-util.h"
|
|
|
|
#include "set.h"
|
2013-06-20 03:45:08 +02:00
|
|
|
#include "special.h"
|
2015-10-27 01:26:52 +01:00
|
|
|
#include "stat-util.h"
|
2016-01-12 15:34:20 +01:00
|
|
|
#include "stdio-util.h"
|
2015-10-26 22:31:05 +01:00
|
|
|
#include "string-table.h"
|
2015-10-24 22:58:24 +02:00
|
|
|
#include "string-util.h"
|
2015-10-22 20:12:31 +02:00
|
|
|
#include "unit-name.h"
|
2015-10-25 22:32:30 +01:00
|
|
|
#include "user-util.h"
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2010-07-11 00:50:49 +02:00
|
|
|
int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_free_ char *fs = NULL;
|
2010-07-11 00:50:49 +02:00
|
|
|
FILE *f;
|
2013-04-16 04:36:06 +02:00
|
|
|
int r;
|
2010-07-11 00:50:49 +02:00
|
|
|
|
|
|
|
assert(_f);
|
|
|
|
|
2012-04-16 17:35:58 +02:00
|
|
|
r = cg_get_path(controller, path, "cgroup.procs", &fs);
|
|
|
|
if (r < 0)
|
2010-07-11 00:50:49 +02:00
|
|
|
return r;
|
|
|
|
|
|
|
|
f = fopen(fs, "re");
|
|
|
|
if (!f)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
*_f = f;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_read_pid(FILE *f, pid_t *_pid) {
|
|
|
|
unsigned long ul;
|
|
|
|
|
|
|
|
/* Note that the cgroup.procs might contain duplicates! See
|
|
|
|
* cgroups.txt for details. */
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
assert(f);
|
|
|
|
assert(_pid);
|
|
|
|
|
2010-07-11 00:50:49 +02:00
|
|
|
errno = 0;
|
|
|
|
if (fscanf(f, "%lu", &ul) != 1) {
|
|
|
|
|
|
|
|
if (feof(f))
|
|
|
|
return 0;
|
|
|
|
|
2016-01-11 18:47:14 +01:00
|
|
|
return errno > 0 ? -errno : -EIO;
|
2010-07-11 00:50:49 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ul <= 0)
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
*_pid = (pid_t) ul;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2016-03-25 16:38:50 +01:00
|
|
|
int cg_read_event(const char *controller, const char *path, const char *event,
|
|
|
|
char **val)
|
|
|
|
{
|
|
|
|
_cleanup_free_ char *events = NULL, *content = NULL;
|
|
|
|
char *p, *line;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = cg_get_path(controller, path, "cgroup.events", &events);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
r = read_full_file(events, &content, NULL);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
p = content;
|
|
|
|
while ((line = strsep(&p, "\n"))) {
|
|
|
|
char *key;
|
|
|
|
|
|
|
|
key = strsep(&line, " ");
|
|
|
|
if (!key || !line)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (strcmp(key, event))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
*val = strdup(line);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
2016-06-23 11:52:45 +02:00
|
|
|
bool cg_ns_supported(void) {
|
|
|
|
static thread_local int enabled = -1;
|
|
|
|
|
|
|
|
if (enabled >= 0)
|
|
|
|
return enabled;
|
|
|
|
|
|
|
|
if (access("/proc/self/ns/cgroup", F_OK) == 0)
|
|
|
|
enabled = 1;
|
|
|
|
else
|
|
|
|
enabled = 0;
|
|
|
|
|
|
|
|
return enabled;
|
|
|
|
}
|
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_free_ char *fs = NULL;
|
2010-07-12 18:16:44 +02:00
|
|
|
int r;
|
|
|
|
DIR *d;
|
|
|
|
|
|
|
|
assert(_d);
|
|
|
|
|
|
|
|
/* This is not recursive! */
|
|
|
|
|
2012-04-16 17:35:58 +02:00
|
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
|
|
if (r < 0)
|
2010-07-12 18:16:44 +02:00
|
|
|
return r;
|
|
|
|
|
|
|
|
d = opendir(fs);
|
|
|
|
if (!d)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
*_d = d;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_read_subgroup(DIR *d, char **fn) {
|
|
|
|
struct dirent *de;
|
|
|
|
|
|
|
|
assert(d);
|
2013-04-16 04:36:06 +02:00
|
|
|
assert(fn);
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2015-09-01 17:54:17 +02:00
|
|
|
FOREACH_DIRENT_ALL(de, d, return -errno) {
|
2010-07-12 18:16:44 +02:00
|
|
|
char *b;
|
|
|
|
|
|
|
|
if (de->d_type != DT_DIR)
|
|
|
|
continue;
|
|
|
|
|
2017-02-02 00:06:18 +01:00
|
|
|
if (dot_or_dot_dot(de->d_name))
|
2010-07-12 18:16:44 +02:00
|
|
|
continue;
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
b = strdup(de->d_name);
|
|
|
|
if (!b)
|
2010-07-12 18:16:44 +02:00
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
*fn = b;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
int cg_rmdir(const char *controller, const char *path) {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_free_ char *p = NULL;
|
2010-07-12 18:16:44 +02:00
|
|
|
int r;
|
|
|
|
|
2011-08-21 20:05:51 +02:00
|
|
|
r = cg_get_path(controller, path, NULL, &p);
|
|
|
|
if (r < 0)
|
2010-07-12 18:16:44 +02:00
|
|
|
return r;
|
|
|
|
|
|
|
|
r = rmdir(p);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (r < 0 && errno != ENOENT)
|
|
|
|
return -errno;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
|
|
|
|
r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
|
|
|
|
if (r < 0)
|
|
|
|
log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
|
|
|
|
}
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
return 0;
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
int cg_kill(
|
|
|
|
const char *controller,
|
|
|
|
const char *path,
|
|
|
|
int sig,
|
|
|
|
CGroupFlags flags,
|
|
|
|
Set *s,
|
|
|
|
cg_kill_log_func_t log_kill,
|
|
|
|
void *userdata) {
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_set_free_ Set *allocated_set = NULL;
|
2010-07-12 18:16:44 +02:00
|
|
|
bool done = false;
|
2010-06-21 23:27:18 +02:00
|
|
|
int r, ret = 0;
|
2010-07-12 18:16:44 +02:00
|
|
|
pid_t my_pid;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(sig >= 0);
|
|
|
|
|
2016-07-20 11:16:53 +02:00
|
|
|
/* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
|
|
|
|
* SIGCONT on SIGKILL. */
|
|
|
|
if (IN_SET(sig, SIGCONT, SIGKILL))
|
|
|
|
flags &= ~CGROUP_SIGCONT;
|
|
|
|
|
2010-06-21 23:27:18 +02:00
|
|
|
/* This goes through the tasks list and kills them all. This
|
|
|
|
* is repeated until no further processes are added to the
|
|
|
|
* tasks list, to properly handle forking processes */
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
if (!s) {
|
2014-08-13 01:00:18 +02:00
|
|
|
s = allocated_set = set_new(NULL);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (!s)
|
2010-08-31 23:24:47 +02:00
|
|
|
return -ENOMEM;
|
2013-04-16 04:36:06 +02:00
|
|
|
}
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
my_pid = getpid();
|
|
|
|
|
|
|
|
do {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_fclose_ FILE *f = NULL;
|
2011-01-22 01:47:37 +01:00
|
|
|
pid_t pid = 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
done = true;
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
r = cg_enumerate_processes(controller, path, &f);
|
|
|
|
if (r < 0) {
|
2010-07-13 19:00:01 +02:00
|
|
|
if (ret >= 0 && r != -ENOENT)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
return ret;
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
2010-07-11 00:50:49 +02:00
|
|
|
|
|
|
|
while ((r = cg_read_pid(f, &pid)) > 0) {
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
|
2010-07-11 00:50:49 +02:00
|
|
|
continue;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2015-09-03 13:22:51 +02:00
|
|
|
if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
|
2010-07-11 00:50:49 +02:00
|
|
|
continue;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
if (log_kill)
|
|
|
|
log_kill(pid, sig, userdata);
|
|
|
|
|
2010-06-21 23:27:18 +02:00
|
|
|
/* If we haven't killed this process yet, kill
|
|
|
|
* it */
|
2010-07-13 19:00:01 +02:00
|
|
|
if (kill(pid, sig) < 0) {
|
|
|
|
if (ret >= 0 && errno != ESRCH)
|
2010-06-21 23:27:18 +02:00
|
|
|
ret = -errno;
|
2014-02-06 19:27:59 +01:00
|
|
|
} else {
|
2016-07-20 11:16:05 +02:00
|
|
|
if (flags & CGROUP_SIGCONT)
|
2015-09-01 18:02:43 +02:00
|
|
|
(void) kill(pid, SIGCONT);
|
2011-03-03 23:55:30 +01:00
|
|
|
|
2014-02-06 19:27:59 +01:00
|
|
|
if (ret == 0)
|
|
|
|
ret = 1;
|
2011-03-03 23:55:30 +01:00
|
|
|
}
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
done = false;
|
|
|
|
|
2015-09-03 13:22:51 +02:00
|
|
|
r = set_put(s, PID_TO_PTR(pid));
|
2013-04-16 04:36:06 +02:00
|
|
|
if (r < 0) {
|
2010-07-12 18:16:44 +02:00
|
|
|
if (ret >= 0)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
return ret;
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (r < 0) {
|
|
|
|
if (ret >= 0)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
return ret;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* To avoid racing against processes which fork
|
|
|
|
* quicker than we can kill them we repeat this until
|
|
|
|
* no new pids need to be killed. */
|
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
} while (!done);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
return ret;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
int cg_kill_recursive(
|
|
|
|
const char *controller,
|
|
|
|
const char *path,
|
|
|
|
int sig,
|
|
|
|
CGroupFlags flags,
|
|
|
|
Set *s,
|
|
|
|
cg_kill_log_func_t log_kill,
|
|
|
|
void *userdata) {
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_set_free_ Set *allocated_set = NULL;
|
|
|
|
_cleanup_closedir_ DIR *d = NULL;
|
2015-09-01 18:02:43 +02:00
|
|
|
int r, ret;
|
2010-07-12 18:16:44 +02:00
|
|
|
char *fn;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(path);
|
|
|
|
assert(sig >= 0);
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
if (!s) {
|
2014-08-13 01:00:18 +02:00
|
|
|
s = allocated_set = set_new(NULL);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (!s)
|
2010-08-31 23:24:47 +02:00
|
|
|
return -ENOMEM;
|
2013-04-16 04:36:06 +02:00
|
|
|
}
|
2010-08-31 23:24:47 +02:00
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
r = cg_enumerate_subgroups(controller, path, &d);
|
|
|
|
if (r < 0) {
|
2010-07-13 19:00:01 +02:00
|
|
|
if (ret >= 0 && r != -ENOENT)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
return ret;
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
while ((r = cg_read_subgroup(d, &fn)) > 0) {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_free_ char *p = NULL;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-10-23 17:43:27 +02:00
|
|
|
p = strjoin(path, "/", fn);
|
2010-07-12 18:16:44 +02:00
|
|
|
free(fn);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (!p)
|
|
|
|
return -ENOMEM;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
|
2015-09-01 18:02:43 +02:00
|
|
|
if (r != 0 && ret >= 0)
|
2010-07-12 18:16:44 +02:00
|
|
|
ret = r;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
2013-04-16 04:36:06 +02:00
|
|
|
if (ret >= 0 && r < 0)
|
2010-07-12 18:16:44 +02:00
|
|
|
ret = r;
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
if (flags & CGROUP_REMOVE) {
|
2013-06-27 04:14:27 +02:00
|
|
|
r = cg_rmdir(controller, path);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
|
|
|
|
return r;
|
|
|
|
}
|
2010-08-31 23:24:47 +02:00
|
|
|
|
2010-06-21 23:27:18 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
int cg_migrate(
|
|
|
|
const char *cfrom,
|
|
|
|
const char *pfrom,
|
|
|
|
const char *cto,
|
|
|
|
const char *pto,
|
|
|
|
CGroupFlags flags) {
|
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
bool done = false;
|
2013-01-12 04:24:12 +01:00
|
|
|
_cleanup_set_free_ Set *s = NULL;
|
2010-06-21 23:27:18 +02:00
|
|
|
int r, ret = 0;
|
|
|
|
pid_t my_pid;
|
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
assert(cfrom);
|
|
|
|
assert(pfrom);
|
|
|
|
assert(cto);
|
|
|
|
assert(pto);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2014-08-13 01:00:18 +02:00
|
|
|
s = set_new(NULL);
|
2013-01-12 04:24:12 +01:00
|
|
|
if (!s)
|
2010-07-12 18:16:44 +02:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2010-06-21 23:27:18 +02:00
|
|
|
my_pid = getpid();
|
|
|
|
|
|
|
|
do {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_fclose_ FILE *f = NULL;
|
2011-01-22 01:47:37 +01:00
|
|
|
pid_t pid = 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
done = true;
|
|
|
|
|
2013-06-06 15:49:01 +02:00
|
|
|
r = cg_enumerate_processes(cfrom, pfrom, &f);
|
2013-01-12 04:24:12 +01:00
|
|
|
if (r < 0) {
|
2010-07-13 19:00:01 +02:00
|
|
|
if (ret >= 0 && r != -ENOENT)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
return ret;
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
2010-07-11 00:50:49 +02:00
|
|
|
|
|
|
|
while ((r = cg_read_pid(f, &pid)) > 0) {
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
/* This might do weird stuff if we aren't a
|
|
|
|
* single-threaded program. However, we
|
|
|
|
* luckily know we are not */
|
2016-07-20 11:16:05 +02:00
|
|
|
if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
|
2010-07-11 00:50:49 +02:00
|
|
|
continue;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2015-09-03 13:22:51 +02:00
|
|
|
if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
|
2010-07-12 18:16:44 +02:00
|
|
|
continue;
|
|
|
|
|
2015-09-01 17:53:14 +02:00
|
|
|
/* Ignore kernel threads. Since they can only
|
|
|
|
* exist in the root cgroup, we only check for
|
|
|
|
* them there. */
|
|
|
|
if (cfrom &&
|
|
|
|
(isempty(pfrom) || path_equal(pfrom, "/")) &&
|
|
|
|
is_kernel_thread(pid) > 0)
|
|
|
|
continue;
|
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
r = cg_attach(cto, pto, pid);
|
|
|
|
if (r < 0) {
|
2010-07-13 19:00:01 +02:00
|
|
|
if (ret >= 0 && r != -ESRCH)
|
2010-07-12 18:16:44 +02:00
|
|
|
ret = r;
|
|
|
|
} else if (ret == 0)
|
|
|
|
ret = 1;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
done = false;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2015-09-03 13:22:51 +02:00
|
|
|
r = set_put(s, PID_TO_PTR(pid));
|
2013-01-12 04:24:12 +01:00
|
|
|
if (r < 0) {
|
2010-07-12 18:16:44 +02:00
|
|
|
if (ret >= 0)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
return ret;
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (r < 0) {
|
|
|
|
if (ret >= 0)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
return ret;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
2010-07-12 18:16:44 +02:00
|
|
|
} while (!done);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
return ret;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
int cg_migrate_recursive(
|
|
|
|
const char *cfrom,
|
|
|
|
const char *pfrom,
|
|
|
|
const char *cto,
|
|
|
|
const char *pto,
|
2016-07-20 11:16:05 +02:00
|
|
|
CGroupFlags flags) {
|
2013-06-27 04:14:27 +02:00
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
_cleanup_closedir_ DIR *d = NULL;
|
2013-04-16 04:36:06 +02:00
|
|
|
int r, ret = 0;
|
2010-07-12 18:16:44 +02:00
|
|
|
char *fn;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
assert(cfrom);
|
|
|
|
assert(pfrom);
|
|
|
|
assert(cto);
|
|
|
|
assert(pto);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
r = cg_enumerate_subgroups(cfrom, pfrom, &d);
|
|
|
|
if (r < 0) {
|
2010-07-13 19:00:01 +02:00
|
|
|
if (ret >= 0 && r != -ENOENT)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
return ret;
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
while ((r = cg_read_subgroup(d, &fn)) > 0) {
|
2013-01-12 04:24:12 +01:00
|
|
|
_cleanup_free_ char *p = NULL;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-10-23 17:43:27 +02:00
|
|
|
p = strjoin(pfrom, "/", fn);
|
2010-07-12 18:16:44 +02:00
|
|
|
free(fn);
|
2015-09-01 18:02:43 +02:00
|
|
|
if (!p)
|
|
|
|
return -ENOMEM;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
|
2010-07-12 18:16:44 +02:00
|
|
|
if (r != 0 && ret >= 0)
|
|
|
|
ret = r;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
if (r < 0 && ret >= 0)
|
|
|
|
ret = r;
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
if (flags & CGROUP_REMOVE) {
|
2013-06-27 04:14:27 +02:00
|
|
|
r = cg_rmdir(cfrom, pfrom);
|
2013-01-12 04:24:12 +01:00
|
|
|
if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
|
|
|
|
return r;
|
|
|
|
}
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
int cg_migrate_recursive_fallback(
|
|
|
|
const char *cfrom,
|
|
|
|
const char *pfrom,
|
|
|
|
const char *cto,
|
|
|
|
const char *pto,
|
2016-07-20 11:16:05 +02:00
|
|
|
CGroupFlags flags) {
|
2013-09-24 04:56:05 +02:00
|
|
|
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(cfrom);
|
|
|
|
assert(pfrom);
|
|
|
|
assert(cto);
|
|
|
|
assert(pto);
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
|
2013-09-24 04:56:05 +02:00
|
|
|
if (r < 0) {
|
|
|
|
char prefix[strlen(pto) + 1];
|
|
|
|
|
|
|
|
/* This didn't work? Then let's try all prefixes of the destination */
|
|
|
|
|
2013-09-25 20:58:23 +02:00
|
|
|
PATH_FOREACH_PREFIX(prefix, pto) {
|
2015-09-01 18:02:43 +02:00
|
|
|
int q;
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
|
2015-09-01 18:02:43 +02:00
|
|
|
if (q >= 0)
|
|
|
|
return q;
|
2013-09-24 04:56:05 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-01 18:02:43 +02:00
|
|
|
return r;
|
2013-09-24 04:56:05 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
static const char *controller_to_dirname(const char *controller) {
|
|
|
|
const char *e;
|
2012-04-14 02:15:13 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
assert(controller);
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
/* Converts a controller name to the directory name below
|
|
|
|
* /sys/fs/cgroup/ we want to mount it to. Effectively, this
|
|
|
|
* just cuts off the name= prefixed used for named
|
|
|
|
* hierarchies, if it is specified. */
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
|
|
|
|
if (cg_hybrid_unified())
|
|
|
|
controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
|
|
|
|
else
|
|
|
|
controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
|
|
|
|
}
|
2016-11-21 20:45:53 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
e = startswith(controller, "name=");
|
|
|
|
if (e)
|
|
|
|
return e;
|
|
|
|
|
|
|
|
return controller;
|
2012-04-14 02:15:13 +02:00
|
|
|
}
|
|
|
|
|
2015-09-03 14:56:26 +02:00
|
|
|
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
|
|
|
|
const char *dn;
|
2012-04-17 18:42:09 +02:00
|
|
|
char *t = NULL;
|
2012-04-14 02:15:13 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
assert(fs);
|
2015-09-03 14:56:26 +02:00
|
|
|
assert(controller);
|
|
|
|
|
|
|
|
dn = controller_to_dirname(controller);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
if (isempty(path) && isempty(suffix))
|
2015-09-03 14:56:26 +02:00
|
|
|
t = strappend("/sys/fs/cgroup/", dn);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
else if (isempty(path))
|
2016-10-23 17:43:27 +02:00
|
|
|
t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
else if (isempty(suffix))
|
2016-10-23 17:43:27 +02:00
|
|
|
t = strjoin("/sys/fs/cgroup/", dn, "/", path);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
else
|
2016-10-23 17:43:27 +02:00
|
|
|
t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!t)
|
|
|
|
return -ENOMEM;
|
2012-04-14 02:15:13 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
*fs = t;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int join_path_unified(const char *path, const char *suffix, char **fs) {
|
|
|
|
char *t;
|
|
|
|
|
|
|
|
assert(fs);
|
|
|
|
|
|
|
|
if (isempty(path) && isempty(suffix))
|
|
|
|
t = strdup("/sys/fs/cgroup");
|
|
|
|
else if (isempty(path))
|
|
|
|
t = strappend("/sys/fs/cgroup/", suffix);
|
|
|
|
else if (isempty(suffix))
|
|
|
|
t = strappend("/sys/fs/cgroup/", path);
|
|
|
|
else
|
2016-10-23 17:43:27 +02:00
|
|
|
t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
|
2012-04-14 02:15:13 +02:00
|
|
|
if (!t)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
*fs = t;
|
2012-04-14 02:15:13 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-06-21 23:27:18 +02:00
|
|
|
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2010-07-12 03:15:20 +02:00
|
|
|
assert(fs);
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!controller) {
|
|
|
|
char *t;
|
|
|
|
|
2015-09-03 14:56:26 +02:00
|
|
|
/* If no controller is specified, we return the path
|
|
|
|
* *below* the controllers, without any prefix. */
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
if (!path && !suffix)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2015-09-03 19:43:15 +02:00
|
|
|
if (!suffix)
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
t = strdup(path);
|
2015-09-03 19:43:15 +02:00
|
|
|
else if (!path)
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
t = strdup(suffix);
|
|
|
|
else
|
2016-10-23 17:43:27 +02:00
|
|
|
t = strjoin(path, "/", suffix);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!t)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
*fs = path_kill_slashes(t);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!cg_controller_is_valid(controller))
|
2013-04-25 00:01:29 +02:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_all_unified())
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
r = join_path_unified(path, suffix, fs);
|
2015-09-03 14:56:26 +02:00
|
|
|
else
|
|
|
|
r = join_path_legacy(controller, path, suffix, fs);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2013-04-16 04:36:06 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
path_kill_slashes(*fs);
|
|
|
|
return 0;
|
2012-04-14 02:15:13 +02:00
|
|
|
}
|
2010-07-12 03:15:20 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
static int controller_is_accessible(const char *controller) {
|
2012-05-03 23:23:38 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
assert(controller);
|
2012-05-03 23:23:38 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
/* Checks whether a specific controller is accessible,
|
|
|
|
* i.e. its hierarchy mounted. In the unified hierarchy all
|
|
|
|
* controllers are considered accessible, except for the named
|
|
|
|
* hierarchies */
|
2014-12-30 01:57:23 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!cg_controller_is_valid(controller))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_all_unified()) {
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
/* We don't support named hierarchies if we are using
|
|
|
|
* the unified hierarchy. */
|
|
|
|
|
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (startswith(controller, "name="))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
const char *cc, *dn;
|
|
|
|
|
|
|
|
dn = controller_to_dirname(controller);
|
|
|
|
cc = strjoina("/sys/fs/cgroup/", dn);
|
|
|
|
|
|
|
|
if (laccess(cc, F_OK) < 0)
|
|
|
|
return -errno;
|
|
|
|
}
|
2012-05-03 23:23:38 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-04-14 02:15:13 +02:00
|
|
|
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
|
2012-05-03 23:23:38 +02:00
|
|
|
int r;
|
2010-07-12 03:15:20 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
assert(controller);
|
2012-04-14 02:15:13 +02:00
|
|
|
assert(fs);
|
2011-08-01 01:55:31 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
/* Check if the specified controller is actually accessible */
|
|
|
|
r = controller_is_accessible(controller);
|
2012-05-03 23:23:38 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2012-04-14 02:15:13 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return cg_get_path(controller, path, suffix, fs);
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2011-08-21 21:00:41 +02:00
|
|
|
static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
|
2013-06-27 04:14:27 +02:00
|
|
|
assert(path);
|
|
|
|
assert(sb);
|
|
|
|
assert(ftwbuf);
|
2011-08-21 21:00:41 +02:00
|
|
|
|
|
|
|
if (typeflag != FTW_DP)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (ftwbuf->level < 1)
|
|
|
|
return 0;
|
|
|
|
|
2015-09-01 18:02:43 +02:00
|
|
|
(void) rmdir(path);
|
2011-08-21 21:00:41 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-06-21 23:27:18 +02:00
|
|
|
int cg_trim(const char *controller, const char *path, bool delete_root) {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_free_ char *fs = NULL;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r = 0, q;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(path);
|
|
|
|
|
2011-08-21 21:00:41 +02:00
|
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
|
|
if (r < 0)
|
2010-06-21 23:27:18 +02:00
|
|
|
return r;
|
|
|
|
|
2011-08-21 21:00:41 +02:00
|
|
|
errno = 0;
|
2015-09-01 18:02:43 +02:00
|
|
|
if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
r = 0;
|
2016-01-11 20:31:14 +01:00
|
|
|
else if (errno > 0)
|
2015-09-01 18:02:43 +02:00
|
|
|
r = -errno;
|
|
|
|
else
|
|
|
|
r = -EIO;
|
|
|
|
}
|
2011-08-21 21:00:41 +02:00
|
|
|
|
|
|
|
if (delete_root) {
|
2013-06-27 04:14:27 +02:00
|
|
|
if (rmdir(fs) < 0 && errno != ENOENT)
|
|
|
|
return -errno;
|
2011-08-21 21:00:41 +02:00
|
|
|
}
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
|
|
|
|
q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
|
|
|
|
if (q < 0)
|
|
|
|
log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
|
|
|
|
}
|
|
|
|
|
2011-08-21 21:00:41 +02:00
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2013-10-02 04:54:07 +02:00
|
|
|
int cg_create(const char *controller, const char *path) {
|
|
|
|
_cleanup_free_ char *fs = NULL;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = cg_get_path_and_check(controller, path, NULL, &fs);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
r = mkdir_parents(fs, 0755);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
if (mkdir(fs, 0755) < 0) {
|
|
|
|
|
|
|
|
if (errno == EEXIST)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
|
|
|
|
r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
|
|
|
|
if (r < 0)
|
|
|
|
log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
|
|
|
|
}
|
|
|
|
|
2013-10-02 04:54:07 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
|
|
|
|
int r, q;
|
|
|
|
|
|
|
|
assert(pid >= 0);
|
|
|
|
|
|
|
|
r = cg_create(controller, path);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
q = cg_attach(controller, path, pid);
|
|
|
|
if (q < 0)
|
|
|
|
return q;
|
|
|
|
|
|
|
|
/* This does not remove the cgroup on failure */
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2010-06-21 23:27:18 +02:00
|
|
|
int cg_attach(const char *controller, const char *path, pid_t pid) {
|
2013-04-02 20:31:42 +02:00
|
|
|
_cleanup_free_ char *fs = NULL;
|
|
|
|
char c[DECIMAL_STR_MAX(pid_t) + 2];
|
2010-06-21 23:27:18 +02:00
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(path);
|
|
|
|
assert(pid >= 0);
|
|
|
|
|
2013-06-06 15:49:01 +02:00
|
|
|
r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
|
2012-04-14 02:15:13 +02:00
|
|
|
if (r < 0)
|
2010-07-11 00:50:49 +02:00
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
if (pid == 0)
|
|
|
|
pid = getpid();
|
|
|
|
|
2016-01-12 15:34:20 +01:00
|
|
|
xsprintf(c, PID_FMT "\n", pid);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
r = write_string_file(fs, c, 0);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
|
|
|
|
r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
|
|
|
|
if (r < 0)
|
|
|
|
log_warning_errno(r, "Failed to attach %d to compat systemd cgroup %s: %m", pid, path);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(controller);
|
|
|
|
assert(path);
|
|
|
|
assert(pid >= 0);
|
|
|
|
|
|
|
|
r = cg_attach(controller, path, pid);
|
|
|
|
if (r < 0) {
|
|
|
|
char prefix[strlen(path) + 1];
|
|
|
|
|
|
|
|
/* This didn't work? Then let's try all prefixes of
|
|
|
|
* the destination */
|
|
|
|
|
2013-09-25 20:58:23 +02:00
|
|
|
PATH_FOREACH_PREFIX(prefix, path) {
|
2015-09-01 18:02:43 +02:00
|
|
|
int q;
|
|
|
|
|
|
|
|
q = cg_attach(controller, prefix, pid);
|
|
|
|
if (q >= 0)
|
|
|
|
return q;
|
2013-09-24 04:56:05 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-01 18:02:43 +02:00
|
|
|
return r;
|
2013-09-24 04:56:05 +02:00
|
|
|
}
|
|
|
|
|
2013-04-08 19:42:58 +02:00
|
|
|
int cg_set_group_access(
|
|
|
|
const char *controller,
|
|
|
|
const char *path,
|
|
|
|
mode_t mode,
|
|
|
|
uid_t uid,
|
|
|
|
gid_t gid) {
|
|
|
|
|
2013-04-02 20:31:42 +02:00
|
|
|
_cleanup_free_ char *fs = NULL;
|
2010-06-21 23:27:18 +02:00
|
|
|
int r;
|
|
|
|
|
2015-09-01 18:02:43 +02:00
|
|
|
if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
|
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2014-11-28 20:51:01 +01:00
|
|
|
if (mode != MODE_INVALID)
|
2012-01-18 15:40:21 +01:00
|
|
|
mode &= 0777;
|
|
|
|
|
|
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
|
|
if (r < 0)
|
2010-06-21 23:27:18 +02:00
|
|
|
return r;
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
r = chmod_and_chown(fs, mode, uid, gid);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
|
|
|
|
r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
|
|
|
|
if (r < 0)
|
|
|
|
log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2013-04-08 18:22:47 +02:00
|
|
|
int cg_set_task_access(
|
|
|
|
const char *controller,
|
|
|
|
const char *path,
|
|
|
|
mode_t mode,
|
|
|
|
uid_t uid,
|
2013-06-27 04:14:27 +02:00
|
|
|
gid_t gid) {
|
2013-04-08 18:22:47 +02:00
|
|
|
|
|
|
|
_cleanup_free_ char *fs = NULL, *procs = NULL;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(path);
|
|
|
|
|
2014-11-28 20:51:01 +01:00
|
|
|
if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
|
2012-01-18 15:40:21 +01:00
|
|
|
return 0;
|
|
|
|
|
2014-11-28 20:51:01 +01:00
|
|
|
if (mode != MODE_INVALID)
|
2012-01-18 15:40:21 +01:00
|
|
|
mode &= 0666;
|
|
|
|
|
2013-06-06 15:49:01 +02:00
|
|
|
r = cg_get_path(controller, path, "cgroup.procs", &fs);
|
2012-01-18 15:40:21 +01:00
|
|
|
if (r < 0)
|
2010-06-21 23:27:18 +02:00
|
|
|
return r;
|
|
|
|
|
|
|
|
r = chmod_and_chown(fs, mode, uid, gid);
|
2013-04-08 18:22:47 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (!cg_unified(controller)) {
|
|
|
|
/* Compatibility, Always keep values for "tasks" in sync with
|
|
|
|
* "cgroup.procs" */
|
|
|
|
if (cg_get_path(controller, path, "tasks", &procs) >= 0)
|
|
|
|
(void) chmod_and_chown(procs, mode, uid, gid);
|
|
|
|
}
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
|
|
|
|
r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
|
|
|
|
if (r < 0)
|
|
|
|
log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path);
|
|
|
|
}
|
2013-04-08 18:22:47 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2016-08-30 23:18:46 +02:00
|
|
|
int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
|
|
|
|
_cleanup_free_ char *fs = NULL;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(path);
|
|
|
|
assert(name);
|
|
|
|
assert(value || size <= 0);
|
|
|
|
|
|
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
if (setxattr(fs, name, value, size, flags) < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
|
|
|
|
_cleanup_free_ char *fs = NULL;
|
|
|
|
ssize_t n;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(path);
|
|
|
|
assert(name);
|
|
|
|
|
|
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
n = getxattr(fs, name, value, size);
|
|
|
|
if (n < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
return (int) n;
|
|
|
|
}
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
|
|
|
|
_cleanup_fclose_ FILE *f = NULL;
|
|
|
|
char line[LINE_MAX];
|
2016-11-21 20:45:53 +01:00
|
|
|
const char *fs, *controller_str;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
size_t cs = 0;
|
2016-11-21 20:45:53 +01:00
|
|
|
bool unified;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(path);
|
2010-07-11 00:50:49 +02:00
|
|
|
assert(pid >= 0);
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-08-16 00:13:36 +02:00
|
|
|
if (controller) {
|
|
|
|
if (!cg_controller_is_valid(controller))
|
|
|
|
return -EINVAL;
|
|
|
|
} else
|
|
|
|
controller = SYSTEMD_CGROUP_CONTROLLER;
|
|
|
|
|
|
|
|
unified = cg_unified(controller);
|
2016-11-21 20:45:53 +01:00
|
|
|
if (!unified) {
|
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
|
|
|
|
controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
|
|
|
|
else
|
|
|
|
controller_str = controller;
|
|
|
|
|
|
|
|
cs = strlen(controller_str);
|
|
|
|
}
|
2013-04-16 04:36:06 +02:00
|
|
|
|
2014-01-04 02:35:23 +01:00
|
|
|
fs = procfs_file_alloca(pid, "cgroup");
|
2010-07-11 00:50:49 +02:00
|
|
|
f = fopen(fs, "re");
|
2010-07-13 19:00:01 +02:00
|
|
|
if (!f)
|
|
|
|
return errno == ENOENT ? -ESRCH : -errno;
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
FOREACH_LINE(line, f, return -errno) {
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
char *e, *p;
|
2010-07-11 00:50:49 +02:00
|
|
|
|
|
|
|
truncate_nl(line);
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (unified) {
|
|
|
|
e = startswith(line, "0:");
|
|
|
|
if (!e)
|
|
|
|
continue;
|
2010-07-11 00:50:49 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
e = strchr(e, ':');
|
|
|
|
if (!e)
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
char *l;
|
|
|
|
size_t k;
|
|
|
|
const char *word, *state;
|
|
|
|
bool found = false;
|
|
|
|
|
|
|
|
l = strchr(line, ':');
|
|
|
|
if (!l)
|
|
|
|
continue;
|
2013-04-30 01:22:36 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
l++;
|
|
|
|
e = strchr(l, ':');
|
|
|
|
if (!e)
|
|
|
|
continue;
|
2013-04-30 01:22:36 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
*e = 0;
|
|
|
|
FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
|
2016-11-21 20:45:53 +01:00
|
|
|
if (k == cs && memcmp(word, controller_str, cs) == 0) {
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
2013-04-30 01:22:36 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!found)
|
|
|
|
continue;
|
2013-04-30 01:22:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
p = strdup(e + 1);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (!p)
|
|
|
|
return -ENOMEM;
|
2010-07-11 00:50:49 +02:00
|
|
|
|
|
|
|
*path = p;
|
2013-04-16 04:36:06 +02:00
|
|
|
return 0;
|
2010-07-11 00:50:49 +02:00
|
|
|
}
|
|
|
|
|
2015-09-03 19:44:02 +02:00
|
|
|
return -ENODATA;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int cg_install_release_agent(const char *controller, const char *agent) {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_free_ char *fs = NULL, *contents = NULL;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
const char *sc;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(agent);
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_unified(controller)) /* doesn't apply to unified hierarchy */
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
r = cg_get_path(controller, NULL, "release_agent", &fs);
|
|
|
|
if (r < 0)
|
2010-07-11 00:50:49 +02:00
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
r = read_one_line_file(fs, &contents);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
sc = strstrip(contents);
|
2015-09-01 18:02:43 +02:00
|
|
|
if (isempty(sc)) {
|
2015-07-07 01:19:25 +02:00
|
|
|
r = write_string_file(fs, agent, 0);
|
2013-04-02 20:31:42 +02:00
|
|
|
if (r < 0)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2015-09-03 18:27:19 +02:00
|
|
|
} else if (!path_equal(sc, agent))
|
2013-04-16 04:36:06 +02:00
|
|
|
return -EEXIST;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2015-08-03 19:26:07 +02:00
|
|
|
fs = mfree(fs);
|
2013-04-16 04:36:06 +02:00
|
|
|
r = cg_get_path(controller, NULL, "notify_on_release", &fs);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2015-08-03 19:26:07 +02:00
|
|
|
contents = mfree(contents);
|
2013-04-16 04:36:06 +02:00
|
|
|
r = read_one_line_file(fs, &contents);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
sc = strstrip(contents);
|
|
|
|
if (streq(sc, "0")) {
|
2015-07-07 01:19:25 +02:00
|
|
|
r = write_string_file(fs, "1", 0);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2010-07-11 00:50:49 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
return 1;
|
|
|
|
}
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
if (!streq(sc, "1"))
|
|
|
|
return -EIO;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2013-07-04 20:31:18 +02:00
|
|
|
int cg_uninstall_release_agent(const char *controller) {
|
|
|
|
_cleanup_free_ char *fs = NULL;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_unified(controller)) /* Doesn't apply to unified hierarchy */
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return -EOPNOTSUPP;
|
2013-07-04 20:31:18 +02:00
|
|
|
|
2013-07-10 23:47:15 +02:00
|
|
|
r = cg_get_path(controller, NULL, "notify_on_release", &fs);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2015-07-07 01:19:25 +02:00
|
|
|
r = write_string_file(fs, "0", 0);
|
2013-07-10 23:47:15 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2015-08-03 19:26:07 +02:00
|
|
|
fs = mfree(fs);
|
2013-07-10 23:47:15 +02:00
|
|
|
|
2013-07-04 20:31:18 +02:00
|
|
|
r = cg_get_path(controller, NULL, "release_agent", &fs);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2015-07-07 01:19:25 +02:00
|
|
|
r = write_string_file(fs, "", 0);
|
2013-07-04 20:31:18 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2013-07-10 23:47:15 +02:00
|
|
|
return 0;
|
2013-07-04 20:31:18 +02:00
|
|
|
}
|
|
|
|
|
2015-09-01 18:32:07 +02:00
|
|
|
int cg_is_empty(const char *controller, const char *path) {
|
2013-04-16 04:36:06 +02:00
|
|
|
_cleanup_fclose_ FILE *f = NULL;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
pid_t pid;
|
2013-04-16 04:36:06 +02:00
|
|
|
int r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(path);
|
|
|
|
|
2013-06-06 15:49:01 +02:00
|
|
|
r = cg_enumerate_processes(controller, path, &f);
|
2015-09-01 18:32:07 +02:00
|
|
|
if (r == -ENOENT)
|
|
|
|
return 1;
|
2012-04-16 17:35:58 +02:00
|
|
|
if (r < 0)
|
2015-09-01 18:32:07 +02:00
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2015-09-01 18:32:07 +02:00
|
|
|
r = cg_read_pid(f, &pid);
|
2010-07-11 00:50:49 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2015-09-01 18:32:07 +02:00
|
|
|
return r == 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2015-09-01 18:32:07 +02:00
|
|
|
int cg_is_empty_recursive(const char *controller, const char *path) {
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
|
|
|
assert(path);
|
|
|
|
|
2015-09-01 18:36:28 +02:00
|
|
|
/* The root cgroup is always populated */
|
|
|
|
if (controller && (isempty(path) || path_equal(path, "/")))
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return false;
|
2015-09-01 18:36:28 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_unified(controller)) {
|
2016-03-25 16:38:50 +01:00
|
|
|
_cleanup_free_ char *t = NULL;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
/* On the unified hierarchy we can check empty state
|
2016-03-25 16:38:50 +01:00
|
|
|
* via the "populated" attribute of "cgroup.events". */
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-03-25 16:38:50 +01:00
|
|
|
r = cg_read_event(controller, path, "populated", &t);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return streq(t, "0");
|
|
|
|
} else {
|
|
|
|
_cleanup_closedir_ DIR *d = NULL;
|
|
|
|
char *fn;
|
2010-06-21 23:27:18 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
r = cg_is_empty(controller, path);
|
2010-07-12 18:16:44 +02:00
|
|
|
if (r <= 0)
|
2013-04-16 04:36:06 +02:00
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
r = cg_enumerate_subgroups(controller, path, &d);
|
|
|
|
if (r == -ENOENT)
|
|
|
|
return 1;
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
while ((r = cg_read_subgroup(d, &fn)) > 0) {
|
|
|
|
_cleanup_free_ char *p = NULL;
|
|
|
|
|
2016-10-23 17:43:27 +02:00
|
|
|
p = strjoin(path, "/", fn);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
free(fn);
|
|
|
|
if (!p)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
r = cg_is_empty_recursive(controller, p);
|
|
|
|
if (r <= 0)
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int cg_split_spec(const char *spec, char **controller, char **path) {
|
|
|
|
char *t = NULL, *u = NULL;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
const char *e;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
|
|
|
assert(spec);
|
|
|
|
|
|
|
|
if (*spec == '/') {
|
2013-01-19 00:59:19 +01:00
|
|
|
if (!path_is_safe(spec))
|
|
|
|
return -EINVAL;
|
2010-07-12 18:16:44 +02:00
|
|
|
|
|
|
|
if (path) {
|
2013-01-12 04:24:12 +01:00
|
|
|
t = strdup(spec);
|
|
|
|
if (!t)
|
2010-07-12 18:16:44 +02:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2014-04-16 04:16:40 +02:00
|
|
|
*path = path_kill_slashes(t);
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
if (controller)
|
|
|
|
*controller = NULL;
|
|
|
|
|
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
2013-01-12 04:24:12 +01:00
|
|
|
e = strchr(spec, ':');
|
|
|
|
if (!e) {
|
2015-06-01 13:46:52 +02:00
|
|
|
if (!cg_controller_is_valid(spec))
|
2010-07-12 18:16:44 +02:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (controller) {
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
t = strdup(spec);
|
2013-01-12 04:24:12 +01:00
|
|
|
if (!t)
|
2010-07-12 18:16:44 +02:00
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
*controller = t;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (path)
|
|
|
|
*path = NULL;
|
|
|
|
|
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
t = strndup(spec, e-spec);
|
2013-01-19 00:59:19 +01:00
|
|
|
if (!t)
|
|
|
|
return -ENOMEM;
|
2015-06-01 13:46:52 +02:00
|
|
|
if (!cg_controller_is_valid(t)) {
|
2013-01-19 00:59:19 +01:00
|
|
|
free(t);
|
2010-07-12 18:16:44 +02:00
|
|
|
return -EINVAL;
|
2013-01-12 04:24:12 +01:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (isempty(e+1))
|
|
|
|
u = NULL;
|
|
|
|
else {
|
2013-09-26 19:57:58 +02:00
|
|
|
u = strdup(e+1);
|
|
|
|
if (!u) {
|
|
|
|
free(t);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2013-09-26 19:57:58 +02:00
|
|
|
if (!path_is_safe(u) ||
|
|
|
|
!path_is_absolute(u)) {
|
|
|
|
free(t);
|
|
|
|
free(u);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
path_kill_slashes(u);
|
|
|
|
}
|
2013-04-30 00:15:30 +02:00
|
|
|
|
2010-07-12 18:16:44 +02:00
|
|
|
if (controller)
|
|
|
|
*controller = t;
|
2013-01-19 00:59:19 +01:00
|
|
|
else
|
|
|
|
free(t);
|
2010-07-12 18:16:44 +02:00
|
|
|
|
|
|
|
if (path)
|
|
|
|
*path = u;
|
2013-01-19 00:59:19 +01:00
|
|
|
else
|
|
|
|
free(u);
|
2010-07-12 18:16:44 +02:00
|
|
|
|
|
|
|
return 0;
|
2010-06-21 23:27:18 +02:00
|
|
|
}
|
2010-07-11 00:50:49 +02:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
int cg_mangle_path(const char *path, char **result) {
|
2013-04-25 00:01:29 +02:00
|
|
|
_cleanup_free_ char *c = NULL, *p = NULL;
|
|
|
|
char *t;
|
2010-07-12 18:16:44 +02:00
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(path);
|
|
|
|
assert(result);
|
|
|
|
|
2014-02-17 03:37:13 +01:00
|
|
|
/* First, check if it already is a filesystem path */
|
2013-04-16 04:36:06 +02:00
|
|
|
if (path_startswith(path, "/sys/fs/cgroup")) {
|
2010-07-12 18:16:44 +02:00
|
|
|
|
2012-04-16 18:56:18 +02:00
|
|
|
t = strdup(path);
|
|
|
|
if (!t)
|
2010-07-12 18:16:44 +02:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2014-04-16 04:16:40 +02:00
|
|
|
*result = path_kill_slashes(t);
|
2010-07-12 18:16:44 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-02-17 03:37:13 +01:00
|
|
|
/* Otherwise, treat it as cg spec */
|
2012-04-16 18:56:18 +02:00
|
|
|
r = cg_split_spec(path, &c, &p);
|
|
|
|
if (r < 0)
|
2010-07-12 18:16:44 +02:00
|
|
|
return r;
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
|
2010-07-12 18:16:44 +02:00
|
|
|
}
|
2011-03-14 23:13:57 +01:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
int cg_get_root_path(char **path) {
|
2013-06-20 03:45:08 +02:00
|
|
|
char *p, *e;
|
2013-04-16 04:36:06 +02:00
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(path);
|
|
|
|
|
2013-06-20 03:45:08 +02:00
|
|
|
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
e = endswith(p, "/" SPECIAL_INIT_SCOPE);
|
|
|
|
if (!e)
|
|
|
|
e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
|
|
|
|
if (!e)
|
|
|
|
e = endswith(p, "/system"); /* even more legacy */
|
2013-06-20 03:45:08 +02:00
|
|
|
if (e)
|
2013-04-16 04:36:06 +02:00
|
|
|
*e = 0;
|
|
|
|
|
2011-03-14 23:13:57 +01:00
|
|
|
*path = p;
|
|
|
|
return 0;
|
|
|
|
}
|
2012-04-16 19:14:11 +02:00
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
|
|
|
|
_cleanup_free_ char *rt = NULL;
|
|
|
|
char *p;
|
2012-05-30 22:25:01 +02:00
|
|
|
int r;
|
|
|
|
|
2013-12-11 23:31:07 +01:00
|
|
|
assert(cgroup);
|
2013-12-24 19:31:44 +01:00
|
|
|
assert(shifted);
|
2013-12-11 23:31:07 +01:00
|
|
|
|
|
|
|
if (!root) {
|
|
|
|
/* If the root was specified let's use that, otherwise
|
|
|
|
* let's determine it from PID 1 */
|
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
r = cg_get_root_path(&rt);
|
2013-12-11 23:31:07 +01:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
root = rt;
|
2013-12-11 23:31:07 +01:00
|
|
|
}
|
2012-05-30 22:25:01 +02:00
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
p = path_startswith(cgroup, root);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (p && p > cgroup)
|
2013-12-24 19:31:44 +01:00
|
|
|
*shifted = p - 1;
|
|
|
|
else
|
|
|
|
*shifted = cgroup;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
|
|
|
|
_cleanup_free_ char *raw = NULL;
|
|
|
|
const char *c;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(pid >= 0);
|
|
|
|
assert(cgroup);
|
|
|
|
|
|
|
|
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
|
2013-04-16 04:36:06 +02:00
|
|
|
if (r < 0)
|
2012-05-30 22:25:01 +02:00
|
|
|
return r;
|
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
r = cg_shift_path(raw, root, &c);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2012-05-30 22:25:01 +02:00
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
if (c == raw) {
|
|
|
|
*cgroup = raw;
|
|
|
|
raw = NULL;
|
|
|
|
} else {
|
|
|
|
char *n;
|
2012-05-30 22:25:01 +02:00
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
n = strdup(c);
|
|
|
|
if (!n)
|
2012-05-30 22:25:01 +02:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2013-12-24 19:31:44 +01:00
|
|
|
*cgroup = n;
|
|
|
|
}
|
2012-05-30 22:25:01 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-02-23 18:52:52 +01:00
|
|
|
int cg_path_decode_unit(const char *cgroup, char **unit) {
|
2015-04-30 00:47:41 +02:00
|
|
|
char *c, *s;
|
|
|
|
size_t n;
|
2013-01-17 18:55:05 +01:00
|
|
|
|
|
|
|
assert(cgroup);
|
2013-04-15 14:05:00 +02:00
|
|
|
assert(unit);
|
2013-01-17 18:55:05 +01:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
n = strcspn(cgroup, "/");
|
|
|
|
if (n < 3)
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
c = strndupa(cgroup, n);
|
2013-04-23 04:10:13 +02:00
|
|
|
c = cg_unescape(c);
|
2013-01-17 18:55:05 +01:00
|
|
|
|
2015-04-30 20:21:00 +02:00
|
|
|
if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
|
2015-04-29 21:40:54 +02:00
|
|
|
return -ENXIO;
|
2013-01-17 18:55:05 +01:00
|
|
|
|
2013-07-11 18:47:20 +02:00
|
|
|
s = strdup(c);
|
2013-04-15 14:05:00 +02:00
|
|
|
if (!s)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
*unit = s;
|
2013-01-17 18:55:05 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
static bool valid_slice_name(const char *p, size_t n) {
|
|
|
|
|
|
|
|
if (!p)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (n < strlen("x.slice"))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (memcmp(p + n - 6, ".slice", 6) == 0) {
|
|
|
|
char buf[n+1], *c;
|
|
|
|
|
|
|
|
memcpy(buf, p, n);
|
|
|
|
buf[n] = 0;
|
|
|
|
|
|
|
|
c = cg_unescape(buf);
|
|
|
|
|
2015-04-30 20:21:00 +02:00
|
|
|
return unit_name_is_valid(c, UNIT_NAME_PLAIN);
|
2015-04-30 00:47:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-06-20 03:45:08 +02:00
|
|
|
static const char *skip_slices(const char *p) {
|
2015-04-30 00:47:41 +02:00
|
|
|
assert(p);
|
|
|
|
|
2013-06-20 03:45:08 +02:00
|
|
|
/* Skips over all slice assignments */
|
|
|
|
|
|
|
|
for (;;) {
|
2013-06-21 01:46:27 +02:00
|
|
|
size_t n;
|
|
|
|
|
2013-06-20 03:45:08 +02:00
|
|
|
p += strspn(p, "/");
|
|
|
|
|
|
|
|
n = strcspn(p, "/");
|
2015-04-30 00:47:41 +02:00
|
|
|
if (!valid_slice_name(p, n))
|
2013-06-20 03:45:08 +02:00
|
|
|
return p;
|
|
|
|
|
|
|
|
p += n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
int cg_path_get_unit(const char *path, char **ret) {
|
2013-04-15 14:05:00 +02:00
|
|
|
const char *e;
|
2015-04-30 00:47:41 +02:00
|
|
|
char *unit;
|
|
|
|
int r;
|
2013-04-15 14:05:00 +02:00
|
|
|
|
|
|
|
assert(path);
|
2015-04-30 00:47:41 +02:00
|
|
|
assert(ret);
|
2013-04-15 14:05:00 +02:00
|
|
|
|
2013-06-20 03:45:08 +02:00
|
|
|
e = skip_slices(path);
|
2013-04-15 14:05:00 +02:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
r = cg_path_decode_unit(e, &unit);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
/* We skipped over the slices, don't accept any now */
|
|
|
|
if (endswith(unit, ".slice")) {
|
|
|
|
free(unit);
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
*ret = unit;
|
|
|
|
return 0;
|
2013-04-15 14:05:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int cg_pid_get_unit(pid_t pid, char **unit) {
|
2013-04-18 09:11:22 +02:00
|
|
|
_cleanup_free_ char *cgroup = NULL;
|
2012-05-30 22:25:01 +02:00
|
|
|
int r;
|
|
|
|
|
2013-01-17 18:55:05 +01:00
|
|
|
assert(unit);
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
|
2013-01-17 18:55:05 +01:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2013-04-15 14:05:00 +02:00
|
|
|
return cg_path_get_unit(cgroup, unit);
|
|
|
|
}
|
2013-01-17 18:55:05 +01:00
|
|
|
|
2013-12-23 00:28:03 +01:00
|
|
|
/**
|
|
|
|
* Skip session-*.scope, but require it to be there.
|
|
|
|
*/
|
2013-06-20 03:45:08 +02:00
|
|
|
static const char *skip_session(const char *p) {
|
|
|
|
size_t n;
|
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
if (isempty(p))
|
|
|
|
return NULL;
|
2013-06-20 03:45:08 +02:00
|
|
|
|
|
|
|
p += strspn(p, "/");
|
|
|
|
|
|
|
|
n = strcspn(p, "/");
|
2015-04-30 00:47:41 +02:00
|
|
|
if (n < strlen("session-x.scope"))
|
2013-12-23 00:28:03 +01:00
|
|
|
return NULL;
|
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
|
|
|
|
char buf[n - 8 - 6 + 1];
|
|
|
|
|
|
|
|
memcpy(buf, p + 8, n - 8 - 6);
|
|
|
|
buf[n - 8 - 6] = 0;
|
2013-12-23 00:28:03 +01:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
/* Note that session scopes never need unescaping,
|
|
|
|
* since they cannot conflict with the kernel's own
|
|
|
|
* names, hence we don't need to call cg_unescape()
|
|
|
|
* here. */
|
|
|
|
|
|
|
|
if (!session_id_valid(buf))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
p += n;
|
|
|
|
p += strspn(p, "/");
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
2013-12-23 00:28:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Skip user@*.service, but require it to be there.
|
|
|
|
*/
|
|
|
|
static const char *skip_user_manager(const char *p) {
|
|
|
|
size_t n;
|
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
if (isempty(p))
|
|
|
|
return NULL;
|
2013-12-23 00:28:03 +01:00
|
|
|
|
|
|
|
p += strspn(p, "/");
|
|
|
|
|
|
|
|
n = strcspn(p, "/");
|
2015-04-30 00:47:41 +02:00
|
|
|
if (n < strlen("user@x.service"))
|
2013-04-15 14:05:00 +02:00
|
|
|
return NULL;
|
2013-01-17 18:55:05 +01:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
|
|
|
|
char buf[n - 5 - 8 + 1];
|
2013-06-20 03:45:08 +02:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
memcpy(buf, p + 5, n - 5 - 8);
|
|
|
|
buf[n - 5 - 8] = 0;
|
|
|
|
|
|
|
|
/* Note that user manager services never need unescaping,
|
|
|
|
* since they cannot conflict with the kernel's own
|
|
|
|
* names, hence we don't need to call cg_unescape()
|
|
|
|
* here. */
|
|
|
|
|
|
|
|
if (parse_uid(buf, NULL) < 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
p += n;
|
|
|
|
p += strspn(p, "/");
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
2013-06-20 03:45:08 +02:00
|
|
|
}
|
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
static const char *skip_user_prefix(const char *path) {
|
2013-12-23 00:28:03 +01:00
|
|
|
const char *e, *t;
|
2013-01-17 18:55:05 +01:00
|
|
|
|
2013-04-15 14:05:00 +02:00
|
|
|
assert(path);
|
2012-05-30 22:25:01 +02:00
|
|
|
|
2013-06-20 03:45:08 +02:00
|
|
|
/* Skip slices, if there are any */
|
|
|
|
e = skip_slices(path);
|
2012-05-30 22:25:01 +02:00
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
/* Skip the user manager, if it's in the path now... */
|
2015-04-30 00:47:41 +02:00
|
|
|
t = skip_user_manager(e);
|
2015-04-30 11:58:06 +02:00
|
|
|
if (t)
|
|
|
|
return t;
|
2015-04-30 00:47:41 +02:00
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
/* Alternatively skip the user session if it is in the path... */
|
|
|
|
return skip_session(e);
|
|
|
|
}
|
2015-02-04 02:07:37 +01:00
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
int cg_path_get_user_unit(const char *path, char **ret) {
|
|
|
|
const char *t;
|
2013-04-15 14:05:00 +02:00
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
assert(path);
|
|
|
|
assert(ret);
|
2015-04-30 00:47:41 +02:00
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
t = skip_user_prefix(path);
|
|
|
|
if (!t)
|
2015-04-30 00:47:41 +02:00
|
|
|
return -ENXIO;
|
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
/* And from here on it looks pretty much the same as for a
|
|
|
|
* system unit, hence let's use the same parser from here
|
|
|
|
* on. */
|
|
|
|
return cg_path_get_unit(t, ret);
|
2013-01-17 18:55:05 +01:00
|
|
|
}
|
2012-05-30 22:25:01 +02:00
|
|
|
|
2013-01-17 18:55:05 +01:00
|
|
|
int cg_pid_get_user_unit(pid_t pid, char **unit) {
|
2013-04-18 09:11:22 +02:00
|
|
|
_cleanup_free_ char *cgroup = NULL;
|
2013-04-15 14:05:00 +02:00
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(unit);
|
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
|
2013-04-15 14:05:00 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return cg_path_get_user_unit(cgroup, unit);
|
2012-05-30 22:25:01 +02:00
|
|
|
}
|
2013-01-19 00:59:19 +01:00
|
|
|
|
2013-04-16 04:36:06 +02:00
|
|
|
int cg_path_get_machine_name(const char *path, char **machine) {
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
_cleanup_free_ char *u = NULL;
|
|
|
|
const char *sl;
|
2014-02-11 17:15:38 +01:00
|
|
|
int r;
|
2013-07-02 02:32:24 +02:00
|
|
|
|
2014-02-11 17:15:38 +01:00
|
|
|
r = cg_path_get_unit(path, &u);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2013-04-16 04:36:06 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
sl = strjoina("/run/systemd/machines/unit:", u);
|
2014-02-11 17:15:38 +01:00
|
|
|
return readlink_malloc(sl, machine);
|
2013-04-16 04:36:06 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int cg_pid_get_machine_name(pid_t pid, char **machine) {
|
2013-04-18 09:11:22 +02:00
|
|
|
_cleanup_free_ char *cgroup = NULL;
|
2013-04-16 04:36:06 +02:00
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(machine);
|
|
|
|
|
|
|
|
r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return cg_path_get_machine_name(cgroup, machine);
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_path_get_session(const char *path, char **session) {
|
2015-04-30 00:47:41 +02:00
|
|
|
_cleanup_free_ char *unit = NULL;
|
|
|
|
char *start, *end;
|
|
|
|
int r;
|
2013-04-16 04:36:06 +02:00
|
|
|
|
|
|
|
assert(path);
|
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
r = cg_path_get_unit(path, &unit);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2013-04-16 04:36:06 +02:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
start = startswith(unit, "session-");
|
|
|
|
if (!start)
|
2015-04-29 21:40:54 +02:00
|
|
|
return -ENXIO;
|
2015-04-30 00:47:41 +02:00
|
|
|
end = endswith(start, ".scope");
|
|
|
|
if (!end)
|
2015-04-29 21:40:54 +02:00
|
|
|
return -ENXIO;
|
2015-04-30 00:47:41 +02:00
|
|
|
|
|
|
|
*end = 0;
|
|
|
|
if (!session_id_valid(start))
|
2015-04-29 21:40:54 +02:00
|
|
|
return -ENXIO;
|
2013-07-02 02:32:24 +02:00
|
|
|
|
2014-01-27 21:34:54 +01:00
|
|
|
if (session) {
|
2015-04-30 00:47:41 +02:00
|
|
|
char *rr;
|
2014-01-27 21:34:54 +01:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
rr = strdup(start);
|
|
|
|
if (!rr)
|
2014-01-27 21:34:54 +01:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
*session = rr;
|
2014-01-27 21:34:54 +01:00
|
|
|
}
|
2013-04-16 04:36:06 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_pid_get_session(pid_t pid, char **session) {
|
2013-04-18 09:11:22 +02:00
|
|
|
_cleanup_free_ char *cgroup = NULL;
|
2013-04-16 04:36:06 +02:00
|
|
|
int r;
|
|
|
|
|
|
|
|
r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return cg_path_get_session(cgroup, session);
|
|
|
|
}
|
|
|
|
|
2013-04-23 04:10:13 +02:00
|
|
|
int cg_path_get_owner_uid(const char *path, uid_t *uid) {
|
2013-07-02 02:32:24 +02:00
|
|
|
_cleanup_free_ char *slice = NULL;
|
2015-04-30 00:47:41 +02:00
|
|
|
char *start, *end;
|
2013-07-02 02:32:24 +02:00
|
|
|
int r;
|
2013-04-23 04:10:13 +02:00
|
|
|
|
|
|
|
assert(path);
|
|
|
|
|
2013-07-02 02:32:24 +02:00
|
|
|
r = cg_path_get_slice(path, &slice);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2013-04-23 04:10:13 +02:00
|
|
|
|
2013-12-03 01:13:03 +01:00
|
|
|
start = startswith(slice, "user-");
|
|
|
|
if (!start)
|
2015-04-29 21:40:54 +02:00
|
|
|
return -ENXIO;
|
2015-04-30 00:47:41 +02:00
|
|
|
end = endswith(start, ".slice");
|
2013-12-03 01:13:03 +01:00
|
|
|
if (!end)
|
2015-04-29 21:40:54 +02:00
|
|
|
return -ENXIO;
|
2013-04-23 04:10:13 +02:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
*end = 0;
|
|
|
|
if (parse_uid(start, uid) < 0)
|
2015-04-29 21:40:54 +02:00
|
|
|
return -ENXIO;
|
2013-12-03 01:13:03 +01:00
|
|
|
|
|
|
|
return 0;
|
2013-04-23 04:10:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
|
|
|
|
_cleanup_free_ char *cgroup = NULL;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return cg_path_get_owner_uid(cgroup, uid);
|
|
|
|
}
|
|
|
|
|
2013-06-21 01:46:27 +02:00
|
|
|
int cg_path_get_slice(const char *p, char **slice) {
|
|
|
|
const char *e = NULL;
|
|
|
|
|
|
|
|
assert(p);
|
|
|
|
assert(slice);
|
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
/* Finds the right-most slice unit from the beginning, but
|
|
|
|
* stops before we come to the first non-slice unit. */
|
|
|
|
|
2013-06-21 01:46:27 +02:00
|
|
|
for (;;) {
|
|
|
|
size_t n;
|
|
|
|
|
|
|
|
p += strspn(p, "/");
|
|
|
|
|
|
|
|
n = strcspn(p, "/");
|
2015-04-30 00:47:41 +02:00
|
|
|
if (!valid_slice_name(p, n)) {
|
2013-06-21 01:46:27 +02:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
if (!e) {
|
|
|
|
char *s;
|
2013-06-21 01:46:27 +02:00
|
|
|
|
2016-08-30 21:49:26 +02:00
|
|
|
s = strdup(SPECIAL_ROOT_SLICE);
|
2015-04-30 00:47:41 +02:00
|
|
|
if (!s)
|
|
|
|
return -ENOMEM;
|
2013-06-21 01:46:27 +02:00
|
|
|
|
2015-04-30 00:47:41 +02:00
|
|
|
*slice = s;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return cg_path_decode_unit(e, slice);
|
2013-06-21 01:46:27 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
e = p;
|
|
|
|
p += n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_pid_get_slice(pid_t pid, char **slice) {
|
|
|
|
_cleanup_free_ char *cgroup = NULL;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(slice);
|
|
|
|
|
|
|
|
r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return cg_path_get_slice(cgroup, slice);
|
|
|
|
}
|
|
|
|
|
2015-04-30 11:58:06 +02:00
|
|
|
int cg_path_get_user_slice(const char *p, char **slice) {
|
|
|
|
const char *t;
|
|
|
|
assert(p);
|
|
|
|
assert(slice);
|
|
|
|
|
|
|
|
t = skip_user_prefix(p);
|
|
|
|
if (!t)
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
/* And now it looks pretty much the same as for a system
|
|
|
|
* slice, so let's just use the same parser from here on. */
|
|
|
|
return cg_path_get_slice(t, slice);
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_pid_get_user_slice(pid_t pid, char **slice) {
|
|
|
|
_cleanup_free_ char *cgroup = NULL;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(slice);
|
|
|
|
|
|
|
|
r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return cg_path_get_user_slice(cgroup, slice);
|
|
|
|
}
|
|
|
|
|
2013-04-23 04:10:13 +02:00
|
|
|
char *cg_escape(const char *p) {
|
|
|
|
bool need_prefix = false;
|
|
|
|
|
|
|
|
/* This implements very minimal escaping for names to be used
|
|
|
|
* as file names in the cgroup tree: any name which might
|
|
|
|
* conflict with a kernel name or is prefixed with '_' is
|
|
|
|
* prefixed with a '_'. That way, when reading cgroup names it
|
|
|
|
* is sufficient to remove a single prefixing underscore if
|
|
|
|
* there is one. */
|
|
|
|
|
|
|
|
/* The return value of this function (unlike cg_unescape())
|
|
|
|
* needs free()! */
|
|
|
|
|
2013-05-03 19:02:24 +02:00
|
|
|
if (p[0] == 0 ||
|
|
|
|
p[0] == '_' ||
|
|
|
|
p[0] == '.' ||
|
|
|
|
streq(p, "notify_on_release") ||
|
|
|
|
streq(p, "release_agent") ||
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
streq(p, "tasks") ||
|
|
|
|
startswith(p, "cgroup."))
|
2013-04-23 04:10:13 +02:00
|
|
|
need_prefix = true;
|
|
|
|
else {
|
|
|
|
const char *dot;
|
|
|
|
|
|
|
|
dot = strrchr(p, '.');
|
|
|
|
if (dot) {
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
CGroupController c;
|
|
|
|
size_t l = dot - p;
|
2013-04-23 04:10:13 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
|
|
const char *n;
|
|
|
|
|
|
|
|
n = cgroup_controller_to_string(c);
|
2013-04-23 04:10:13 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (l != strlen(n))
|
|
|
|
continue;
|
2013-04-23 04:10:13 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (memcmp(p, n, l) != 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
need_prefix = true;
|
|
|
|
break;
|
2013-04-23 04:10:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (need_prefix)
|
|
|
|
return strappend("_", p);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
return strdup(p);
|
2013-04-23 04:10:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
char *cg_unescape(const char *p) {
|
|
|
|
assert(p);
|
|
|
|
|
|
|
|
/* The return value of this function (unlike cg_escape())
|
|
|
|
* doesn't need free()! */
|
|
|
|
|
|
|
|
if (p[0] == '_')
|
|
|
|
return (char*) p+1;
|
|
|
|
|
|
|
|
return (char*) p;
|
|
|
|
}
|
2013-04-25 00:01:29 +02:00
|
|
|
|
|
|
|
#define CONTROLLER_VALID \
|
2013-09-16 04:26:56 +02:00
|
|
|
DIGITS LETTERS \
|
2013-04-25 00:01:29 +02:00
|
|
|
"_"
|
|
|
|
|
2015-06-01 13:46:52 +02:00
|
|
|
bool cg_controller_is_valid(const char *p) {
|
2013-04-25 00:01:29 +02:00
|
|
|
const char *t, *s;
|
|
|
|
|
|
|
|
if (!p)
|
|
|
|
return false;
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
|
|
|
|
return true;
|
|
|
|
|
2015-06-01 13:46:52 +02:00
|
|
|
s = startswith(p, "name=");
|
|
|
|
if (s)
|
|
|
|
p = s;
|
2013-04-25 00:01:29 +02:00
|
|
|
|
|
|
|
if (*p == 0 || *p == '_')
|
|
|
|
return false;
|
|
|
|
|
|
|
|
for (t = p; *t; t++)
|
|
|
|
if (!strchr(CONTROLLER_VALID, *t))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (t - p > FILENAME_MAX)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2013-06-17 21:33:26 +02:00
|
|
|
|
|
|
|
int cg_slice_to_path(const char *unit, char **ret) {
|
|
|
|
_cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
|
|
|
|
const char *dash;
|
2015-04-30 20:21:00 +02:00
|
|
|
int r;
|
2013-06-17 21:33:26 +02:00
|
|
|
|
|
|
|
assert(unit);
|
|
|
|
assert(ret);
|
|
|
|
|
2016-08-30 21:49:26 +02:00
|
|
|
if (streq(unit, SPECIAL_ROOT_SLICE)) {
|
2015-04-30 12:33:35 +02:00
|
|
|
char *x;
|
|
|
|
|
|
|
|
x = strdup("");
|
|
|
|
if (!x)
|
|
|
|
return -ENOMEM;
|
|
|
|
*ret = x;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-04-30 20:21:00 +02:00
|
|
|
if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
|
2013-06-17 21:33:26 +02:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (!endswith(unit, ".slice"))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2015-04-30 20:21:00 +02:00
|
|
|
r = unit_name_to_prefix(unit, &p);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2013-06-17 21:33:26 +02:00
|
|
|
|
|
|
|
dash = strchr(p, '-');
|
2015-04-30 19:42:48 +02:00
|
|
|
|
|
|
|
/* Don't allow initial dashes */
|
|
|
|
if (dash == p)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2013-06-17 21:33:26 +02:00
|
|
|
while (dash) {
|
|
|
|
_cleanup_free_ char *escaped = NULL;
|
|
|
|
char n[dash - p + sizeof(".slice")];
|
|
|
|
|
2015-04-30 19:42:48 +02:00
|
|
|
/* Don't allow trailing or double dashes */
|
|
|
|
if (dash[1] == 0 || dash[1] == '-')
|
2015-04-30 12:33:35 +02:00
|
|
|
return -EINVAL;
|
2013-06-17 21:33:26 +02:00
|
|
|
|
2015-04-30 12:33:35 +02:00
|
|
|
strcpy(stpncpy(n, p, dash - p), ".slice");
|
2015-04-30 20:21:00 +02:00
|
|
|
if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
|
2013-06-17 21:33:26 +02:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
escaped = cg_escape(n);
|
|
|
|
if (!escaped)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
if (!strextend(&s, escaped, "/", NULL))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
dash = strchr(dash+1, '-');
|
|
|
|
}
|
|
|
|
|
|
|
|
e = cg_escape(unit);
|
|
|
|
if (!e)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
if (!strextend(&s, e, NULL))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
*ret = s;
|
|
|
|
s = NULL;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2013-06-27 04:14:27 +02:00
|
|
|
|
|
|
|
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
|
|
|
|
_cleanup_free_ char *p = NULL;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = cg_get_path(controller, path, attribute, &p);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2015-07-07 01:19:25 +02:00
|
|
|
return write_string_file(p, value, 0);
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
2015-01-23 02:58:02 +01:00
|
|
|
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
|
|
|
|
_cleanup_free_ char *p = NULL;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = cg_get_path(controller, path, attribute, &p);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
return read_one_line_file(p, ret);
|
|
|
|
}
|
|
|
|
|
2016-08-07 15:45:39 +02:00
|
|
|
int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
|
|
|
|
_cleanup_free_ char *filename = NULL, *content = NULL;
|
|
|
|
char *line, *p;
|
|
|
|
int i, r;
|
|
|
|
|
|
|
|
for (i = 0; keys[i]; i++)
|
|
|
|
values[i] = NULL;
|
|
|
|
|
|
|
|
r = cg_get_path(controller, path, attribute, &filename);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
r = read_full_file(filename, &content, NULL);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
p = content;
|
|
|
|
while ((line = strsep(&p, "\n"))) {
|
|
|
|
char *key;
|
|
|
|
|
|
|
|
key = strsep(&line, " ");
|
|
|
|
|
|
|
|
for (i = 0; keys[i]; i++) {
|
|
|
|
if (streq(key, keys[i])) {
|
|
|
|
values[i] = strdup(line);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; keys[i]; i++) {
|
|
|
|
if (!values[i]) {
|
|
|
|
for (i = 0; keys[i]; i++) {
|
|
|
|
free(values[i]);
|
|
|
|
values[i] = NULL;
|
|
|
|
}
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
|
|
|
|
CGroupController c;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
|
|
|
/* This one will create a cgroup in our private tree, but also
|
|
|
|
* duplicate it in the trees specified in mask, and remove it
|
|
|
|
* in all others */
|
|
|
|
|
|
|
|
/* First create the cgroup in our own hierarchy. */
|
|
|
|
r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
/* If we are in the unified hierarchy, we are done now */
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_all_unified())
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Otherwise, do the same in the other hierarchies */
|
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
const char *n;
|
|
|
|
|
|
|
|
n = cgroup_controller_to_string(c);
|
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
if (mask & bit)
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
(void) cg_create(n, path);
|
2013-09-24 04:56:05 +02:00
|
|
|
else if (supported & bit)
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
(void) cg_trim(n, path, true);
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
return 0;
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
|
|
|
|
CGroupController c;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
|
|
|
r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
|
2013-09-24 04:56:05 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_all_unified())
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return 0;
|
2014-12-10 22:06:44 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
const char *p = NULL;
|
2014-12-10 22:06:44 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!(supported & bit))
|
|
|
|
continue;
|
2014-12-10 22:06:44 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (path_callback)
|
|
|
|
p = path_callback(bit, userdata);
|
2014-12-10 22:06:44 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!p)
|
|
|
|
p = path;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
(void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
return 0;
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
|
2013-07-01 00:03:57 +02:00
|
|
|
Iterator i;
|
|
|
|
void *pidp;
|
|
|
|
int r = 0;
|
|
|
|
|
|
|
|
SET_FOREACH(pidp, pids, i) {
|
2015-09-03 13:22:51 +02:00
|
|
|
pid_t pid = PTR_TO_PID(pidp);
|
2013-09-24 04:56:05 +02:00
|
|
|
int q;
|
2013-07-01 00:03:57 +02:00
|
|
|
|
2014-12-10 22:06:44 +01:00
|
|
|
q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (q < 0 && r >= 0)
|
2013-09-24 04:56:05 +02:00
|
|
|
r = q;
|
2013-07-01 00:03:57 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
|
2015-09-02 20:46:22 +02:00
|
|
|
CGroupController c;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r = 0;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
if (!path_equal(from, to)) {
|
2016-07-20 11:16:05 +02:00
|
|
|
r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
|
2013-09-24 04:56:05 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
}
|
2013-06-27 04:14:27 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_all_unified())
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return r;
|
2014-02-17 02:06:32 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
const char *p = NULL;
|
2014-02-17 02:06:32 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!(supported & bit))
|
|
|
|
continue;
|
2014-02-17 02:06:32 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (to_callback)
|
|
|
|
p = to_callback(bit, userdata);
|
2013-06-27 04:14:27 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!p)
|
|
|
|
p = to;
|
|
|
|
|
2016-07-20 11:16:05 +02:00
|
|
|
(void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
return 0;
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
|
|
|
|
CGroupController c;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
|
|
|
r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_all_unified())
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return r;
|
|
|
|
|
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
|
|
|
|
if (!(supported & bit))
|
|
|
|
continue;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
(void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
2013-09-24 04:56:05 +02:00
|
|
|
return 0;
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int cg_mask_supported(CGroupMask *ret) {
|
|
|
|
CGroupMask mask = 0;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
/* Determines the mask of supported cgroup controllers. Only
|
|
|
|
* includes controllers we can make sense of and that are
|
|
|
|
* actually accessible. */
|
2013-06-27 04:14:27 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_all_unified()) {
|
2015-09-03 19:50:37 +02:00
|
|
|
_cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
const char *c;
|
|
|
|
|
|
|
|
/* In the unified hierarchy we can read the supported
|
|
|
|
* and accessible controllers from a the top-level
|
|
|
|
* cgroup attribute */
|
|
|
|
|
2015-09-03 19:50:37 +02:00
|
|
|
r = cg_get_root_path(&root);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
r = read_one_line_file(path, &controllers);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (r < 0)
|
|
|
|
return r;
|
2013-06-27 04:14:27 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
c = controllers;
|
|
|
|
for (;;) {
|
|
|
|
_cleanup_free_ char *n = NULL;
|
|
|
|
CGroupController v;
|
|
|
|
|
|
|
|
r = extract_first_word(&c, &n, NULL, 0);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
if (r == 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
v = cgroup_controller_from_string(n);
|
|
|
|
if (v < 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
mask |= CGROUP_CONTROLLER_TO_MASK(v);
|
|
|
|
}
|
|
|
|
|
2016-08-07 15:45:39 +02:00
|
|
|
/* Currently, we support the cpu, memory, io and pids
|
2015-09-10 12:32:16 +02:00
|
|
|
* controller in the unified hierarchy, mask
|
|
|
|
* everything else off. */
|
2016-08-07 15:45:39 +02:00
|
|
|
mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
} else {
|
|
|
|
CGroupController c;
|
|
|
|
|
|
|
|
/* In the legacy hierarchy, we check whether which
|
|
|
|
* hierarchies are mounted. */
|
|
|
|
|
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
|
|
const char *n;
|
|
|
|
|
|
|
|
n = cgroup_controller_to_string(c);
|
|
|
|
if (controller_is_accessible(n) >= 0)
|
|
|
|
mask |= CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
}
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
*ret = mask;
|
|
|
|
return 0;
|
2013-06-27 04:14:27 +02:00
|
|
|
}
|
2014-12-30 01:57:23 +01:00
|
|
|
|
|
|
|
int cg_kernel_controllers(Set *controllers) {
|
|
|
|
_cleanup_fclose_ FILE *f = NULL;
|
|
|
|
char buf[LINE_MAX];
|
|
|
|
int r;
|
|
|
|
|
|
|
|
assert(controllers);
|
|
|
|
|
2015-09-01 18:02:43 +02:00
|
|
|
/* Determines the full list of kernel-known controllers. Might
|
|
|
|
* include controllers we don't actually support, arbitrary
|
|
|
|
* named hierarchies and controllers that aren't currently
|
|
|
|
* accessible (because not mounted). */
|
|
|
|
|
2014-12-30 01:57:23 +01:00
|
|
|
f = fopen("/proc/cgroups", "re");
|
|
|
|
if (!f) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
return 0;
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ignore the header line */
|
|
|
|
(void) fgets(buf, sizeof(buf), f);
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
char *controller;
|
|
|
|
int enabled = 0;
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
|
|
|
|
|
|
|
|
if (feof(f))
|
|
|
|
break;
|
|
|
|
|
2016-01-11 20:31:14 +01:00
|
|
|
if (ferror(f) && errno > 0)
|
2014-12-30 01:57:23 +01:00
|
|
|
return -errno;
|
|
|
|
|
|
|
|
return -EBADMSG;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!enabled) {
|
|
|
|
free(controller);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (!cg_controller_is_valid(controller)) {
|
2014-12-30 01:57:23 +01:00
|
|
|
free(controller);
|
|
|
|
return -EBADMSG;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = set_consume(controllers, controller);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
2016-08-16 00:13:36 +02:00
|
|
|
static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
|
|
|
|
|
2016-11-23 18:27:32 +01:00
|
|
|
/* The hybrid mode was initially implemented in v232 and simply mounted
|
|
|
|
* cgroup v2 on /sys/fs/cgroup/systemd. This unfortunately broke other
|
|
|
|
* tools (such as docker) which expected the v1 "name=systemd" hierarchy
|
|
|
|
* on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs
|
|
|
|
* v2 on /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy
|
|
|
|
* on /sys/fs/cgroup/systemd for compatibility with other tools.
|
|
|
|
*
|
|
|
|
* To keep live upgrade working, we detect and support v232 layout. When
|
|
|
|
* v232 layout is detected, to keep cgroup v2 process management but
|
|
|
|
* disable the compat dual layout, we return %true on
|
|
|
|
* cg_unified(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
|
|
|
|
*/
|
|
|
|
static thread_local bool unified_systemd_v232;
|
|
|
|
|
2016-08-16 00:13:36 +02:00
|
|
|
static int cg_update_unified(void) {
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
struct statfs fs;
|
|
|
|
|
|
|
|
/* Checks if we support the unified hierarchy. Returns an
|
|
|
|
* error when the cgroup hierarchies aren't mounted yet or we
|
|
|
|
* have any other trouble determining if the unified hierarchy
|
|
|
|
* is supported. */
|
|
|
|
|
2016-08-16 00:13:36 +02:00
|
|
|
if (unified_cache >= CGROUP_UNIFIED_NONE)
|
|
|
|
return 0;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
if (statfs("/sys/fs/cgroup/", &fs) < 0)
|
|
|
|
return -errno;
|
|
|
|
|
2015-12-07 01:10:50 +01:00
|
|
|
if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
|
2016-08-16 00:13:36 +02:00
|
|
|
unified_cache = CGROUP_UNIFIED_ALL;
|
|
|
|
else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
|
2016-11-21 20:45:53 +01:00
|
|
|
if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
|
2016-11-23 18:27:32 +01:00
|
|
|
F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
|
2016-11-21 20:45:53 +01:00
|
|
|
unified_cache = CGROUP_UNIFIED_SYSTEMD;
|
2016-11-23 18:27:32 +01:00
|
|
|
unified_systemd_v232 = false;
|
|
|
|
} else if (statfs("/sys/fs/cgroup/systemd/", &fs) == 0 &&
|
|
|
|
F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
|
|
|
|
unified_cache = CGROUP_UNIFIED_SYSTEMD;
|
|
|
|
unified_systemd_v232 = true;
|
|
|
|
} else {
|
2016-11-21 20:45:53 +01:00
|
|
|
if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
|
|
|
|
return -errno;
|
|
|
|
if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
|
|
|
|
return -ENOMEDIUM;
|
|
|
|
unified_cache = CGROUP_UNIFIED_NONE;
|
|
|
|
}
|
2016-08-16 00:13:36 +02:00
|
|
|
} else
|
2015-12-02 04:35:16 +01:00
|
|
|
return -ENOMEDIUM;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
2016-08-16 00:13:36 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
bool cg_unified(const char *controller) {
|
2016-08-16 00:13:36 +02:00
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
assert(cg_update_unified() >= 0);
|
2016-08-16 00:13:36 +02:00
|
|
|
|
|
|
|
if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER))
|
|
|
|
return unified_cache >= CGROUP_UNIFIED_SYSTEMD;
|
|
|
|
else
|
|
|
|
return unified_cache >= CGROUP_UNIFIED_ALL;
|
|
|
|
}
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
bool cg_all_unified(void) {
|
2016-08-16 00:13:36 +02:00
|
|
|
|
|
|
|
return cg_unified(NULL);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
}
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
bool cg_hybrid_unified(void) {
|
|
|
|
|
|
|
|
assert(cg_update_unified() >= 0);
|
|
|
|
|
2016-11-23 18:27:32 +01:00
|
|
|
return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
|
2016-11-21 20:45:53 +01:00
|
|
|
}
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
int cg_unified_flush(void) {
|
2016-08-16 00:13:36 +02:00
|
|
|
unified_cache = CGROUP_UNIFIED_UNKNOWN;
|
2016-11-21 20:45:53 +01:00
|
|
|
|
|
|
|
return cg_update_unified();
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
|
|
|
|
_cleanup_free_ char *fs = NULL;
|
|
|
|
CGroupController c;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
assert(p);
|
|
|
|
|
|
|
|
if (supported == 0)
|
|
|
|
return 0;
|
|
|
|
|
2016-11-21 20:45:53 +01:00
|
|
|
if (!cg_all_unified()) /* on the legacy hiearchy there's no joining of controllers defined */
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
const char *n;
|
|
|
|
|
|
|
|
if (!(supported & bit))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
n = cgroup_controller_to_string(c);
|
|
|
|
{
|
|
|
|
char s[1 + strlen(n) + 1];
|
|
|
|
|
|
|
|
s[0] = mask & bit ? '+' : '-';
|
|
|
|
strcpy(s + 1, n);
|
|
|
|
|
|
|
|
r = write_string_file(fs, s, 0);
|
|
|
|
if (r < 0)
|
2015-09-03 20:10:00 +02:00
|
|
|
log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool cg_is_unified_wanted(void) {
|
|
|
|
static thread_local int wanted = -1;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
util-lib: various improvements to kernel command line parsing
This improves kernel command line parsing in a number of ways:
a) An kernel option "foo_bar=xyz" is now considered equivalent to
"foo-bar-xyz", i.e. when comparing kernel command line option names "-" and
"_" are now considered equivalent (this only applies to the option names
though, not the option values!). Most of our kernel options used "-" as word
separator in kernel command line options so far, but some used "_". With
this change, which was a source of confusion for users (well, at least of
one user: myself, I just couldn't remember that it's systemd.debug-shell,
not systemd.debug_shell). Considering both as equivalent is inspired how
modern kernel module loading normalizes all kernel module names to use
underscores now too.
b) All options previously using a dash for separating words in kernel command
line options now use an underscore instead, in all documentation and in
code. Since a) has been implemented this should not create any compatibility
problems, but normalizes our documentation and our code.
c) All kernel command line options which take booleans (or are boolean-like)
have been reworked so that "foobar" (without argument) is now equivalent to
"foobar=1" (but not "foobar=0"), thus normalizing the handling of our
boolean arguments. Specifically this means systemd.debug-shell and
systemd_debug_shell=1 are now entirely equivalent.
d) All kernel command line options which take an argument, and where no
argument is specified will now result in a log message. e.g. passing just
"systemd.unit" will no result in a complain that it needs an argument. This
is implemented in the proc_cmdline_missing_value() function.
e) There's now a call proc_cmdline_get_bool() similar to proc_cmdline_get_key()
that parses booleans (following the logic explained in c).
f) The proc_cmdline_parse() call's boolean argument has been replaced by a new
flags argument that takes a common set of bits with proc_cmdline_get_key().
g) All kernel command line APIs now begin with the same "proc_cmdline_" prefix.
h) There are now tests for much of this. Yay!
2016-12-12 18:29:15 +01:00
|
|
|
bool b;
|
2017-02-19 05:13:15 +01:00
|
|
|
const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
/* If the hierarchy is already mounted, then follow whatever
|
|
|
|
* was chosen for it. */
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_unified_flush() >= 0)
|
|
|
|
return cg_all_unified();
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
2017-02-19 05:13:15 +01:00
|
|
|
/* If we have a cached value, return that. */
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
if (wanted >= 0)
|
|
|
|
return wanted;
|
|
|
|
|
2017-02-19 05:13:15 +01:00
|
|
|
/* Otherwise, let's see what the kernel command line has to say.
|
|
|
|
* Since checking is expensive, cache a non-error result. */
|
util-lib: various improvements to kernel command line parsing
This improves kernel command line parsing in a number of ways:
a) An kernel option "foo_bar=xyz" is now considered equivalent to
"foo-bar-xyz", i.e. when comparing kernel command line option names "-" and
"_" are now considered equivalent (this only applies to the option names
though, not the option values!). Most of our kernel options used "-" as word
separator in kernel command line options so far, but some used "_". With
this change, which was a source of confusion for users (well, at least of
one user: myself, I just couldn't remember that it's systemd.debug-shell,
not systemd.debug_shell). Considering both as equivalent is inspired how
modern kernel module loading normalizes all kernel module names to use
underscores now too.
b) All options previously using a dash for separating words in kernel command
line options now use an underscore instead, in all documentation and in
code. Since a) has been implemented this should not create any compatibility
problems, but normalizes our documentation and our code.
c) All kernel command line options which take booleans (or are boolean-like)
have been reworked so that "foobar" (without argument) is now equivalent to
"foobar=1" (but not "foobar=0"), thus normalizing the handling of our
boolean arguments. Specifically this means systemd.debug-shell and
systemd_debug_shell=1 are now entirely equivalent.
d) All kernel command line options which take an argument, and where no
argument is specified will now result in a log message. e.g. passing just
"systemd.unit" will no result in a complain that it needs an argument. This
is implemented in the proc_cmdline_missing_value() function.
e) There's now a call proc_cmdline_get_bool() similar to proc_cmdline_get_key()
that parses booleans (following the logic explained in c).
f) The proc_cmdline_parse() call's boolean argument has been replaced by a new
flags argument that takes a common set of bits with proc_cmdline_get_key().
g) All kernel command line APIs now begin with the same "proc_cmdline_" prefix.
h) There are now tests for much of this. Yay!
2016-12-12 18:29:15 +01:00
|
|
|
r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
|
|
|
|
if (r < 0)
|
2017-02-19 05:13:15 +01:00
|
|
|
return is_default;
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
2017-02-19 05:13:15 +01:00
|
|
|
return (wanted = r > 0 ? b : is_default);
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
bool cg_is_legacy_wanted(void) {
|
|
|
|
return !cg_is_unified_wanted();
|
|
|
|
}
|
|
|
|
|
2016-08-16 00:13:36 +02:00
|
|
|
bool cg_is_unified_systemd_controller_wanted(void) {
|
|
|
|
static thread_local int wanted = -1;
|
2016-11-21 20:45:53 +01:00
|
|
|
int r;
|
util-lib: various improvements to kernel command line parsing
This improves kernel command line parsing in a number of ways:
a) An kernel option "foo_bar=xyz" is now considered equivalent to
"foo-bar-xyz", i.e. when comparing kernel command line option names "-" and
"_" are now considered equivalent (this only applies to the option names
though, not the option values!). Most of our kernel options used "-" as word
separator in kernel command line options so far, but some used "_". With
this change, which was a source of confusion for users (well, at least of
one user: myself, I just couldn't remember that it's systemd.debug-shell,
not systemd.debug_shell). Considering both as equivalent is inspired how
modern kernel module loading normalizes all kernel module names to use
underscores now too.
b) All options previously using a dash for separating words in kernel command
line options now use an underscore instead, in all documentation and in
code. Since a) has been implemented this should not create any compatibility
problems, but normalizes our documentation and our code.
c) All kernel command line options which take booleans (or are boolean-like)
have been reworked so that "foobar" (without argument) is now equivalent to
"foobar=1" (but not "foobar=0"), thus normalizing the handling of our
boolean arguments. Specifically this means systemd.debug-shell and
systemd_debug_shell=1 are now entirely equivalent.
d) All kernel command line options which take an argument, and where no
argument is specified will now result in a log message. e.g. passing just
"systemd.unit" will no result in a complain that it needs an argument. This
is implemented in the proc_cmdline_missing_value() function.
e) There's now a call proc_cmdline_get_bool() similar to proc_cmdline_get_key()
that parses booleans (following the logic explained in c).
f) The proc_cmdline_parse() call's boolean argument has been replaced by a new
flags argument that takes a common set of bits with proc_cmdline_get_key().
g) All kernel command line APIs now begin with the same "proc_cmdline_" prefix.
h) There are now tests for much of this. Yay!
2016-12-12 18:29:15 +01:00
|
|
|
bool b;
|
2017-02-19 05:13:15 +01:00
|
|
|
const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_SYSTEMD;
|
2016-08-16 00:13:36 +02:00
|
|
|
|
|
|
|
/* If the unified hierarchy is requested in full, no need to
|
|
|
|
* bother with this. */
|
|
|
|
if (cg_is_unified_wanted())
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* If the hierarchy is already mounted, then follow whatever
|
|
|
|
* was chosen for it. */
|
2016-11-21 20:45:53 +01:00
|
|
|
if (cg_unified_flush() >= 0)
|
|
|
|
return cg_unified(SYSTEMD_CGROUP_CONTROLLER);
|
2016-08-16 00:13:36 +02:00
|
|
|
|
2017-02-19 05:13:15 +01:00
|
|
|
/* If we have a cached value, return that. */
|
2016-08-16 00:13:36 +02:00
|
|
|
if (wanted >= 0)
|
|
|
|
return wanted;
|
|
|
|
|
2017-02-19 05:13:15 +01:00
|
|
|
/* Otherwise, let's see what the kernel command line has to say.
|
|
|
|
* Since checking is expensive, cache a non-error result. */
|
util-lib: various improvements to kernel command line parsing
This improves kernel command line parsing in a number of ways:
a) An kernel option "foo_bar=xyz" is now considered equivalent to
"foo-bar-xyz", i.e. when comparing kernel command line option names "-" and
"_" are now considered equivalent (this only applies to the option names
though, not the option values!). Most of our kernel options used "-" as word
separator in kernel command line options so far, but some used "_". With
this change, which was a source of confusion for users (well, at least of
one user: myself, I just couldn't remember that it's systemd.debug-shell,
not systemd.debug_shell). Considering both as equivalent is inspired how
modern kernel module loading normalizes all kernel module names to use
underscores now too.
b) All options previously using a dash for separating words in kernel command
line options now use an underscore instead, in all documentation and in
code. Since a) has been implemented this should not create any compatibility
problems, but normalizes our documentation and our code.
c) All kernel command line options which take booleans (or are boolean-like)
have been reworked so that "foobar" (without argument) is now equivalent to
"foobar=1" (but not "foobar=0"), thus normalizing the handling of our
boolean arguments. Specifically this means systemd.debug-shell and
systemd_debug_shell=1 are now entirely equivalent.
d) All kernel command line options which take an argument, and where no
argument is specified will now result in a log message. e.g. passing just
"systemd.unit" will no result in a complain that it needs an argument. This
is implemented in the proc_cmdline_missing_value() function.
e) There's now a call proc_cmdline_get_bool() similar to proc_cmdline_get_key()
that parses booleans (following the logic explained in c).
f) The proc_cmdline_parse() call's boolean argument has been replaced by a new
flags argument that takes a common set of bits with proc_cmdline_get_key().
g) All kernel command line APIs now begin with the same "proc_cmdline_" prefix.
h) There are now tests for much of this. Yay!
2016-12-12 18:29:15 +01:00
|
|
|
r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
|
|
|
|
if (r < 0)
|
2017-02-19 05:13:15 +01:00
|
|
|
return is_default;
|
2016-08-16 00:13:36 +02:00
|
|
|
|
2017-02-20 18:26:53 +01:00
|
|
|
/* The meaning of the kernel option is reversed wrt. to the return value
|
|
|
|
* of this function, hence the negation. */
|
2017-02-19 05:13:15 +01:00
|
|
|
return (wanted = r > 0 ? !b : is_default);
|
2016-08-16 00:13:36 +02:00
|
|
|
}
|
|
|
|
|
2016-05-05 22:42:55 +02:00
|
|
|
int cg_weight_parse(const char *s, uint64_t *ret) {
|
|
|
|
uint64_t u;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (isempty(s)) {
|
|
|
|
*ret = CGROUP_WEIGHT_INVALID;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = safe_atou64(s, &u);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
|
|
|
|
return -ERANGE;
|
|
|
|
|
|
|
|
*ret = u;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-05-18 22:50:56 +02:00
|
|
|
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
|
|
|
|
[CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
|
|
|
|
[CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
|
2016-05-18 22:50:56 +02:00
|
|
|
[CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
|
|
|
|
[CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
|
2016-05-18 22:50:56 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
|
|
|
|
[CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
|
|
|
|
[CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
|
2016-05-18 22:50:56 +02:00
|
|
|
[CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
|
|
|
|
[CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
|
2016-05-18 22:50:56 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
|
|
|
|
|
2015-09-11 16:48:24 +02:00
|
|
|
int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
|
|
|
|
uint64_t u;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (isempty(s)) {
|
|
|
|
*ret = CGROUP_CPU_SHARES_INVALID;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = safe_atou64(s, &u);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
|
|
|
|
return -ERANGE;
|
|
|
|
|
|
|
|
*ret = u;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
|
|
|
|
uint64_t u;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (isempty(s)) {
|
|
|
|
*ret = CGROUP_BLKIO_WEIGHT_INVALID;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = safe_atou64(s, &u);
|
|
|
|
if (r < 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
|
|
|
|
return -ERANGE;
|
|
|
|
|
|
|
|
*ret = u;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-10-13 15:50:46 +02:00
|
|
|
bool is_cgroup_fs(const struct statfs *s) {
|
|
|
|
return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
|
|
|
|
is_fs_type(s, CGROUP2_SUPER_MAGIC);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool fd_is_cgroup_fs(int fd) {
|
|
|
|
struct statfs s;
|
|
|
|
|
|
|
|
if (fstatfs(fd, &s) < 0)
|
|
|
|
return -errno;
|
|
|
|
|
|
|
|
return is_cgroup_fs(&s);
|
|
|
|
}
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
|
|
|
|
[CGROUP_CONTROLLER_CPU] = "cpu",
|
|
|
|
[CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
|
2016-05-05 22:42:55 +02:00
|
|
|
[CGROUP_CONTROLLER_IO] = "io",
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
[CGROUP_CONTROLLER_BLKIO] = "blkio",
|
|
|
|
[CGROUP_CONTROLLER_MEMORY] = "memory",
|
2015-09-08 18:15:50 +02:00
|
|
|
[CGROUP_CONTROLLER_DEVICES] = "devices",
|
2015-09-10 12:32:16 +02:00
|
|
|
[CGROUP_CONTROLLER_PIDS] = "pids",
|
core: unified cgroup hierarchy support
This patch set adds full support the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possibly to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified heirarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicey, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefuly makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
|