diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index b5ca10a2de..6601f16e01 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -208,6 +208,12 @@ int cg_rmdir(const char *controller, const char *path) { if (r < 0 && errno != ENOENT) return -errno; + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path); + } + return 0; } @@ -542,8 +548,12 @@ static const char *controller_to_dirname(const char *controller) { * just cuts off the name= prefixed used for named * hierarchies, if it is specified. */ - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) - controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + if (cg_hybrid_unified()) + controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID; + else + controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + } e = startswith(controller, "name="); if (e) @@ -703,7 +713,7 @@ static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct int cg_trim(const char *controller, const char *path, bool delete_root) { _cleanup_free_ char *fs = NULL; - int r = 0; + int r = 0, q; assert(path); @@ -726,6 +736,12 @@ int cg_trim(const char *controller, const char *path, bool delete_root) { return -errno; } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); + if (q < 0) + log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); + } + return r; } @@ -749,6 +765,12 @@ int cg_create(const char *controller, const char *path) { return -errno; } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); + } + return 1; } @@ -786,7 +808,17 @@ int cg_attach(const char *controller, const char *path, pid_t pid) { xsprintf(c, PID_FMT "\n", pid); - return write_string_file(fs, c, 0); + r = write_string_file(fs, c, 0); + if (r < 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); + if (r < 0) + log_warning_errno(r, "Failed to attach %d to compat systemd cgroup %s: %m", pid, path); + } + + return 0; } int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { @@ -835,7 +867,17 @@ int cg_set_group_access( if (r < 0) return r; - return chmod_and_chown(fs, mode, uid, gid); + r = chmod_and_chown(fs, mode, uid, gid); + if (r < 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid); + if (r < 0) + log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path); + } + + return 0; } int cg_set_task_access( @@ -864,13 +906,18 @@ int cg_set_task_access( if (r < 0) return r; - if (cg_unified(controller)) - return 0; + if (!cg_unified(controller)) { + /* Compatibility, Always keep values for "tasks" in sync with + * "cgroup.procs" */ + if (cg_get_path(controller, path, "tasks", &procs) >= 0) + (void) chmod_and_chown(procs, mode, uid, gid); + } - /* Compatibility, Always keep values for "tasks" in sync with - * "cgroup.procs" */ - if (cg_get_path(controller, path, "tasks", &procs) >= 0) - (void) chmod_and_chown(procs, mode, uid, gid); + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid); + if (r < 0) + log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path); + } return 0; } @@ -2254,11 +2301,16 @@ static int cg_update_unified(void) { if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) unified_cache = CGROUP_UNIFIED_ALL; else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { - if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) - return -errno; - - unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ? - CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE; + if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) + unified_cache = CGROUP_UNIFIED_SYSTEMD; + else { + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) + return -errno; + if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) + return -ENOMEDIUM; + unified_cache = CGROUP_UNIFIED_NONE; + } } else return -ENOMEDIUM; @@ -2280,6 +2332,13 @@ bool cg_all_unified(void) { return cg_unified(NULL); } +bool cg_hybrid_unified(void) { + + assert(cg_update_unified() >= 0); + + return unified_cache == CGROUP_UNIFIED_SYSTEMD; +} + int cg_unified_flush(void) { unified_cache = CGROUP_UNIFIED_UNKNOWN; @@ -2383,10 +2442,6 @@ bool cg_is_unified_systemd_controller_wanted(void) { return (wanted = r > 0 ? !b : false); } -bool cg_is_legacy_systemd_controller_wanted(void) { - return cg_is_legacy_wanted() && !cg_is_unified_systemd_controller_wanted(); -} - int cg_weight_parse(const char *s, uint64_t *ret) { uint64_t u; int r; diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 09dcc2b38e..5a82de71fe 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -241,13 +241,13 @@ int cg_kernel_controllers(Set *controllers); bool cg_ns_supported(void); bool cg_all_unified(void); +bool cg_hybrid_unified(void); bool cg_unified(const char *controller); int cg_unified_flush(void); bool cg_is_unified_wanted(void); bool cg_is_legacy_wanted(void); bool cg_is_unified_systemd_controller_wanted(void); -bool cg_is_legacy_systemd_controller_wanted(void); const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; diff --git a/src/basic/def.h b/src/basic/def.h index 3ba275f64b..fbab066564 100644 --- a/src/basic/def.h +++ b/src/basic/def.h @@ -37,6 +37,7 @@ #define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL #define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified" #define SYSTEMD_CGROUP_CONTROLLER "_systemd" #define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 9c2bf3a0ef..4e2df01346 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -99,12 +99,12 @@ static const MountPoint mount_table[] = { cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER }, + cg_is_legacy_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #ifdef ENABLE_EFI diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 1493ef6aad..ed4f1f9db8 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -890,7 +890,7 @@ static int get_controllers(Set *subsystems) { *e = 0; - if (STR_IN_SET(l, "", "name=systemd")) + if (STR_IN_SET(l, "", "name=systemd", "name=unified")) continue; p = strdup(l); @@ -909,7 +909,6 @@ static int mount_legacy_cgroup_hierarchy( const char *dest, const char *controller, const char *hierarchy, - CGroupUnified unified_requested, bool read_only) { const char *to, *fstype, *opts; @@ -927,14 +926,12 @@ static int mount_legacy_cgroup_hierarchy( /* The superblock mount options of the mount point need to be * identical to the hosts', and hence writable... */ - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { - fstype = "cgroup2"; - opts = NULL; - } else { - fstype = "cgroup"; - opts = "none,name=systemd,xattr"; - } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) { + fstype = "cgroup2"; + opts = NULL; + } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) { + fstype = "cgroup"; + opts = "none,name=systemd,xattr"; } else { fstype = "cgroup"; opts = controller; @@ -1012,7 +1009,7 @@ static int mount_legacy_cgns_supported( if (!controller) break; - r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns); + r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns); if (r < 0) return r; @@ -1046,7 +1043,13 @@ static int mount_legacy_cgns_supported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ -1117,7 +1120,7 @@ static int mount_legacy_cgns_unsupported( if (r == -EINVAL) { /* Not a symbolic link, but directly a single cgroup hierarchy */ - r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true); if (r < 0) return r; @@ -1137,7 +1140,7 @@ static int mount_legacy_cgns_unsupported( continue; } - r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true); if (r < 0) return r; @@ -1150,7 +1153,13 @@ static int mount_legacy_cgns_unsupported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ -1202,12 +1211,25 @@ int mount_cgroups( return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); } +static int mount_systemd_cgroup_writable_one(const char *systemd_own, const char *systemd_root) +{ + int r; + + /* Make our own cgroup a (writable) bind mount */ + r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); + if (r < 0) + return r; + + /* And then remount the systemd cgroup root read-only */ + return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, + MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); +} + int mount_systemd_cgroup_writable( const char *dest, CGroupUnified unified_requested) { _cleanup_free_ char *own_cgroup_path = NULL; - const char *systemd_root, *systemd_own; int r; assert(dest); @@ -1220,22 +1242,19 @@ int mount_systemd_cgroup_writable( if (path_equal(own_cgroup_path, "/")) return 0; - if (unified_requested >= CGROUP_UNIFIED_ALL) { - systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup"); - } else { - systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd"); + if (unified_requested >= CGROUP_UNIFIED_ALL) + return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup")); + + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/unified", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/unified")); + if (r < 0) + return r; } - /* Make our own cgroup a (writable) bind mount */ - r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); - if (r < 0) - return r; - - /* And then remount the systemd cgroup root read-only */ - return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, - MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); + return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/systemd")); } int setup_volatile_state( diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 75b6728688..42355115ff 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -344,8 +344,8 @@ static int detect_unified_cgroup_hierarchy(const char *directory) { else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; } else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) { - /* Mixed cgroup hierarchy support was added in 232 */ - r = systemd_installation_has_version(directory, 232); + /* Mixed cgroup hierarchy support was added in 233 */ + r = systemd_installation_has_version(directory, 233); if (r < 0) return log_error_errno(r, "Failed to determine systemd version in container: %m"); if (r > 0)