core: don't try to write CPU quota and memory limit cgroup attrs on root cgroup

In the kernel sources attempts to write to either are refused with EINVAL. Not sure why these attributes are exported anyway on cgroupsv1, but this means we really should ignore them altogether. This simplifies our code as this means cgroupsv1 is more alike cgroupsv2 in this regard. Fixes: #10969
2018-11-30 18:45:22 +01:00 · 2018-11-30 18:45:22 +01:00 · be2c032781
parent 09e5c07fbd
commit be2c032781
1 changed files with 70 additions and 91 deletions
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@ -855,68 +855,53 @@ static void cgroup_context_apply(
        if (is_local_root) /* Make sure we don't try to display messages with an empty path. */
                path = "/";

-        /* We generally ignore errors caused by read-only mounted
-         * cgroup trees (assuming we are running in a container then),
-         * and missing cgroups, i.e. EROFS and ENOENT. */
+        /* We generally ignore errors caused by read-only mounted cgroup trees (assuming we are running in a container
+         * then), and missing cgroups, i.e. EROFS and ENOENT. */

-        if (apply_mask & CGROUP_MASK_CPU) {
-                bool has_weight, has_shares;
-
-                has_weight = cgroup_context_has_cpu_weight(c);
-                has_shares = cgroup_context_has_cpu_shares(c);
+        /* In fully unified mode these attributes don't exist on the host cgroup root. On legacy the weights exist, but
+         * setting the weight makes very little sense on the host root cgroup, as there are no other cgroups at this
+         * level. The quota exists there too, but any attempt to write to it is refused with EINVAL. Inside of
+         * containers we want to leave control of these to the container manager (and if cgroupsv2 delegation is used
+         * we couldn't even write to them if we wanted to). */
+        if ((apply_mask & CGROUP_MASK_CPU) && !is_local_root) {

                if (cg_all_unified() > 0) {
+                        uint64_t weight;

-                        /* In fully unified mode these attributes don't exist on the host cgroup root, and inside of
-                         * containers we want to leave control of these to the container manager (and if delegation is
-                         * used we couldn't even write to them if we wanted to). */
-                        if (!is_local_root) {
-                                uint64_t weight;
-
-                                if (has_weight)
-                                        weight = cgroup_context_cpu_weight(c, state);
-                                else if (has_shares) {
-                                        uint64_t shares;
-
-                                        shares = cgroup_context_cpu_shares(c, state);
-                                        weight = cgroup_cpu_shares_to_weight(shares);
-
-                                        log_cgroup_compat(u, "Applying [Startup]CPUShares %" PRIu64 " as [Startup]CPUWeight %" PRIu64 " on %s",
-                                                          shares, weight, path);
-                                } else
-                                        weight = CGROUP_WEIGHT_DEFAULT;
-
-                                cgroup_apply_unified_cpu_weight(u, weight);
-                                cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec);
-                        }
-
-                } else {
-                        /* Setting the weight makes very little sense on the host root cgroup, as there are no other
-                         * cgroups at this level. And for containers we want to leave management of this to the
-                         * container manager */
-                        if (!is_local_root) {
+                        if (cgroup_context_has_cpu_weight(c))
+                                weight = cgroup_context_cpu_weight(c, state);
+                        else if (cgroup_context_has_cpu_shares(c)) {
                                uint64_t shares;

-                                if (has_weight) {
-                                        uint64_t weight;
+                                shares = cgroup_context_cpu_shares(c, state);
+                                weight = cgroup_cpu_shares_to_weight(shares);

-                                        weight = cgroup_context_cpu_weight(c, state);
-                                        shares = cgroup_cpu_weight_to_shares(weight);
+                                log_cgroup_compat(u, "Applying [Startup]CPUShares=%" PRIu64 " as [Startup]CPUWeight=%" PRIu64 " on %s",
+                                                  shares, weight, path);
+                        } else
+                                weight = CGROUP_WEIGHT_DEFAULT;

-                                        log_cgroup_compat(u, "Applying [Startup]CPUWeight %" PRIu64 " as [Startup]CPUShares %" PRIu64 " on %s",
-                                                          weight, shares, path);
-                                } else if (has_shares)
-                                        shares = cgroup_context_cpu_shares(c, state);
-                                else
-                                        shares = CGROUP_CPU_SHARES_DEFAULT;
+                        cgroup_apply_unified_cpu_weight(u, weight);
+                        cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec);

-                                cgroup_apply_legacy_cpu_shares(u, shares);
-                        }
+                } else {
+                        uint64_t shares;

-                        /* The "cpu" quota attribute is available on the host root, hence manage it there. But in
-                         * containers let's leave this to the container manager. */
-                        if (is_host_root || !is_local_root)
-                                cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec);
+                        if (cgroup_context_has_cpu_weight(c)) {
+                                uint64_t weight;
+
+                                weight = cgroup_context_cpu_weight(c, state);
+                                shares = cgroup_cpu_weight_to_shares(weight);
+
+                                log_cgroup_compat(u, "Applying [Startup]CPUWeight=%" PRIu64 " as [Startup]CPUShares=%" PRIu64 " on %s",
+                                                  weight, shares, path);
+                        } else if (cgroup_context_has_cpu_shares(c))
+                                shares = cgroup_context_cpu_shares(c, state);
+                        else
+                                shares = CGROUP_CPU_SHARES_DEFAULT;
+
+                        cgroup_apply_legacy_cpu_shares(u, shares);
+                        cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec);
                }
        }

@ -1060,56 +1045,51 @@ static void cgroup_context_apply(
                }
        }

-        if (apply_mask & CGROUP_MASK_MEMORY) {
+        /* In unified mode 'memory' attributes do not exist on the root cgroup. In legacy mode 'memory.limit_in_bytes'
+         * exists on the root cgroup, but any writes to it are refused with EINVAL. And if we run in a container we
+         * want to leave control to the container manager (and if proper cgroupsv2 delegation is used we couldn't even
+         * write to this if we wanted to.) */
+        if ((apply_mask & CGROUP_MASK_MEMORY) && !is_local_root) {

                if (cg_all_unified() > 0) {
-                        /* In unified mode 'memory' attributes do not exist on the root cgroup. And if we run in a
-                         * container we want to leave control to the container manager (and if proper delegation is
-                         * used we couldn't even write to this if we wanted to. */
-                        if (!is_local_root) {
-                                uint64_t max, swap_max = CGROUP_LIMIT_MAX;
+                        uint64_t max, swap_max = CGROUP_LIMIT_MAX;

-                                if (cgroup_context_has_unified_memory_config(c)) {
-                                        max = c->memory_max;
-                                        swap_max = c->memory_swap_max;
-                                } else {
-                                        max = c->memory_limit;
+                        if (cgroup_context_has_unified_memory_config(c)) {
+                                max = c->memory_max;
+                                swap_max = c->memory_swap_max;
+                        } else {
+                                max = c->memory_limit;

-                                        if (max != CGROUP_LIMIT_MAX)
-                                                log_cgroup_compat(u, "Applying MemoryLimit=%" PRIu64 " as MemoryMax=", max);
-                                }
-
-                                cgroup_apply_unified_memory_limit(u, "memory.min", c->memory_min);
-                                cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
-                                cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
-                                cgroup_apply_unified_memory_limit(u, "memory.max", max);
-                                cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
+                                if (max != CGROUP_LIMIT_MAX)
+                                        log_cgroup_compat(u, "Applying MemoryLimit=%" PRIu64 " as MemoryMax=", max);
                        }
+
+                        cgroup_apply_unified_memory_limit(u, "memory.min", c->memory_min);
+                        cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
+                        cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
+                        cgroup_apply_unified_memory_limit(u, "memory.max", max);
+                        cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
+
                } else {
+                        char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+                        uint64_t val;

-                        /* In legacy mode 'memory' exists on the host root, but in container mode we want to leave it
-                         * to the container manager around us */
-                        if (is_host_root || !is_local_root) {
-                                char buf[DECIMAL_STR_MAX(uint64_t) + 1];
-                                uint64_t val;
+                        if (cgroup_context_has_unified_memory_config(c)) {
+                                val = c->memory_max;
+                                log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val);
+                        } else
+                                val = c->memory_limit;

-                                if (cgroup_context_has_unified_memory_config(c)) {
-                                        val = c->memory_max;
-                                        log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val);
-                                } else
-                                        val = c->memory_limit;
+                        if (val == CGROUP_LIMIT_MAX)
+                                strncpy(buf, "-1\n", sizeof(buf));
+                        else
+                                xsprintf(buf, "%" PRIu64 "\n", val);

-                                if (val == CGROUP_LIMIT_MAX)
-                                        strncpy(buf, "-1\n", sizeof(buf));
-                                else
-                                        xsprintf(buf, "%" PRIu64 "\n", val);
-
-                                (void) set_attribute_and_warn(u, "memory", "memory.limit_in_bytes", buf);
-                        }
+                        (void) set_attribute_and_warn(u, "memory", "memory.limit_in_bytes", buf);
                }
        }

-        /* On cgroupsv2 we can apply BPF everywhre. On cgroupsv1 we apply it everywhere except for the root of
+        /* On cgroupsv2 we can apply BPF everywhere. On cgroupsv1 we apply it everywhere except for the root of
         * containers, where we leave this to the manager */
        if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) &&
            (is_host_root || cg_all_unified() > 0 || !is_local_root)) {
@ -1218,7 +1198,6 @@ static void cgroup_context_apply(
                                r = procfs_tasks_set_limit(TASKS_MAX);
                        else
                                r = 0;
-
                        if (r < 0)
                                log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r,
                                              "Failed to write to tasks limit sysctls: %m");