cgroup: Implement default propagation of MemoryLow with DefaultMemoryLow

In cgroup v2 we have protection tunables -- currently MemoryLow and
MemoryMin (there will be more in future for other resources, too). The
design of these protection tunables requires not only intermediate
cgroups to propagate protections, but also the units at the leaf of that
resource's operation to accept it (by setting MemoryLow or MemoryMin).

This makes sense from an low-level API design perspective, but it's a
good idea to also have a higher-level abstraction that can, by default,
propagate these resources to children recursively. In this patch, this
happens by having descendants set memory.low to N if their ancestor has
DefaultMemoryLow=N -- assuming they don't set a separate MemoryLow
value.

Any affected unit can opt out of this propagation by manually setting
`MemoryLow` to some value in its unit configuration. A unit can also
stop further propagation by setting `DefaultMemoryLow=` with no
argument. This removes further propagation in the subtree, but has no
effect on the unit itself (for that, use `MemoryLow=0`).

Our use case in production is simplifying the configuration of machines
which heavily rely on memory protection tunables, but currently require
tweaking a huge number of unit files to make that a reality. This
directive makes that significantly less fragile, and decreases the risk
of misconfiguration.

After this patch is merged, I will implement DefaultMemoryMin= using the
same principles.
This commit is contained in:
Chris Down 2019-03-28 12:50:50 +00:00 committed by Lennart Poettering
parent b51629ad84
commit c52db42b78
23 changed files with 310 additions and 13 deletions

View File

@ -228,6 +228,7 @@ All cgroup/resource control settings are available for transient units
✓ CPUQuotaPeriodSec=
✓ MemoryAccounting=
✓ MemoryMin=
✓ DefaultMemoryLow=
✓ MemoryLow=
✓ MemoryHigh=
✓ MemoryMax=

View File

@ -265,6 +265,10 @@
<para>This setting is supported only if the unified control group hierarchy is used and disables
<varname>MemoryLimit=</varname>.</para>
<para>Units may can have their children use a default <literal>memory.low</literal> value by specifying
<varname>DefaultMemoryLow=</varname>, which has the same usage as <varname>MemoryLow=</varname>. This setting
does not affect <literal>memory.low</literal> in the unit itself.</para>
</listitem>
</varlistentry>

View File

@ -234,6 +234,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sStartupIOWeight=%" PRIu64 "\n"
"%sBlockIOWeight=%" PRIu64 "\n"
"%sStartupBlockIOWeight=%" PRIu64 "\n"
"%sDefaultMemoryLow=%" PRIu64 "\n"
"%sMemoryMin=%" PRIu64 "\n"
"%sMemoryLow=%" PRIu64 "\n"
"%sMemoryHigh=%" PRIu64 "\n"
@ -259,6 +260,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->startup_io_weight,
prefix, c->blockio_weight,
prefix, c->startup_blockio_weight,
prefix, c->default_memory_low,
prefix, c->memory_min,
prefix, c->memory_low,
prefix, c->memory_high,
@ -382,6 +384,32 @@ int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode)
return 0;
}
uint64_t unit_get_ancestor_memory_low(Unit *u) {
CGroupContext *c;
/* 1. Is MemoryLow set in this unit? If so, use that.
* 2. Is DefaultMemoryLow set in any ancestor? If so, use that.
* 3. Otherwise, return CGROUP_LIMIT_MIN. */
assert(u);
c = unit_get_cgroup_context(u);
if (c->memory_low_set)
return c->memory_low;
while (UNIT_ISSET(u->slice)) {
u = UNIT_DEREF(u->slice);
c = unit_get_cgroup_context(u);
if (c->default_memory_low_set)
return c->default_memory_low;
}
/* We've reached the root, but nobody had DefaultMemoryLow set, so set it to the kernel default. */
return CGROUP_LIMIT_MIN;
}
static void cgroup_xattr_apply(Unit *u) {
char ids[SD_ID128_STRING_MAX];
int r;
@ -877,8 +905,17 @@ static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint6
(void) set_attribute_and_warn(u, "blkio", "blkio.throttle.write_bps_device", buf);
}
static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
return c->memory_min > 0 || c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
static bool unit_has_unified_memory_config(Unit *u) {
CGroupContext *c;
assert(u);
c = unit_get_cgroup_context(u);
assert(c);
return c->memory_min > 0 || unit_get_ancestor_memory_low(u) > 0 ||
c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX ||
c->memory_swap_max != CGROUP_LIMIT_MAX;
}
static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
@ -1127,7 +1164,7 @@ static void cgroup_context_apply(
if (cg_all_unified() > 0) {
uint64_t max, swap_max = CGROUP_LIMIT_MAX;
if (cgroup_context_has_unified_memory_config(c)) {
if (unit_has_unified_memory_config(u)) {
max = c->memory_max;
swap_max = c->memory_swap_max;
} else {
@ -1138,7 +1175,7 @@ static void cgroup_context_apply(
}
cgroup_apply_unified_memory_limit(u, "memory.min", c->memory_min);
cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
cgroup_apply_unified_memory_limit(u, "memory.low", unit_get_ancestor_memory_low(u));
cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
cgroup_apply_unified_memory_limit(u, "memory.max", max);
cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
@ -1149,7 +1186,7 @@ static void cgroup_context_apply(
char buf[DECIMAL_STR_MAX(uint64_t) + 1];
uint64_t val;
if (cgroup_context_has_unified_memory_config(c)) {
if (unit_has_unified_memory_config(u)) {
val = c->memory_max;
log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val);
} else
@ -1323,8 +1360,13 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {
return false;
}
static CGroupMask cgroup_context_get_mask(CGroupContext *c) {
static CGroupMask unit_get_cgroup_mask(Unit *u) {
CGroupMask mask = 0;
CGroupContext *c;
assert(u);
c = unit_get_cgroup_context(u);
/* Figure out which controllers we need, based on the cgroup context object */
@ -1341,7 +1383,7 @@ static CGroupMask cgroup_context_get_mask(CGroupContext *c) {
if (c->memory_accounting ||
c->memory_limit != CGROUP_LIMIT_MAX ||
cgroup_context_has_unified_memory_config(c))
unit_has_unified_memory_config(u))
mask |= CGROUP_MASK_MEMORY;
if (c->device_allow ||
@ -1380,7 +1422,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
if (!c)
return 0;
return (cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u);
return (unit_get_cgroup_mask(u) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u);
}
CGroupMask unit_get_delegate_mask(Unit *u) {

View File

@ -98,12 +98,16 @@ struct CGroupContext {
LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
LIST_HEAD(CGroupIODeviceLatency, io_device_latencies);
uint64_t default_memory_low;
uint64_t memory_min;
uint64_t memory_low;
uint64_t memory_high;
uint64_t memory_max;
uint64_t memory_swap_max;
bool default_memory_low_set;
bool memory_low_set;
LIST_HEAD(IPAddressAccessItem, ip_address_allow);
LIST_HEAD(IPAddressAccessItem, ip_address_deny);
@ -192,6 +196,8 @@ Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid);
Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
uint64_t unit_get_ancestor_memory_low(Unit *u);
int unit_search_main_pid(Unit *u, pid_t *ret);
int unit_watch_all_pids(Unit *u);

View File

@ -347,6 +347,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0),
SD_BUS_PROPERTY("DefaultMemoryLow", "t", NULL, offsetof(CGroupContext, default_memory_low), 0),
SD_BUS_PROPERTY("MemoryMin", "t", NULL, offsetof(CGroupContext, memory_min), 0),
SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
@ -663,6 +664,9 @@ int bus_cgroup_set_property(
if (streq(name, "MemoryLow"))
return bus_cgroup_set_memory(u, name, &c->memory_low, message, flags, error);
if (streq(name, "DefaultMemoryLow"))
return bus_cgroup_set_memory(u, name, &c->default_memory_low, message, flags, error);
if (streq(name, "MemoryHigh"))
return bus_cgroup_set_memory(u, name, &c->memory_high, message, flags, error);
@ -681,6 +685,9 @@ int bus_cgroup_set_property(
if (streq(name, "MemoryLowScale"))
return bus_cgroup_set_memory_scale(u, name, &c->memory_low, message, flags, error);
if (streq(name, "DefaultMemoryLowScale"))
return bus_cgroup_set_memory_scale(u, name, &c->default_memory_low, message, flags, error);
if (streq(name, "MemoryHighScale"))
return bus_cgroup_set_memory_scale(u, name, &c->memory_high, message, flags, error);

View File

@ -172,6 +172,7 @@ $1.CPUQuota, config_parse_cpu_quota, 0,
$1.CPUQuotaPeriodSec, config_parse_sec_def_infinity, 0, offsetof($1, cgroup_context.cpu_quota_period_usec)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
$1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DefaultMemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)

View File

@ -3107,11 +3107,18 @@ int config_parse_memory_limit(
}
}
if (streq(lvalue, "MemoryMin"))
if (streq(lvalue, "DefaultMemoryLow")) {
c->default_memory_low_set = true;
if (isempty(rvalue))
c->default_memory_low = CGROUP_LIMIT_MIN;
else
c->default_memory_low = bytes;
} else if (streq(lvalue, "MemoryMin"))
c->memory_min = bytes;
else if (streq(lvalue, "MemoryLow"))
else if (streq(lvalue, "MemoryLow")) {
c->memory_low = bytes;
else if (streq(lvalue, "MemoryHigh"))
c->memory_low_set = true;
} else if (streq(lvalue, "MemoryHigh"))
c->memory_high = bytes;
else if (streq(lvalue, "MemoryMax"))
c->memory_max = bytes;

View File

@ -409,7 +409,7 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return 1;
}
if (STR_IN_SET(field, "MemoryMin", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit", "TasksMax")) {
if (STR_IN_SET(field, "MemoryMin", "DefaultMemoryLow", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit", "TasksMax")) {
if (isempty(eq) || streq(eq, "infinity")) {
r = sd_bus_message_append(m, "(sv)", field, "t", CGROUP_LIMIT_MAX);

View File

@ -794,7 +794,7 @@ static int bus_print_property(const char *name, const char *expected_value, sd_b
bus_print_property_value(name, expected_value, value, "[not set]");
else if ((STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) ||
else if ((STR_IN_SET(name, "DefaultMemoryLow", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) ||
(STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == (uint64_t) -1) ||
(startswith(name, "Limit") && u == (uint64_t) -1) ||
(startswith(name, "DefaultLimit") && u == (uint64_t) -1))

View File

@ -4129,6 +4129,8 @@ typedef struct UnitStatusInfo {
uint64_t ip_ingress_bytes;
uint64_t ip_egress_bytes;
uint64_t default_memory_low;
LIST_HEAD(ExecStatusInfo, exec);
} UnitStatusInfo;
@ -5479,6 +5481,7 @@ static int show_one(
{ "Where", "s", NULL, offsetof(UnitStatusInfo, where) },
{ "What", "s", NULL, offsetof(UnitStatusInfo, what) },
{ "MemoryCurrent", "t", NULL, offsetof(UnitStatusInfo, memory_current) },
{ "DefaultMemoryLow", "t", NULL, offsetof(UnitStatusInfo, default_memory_low) },
{ "MemoryMin", "t", NULL, offsetof(UnitStatusInfo, memory_min) },
{ "MemoryLow", "t", NULL, offsetof(UnitStatusInfo, memory_low) },
{ "MemoryHigh", "t", NULL, offsetof(UnitStatusInfo, memory_high) },

View File

@ -574,6 +574,12 @@ tests += [
libshared],
[]],
[['src/test/test-cgroup-unit-default.c',
'src/test/test-helper.c'],
[libcore,
libshared],
[]],
[['src/test/test-cgroup-mask.c',
'src/test/test-helper.c'],
[libcore,

View File

@ -0,0 +1,145 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <stdio.h>
#include "cgroup.h"
#include "manager.h"
#include "rm-rf.h"
#include "test-helper.h"
#include "tests.h"
#include "unit.h"
static int test_default_memory_low(void) {
_cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
_cleanup_(manager_freep) Manager *m = NULL;
Unit *root, *dml,
*dml_passthrough, *dml_passthrough_empty, *dml_passthrough_set_dml, *dml_passthrough_set_ml,
*dml_override, *dml_override_empty,
*dml_discard, *dml_discard_empty, *dml_discard_set_ml;
uint64_t dml_tree_default;
int r;
r = enter_cgroup_subroot();
if (r == -ENOMEDIUM)
return log_tests_skipped("cgroupfs not available");
assert_se(set_unit_path(get_testdata_dir()) >= 0);
assert_se(runtime_dir = setup_fake_runtime_dir());
r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
if (IN_SET(r, -EPERM, -EACCES)) {
log_error_errno(r, "manager_new: %m");
return log_tests_skipped("cannot create manager");
}
assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0);
/* dml.slice has DefaultMemoryLow=50. Beyond that, individual subhierarchies look like this:
*
* 1. dml-passthrough.slice sets MemoryLow=100. This should not affect its children, as only
* DefaultMemoryLow is propagated, not MemoryLow. As such, all leaf services should end up with
* memory.low as 50, inherited from dml.slice, *except* for dml-passthrough-set-ml.service, which
* should have the value of 25, as it has MemoryLow explicitly set.
*
*
* dml.slice
*
* MemoryLow=100
*
* dml-passthrough.slice
*
*
* no new settings DefaultMemoryLow=15 MemoryLow=25
*
* dml-passthrough-empty.service dml-passthrough-set-dml.service dml-passthrough-set-ml.service
*
*
* 2. dml-override.slice sets DefaultMemoryLow=10. As such, dml-override-empty.service should also
* end up with a memory.low of 10. dml-override.slice should still have a memory.low of 50.
*
*
* dml.slice
*
* DefaultMemoryLow=10
*
* dml-override.slice
*
* no new settings
*
* dml-override-empty.service
*
*
* 3. dml-discard.slice sets DefaultMemoryLow= with no rvalue. As such,
* dml-discard-empty.service should end up with a value of 0.
* dml-discard-explicit-ml.service sets MemoryLow=70, and as such should have that override the
* reset DefaultMemoryLow value. dml-discard.slice should still have an eventual memory.low of 50.
*
*
* dml.slice
*
* DefaultMemoryLow=
*
* dml-discard.slice
*
*
* no new settings MemoryLow=15
*
* dml-discard-empty.service dml-discard-set-ml.service
*
*/
assert_se(manager_load_startable_unit_or_warn(m, "dml.slice", NULL, &dml) >= 0);
assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough.slice", NULL, &dml_passthrough) >= 0);
assert_se(UNIT_DEREF(dml_passthrough->slice) == dml);
assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough-empty.service", NULL, &dml_passthrough_empty) >= 0);
assert_se(UNIT_DEREF(dml_passthrough_empty->slice) == dml_passthrough);
assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough-set-dml.service", NULL, &dml_passthrough_set_dml) >= 0);
assert_se(UNIT_DEREF(dml_passthrough_set_dml->slice) == dml_passthrough);
assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough-set-ml.service", NULL, &dml_passthrough_set_ml) >= 0);
assert_se(UNIT_DEREF(dml_passthrough_set_ml->slice) == dml_passthrough);
assert_se(manager_load_startable_unit_or_warn(m, "dml-override.slice", NULL, &dml_override) >= 0);
assert_se(UNIT_DEREF(dml_override->slice) == dml);
assert_se(manager_load_startable_unit_or_warn(m, "dml-override-empty.service", NULL, &dml_override_empty) >= 0);
assert_se(UNIT_DEREF(dml_override_empty->slice) == dml_override);
assert_se(manager_load_startable_unit_or_warn(m, "dml-discard.slice", NULL, &dml_discard) >= 0);
assert_se(UNIT_DEREF(dml_discard->slice) == dml);
assert_se(manager_load_startable_unit_or_warn(m, "dml-discard-empty.service", NULL, &dml_discard_empty) >= 0);
assert_se(UNIT_DEREF(dml_discard_empty->slice) == dml_discard);
assert_se(manager_load_startable_unit_or_warn(m, "dml-discard-set-ml.service", NULL, &dml_discard_set_ml) >= 0);
assert_se(UNIT_DEREF(dml_discard_set_ml->slice) == dml_discard);
root = UNIT_DEREF(dml->slice);
assert_se(!UNIT_ISSET(root->slice));
assert_se(unit_get_ancestor_memory_low(root) == CGROUP_LIMIT_MIN);
assert_se(unit_get_ancestor_memory_low(dml) == CGROUP_LIMIT_MIN);
dml_tree_default = unit_get_cgroup_context(dml)->default_memory_low;
assert_se(dml_tree_default == 50);
assert_se(unit_get_ancestor_memory_low(dml_passthrough) == 100);
assert_se(unit_get_ancestor_memory_low(dml_passthrough_empty) == dml_tree_default);
assert_se(unit_get_ancestor_memory_low(dml_passthrough_set_dml) == 50);
assert_se(unit_get_ancestor_memory_low(dml_passthrough_set_ml) == 25);
assert_se(unit_get_ancestor_memory_low(dml_override) == dml_tree_default);
assert_se(unit_get_ancestor_memory_low(dml_override_empty) == 10);
assert_se(unit_get_ancestor_memory_low(dml_discard) == dml_tree_default);
assert_se(unit_get_ancestor_memory_low(dml_discard_empty) == CGROUP_LIMIT_MIN);
assert_se(unit_get_ancestor_memory_low(dml_discard_set_ml) == 15);
return 0;
}
int main(int argc, char* argv[]) {
int rc = EXIT_SUCCESS;
test_setup_logging(LOG_DEBUG);
TEST_REQ_RUNNING_SYSTEMD(rc = test_default_memory_low());
return rc;
}

View File

@ -0,0 +1,7 @@
[Unit]
Description=DML discard empty service
[Service]
Slice=dml-discard.slice
Type=oneshot
ExecStart=/bin/true

View File

@ -0,0 +1,8 @@
[Unit]
Description=DML discard set ml service
[Service]
Slice=dml-discard.slice
Type=oneshot
ExecStart=/bin/true
MemoryLow=15

5
test/dml-discard.slice Normal file
View File

@ -0,0 +1,5 @@
[Unit]
Description=DML discard slice
[Slice]
DefaultMemoryLow=

View File

@ -0,0 +1,7 @@
[Unit]
Description=DML override empty service
[Service]
Slice=dml-override.slice
Type=oneshot
ExecStart=/bin/true

5
test/dml-override.slice Normal file
View File

@ -0,0 +1,5 @@
[Unit]
Description=DML override slice
[Slice]
DefaultMemoryLow=10

View File

@ -0,0 +1,7 @@
[Unit]
Description=DML passthrough empty service
[Service]
Slice=dml-passthrough.slice
Type=oneshot
ExecStart=/bin/true

View File

@ -0,0 +1,8 @@
[Unit]
Description=DML passthrough set DML service
[Service]
Slice=dml-passthrough.slice
Type=oneshot
ExecStart=/bin/true
DefaultMemoryLow=15

View File

@ -0,0 +1,8 @@
[Unit]
Description=DML passthrough set ML service
[Service]
Slice=dml-passthrough.slice
Type=oneshot
ExecStart=/bin/true
MemoryLow=25

View File

@ -0,0 +1,5 @@
[Unit]
Description=DML passthrough slice
[Slice]
MemoryLow=100

5
test/dml.slice Normal file
View File

@ -0,0 +1,5 @@
[Unit]
Description=DML slice
[Slice]
DefaultMemoryLow=50

View File

@ -7,6 +7,16 @@ test_data_files = '''
c.service
d.service
daughter.service
dml.slice
dml-passthrough.slice
dml-passthrough-empty.service
dml-passthrough-set-dml.service
dml-passthrough-set-ml.service
dml-override.slice
dml-override-empty.service
dml-discard.slice
dml-discard-empty.service
dml-discard-set-ml.service
e.service
end.service
f.service