diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md index 0ac77f0497..ac89fb174f 100644 --- a/docs/TRANSIENT-SETTINGS.md +++ b/docs/TRANSIENT-SETTINGS.md @@ -224,6 +224,7 @@ All cgroup/resource control settings are available for transient units ✓ CPUShares= ✓ StartupCPUShares= ✓ CPUQuota= +✓ CPUQuotaPeriodSec= ✓ MemoryAccounting= ✓ MemoryMin= ✓ MemoryLow= diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index a4d793c32e..744bfa938f 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -198,6 +198,25 @@ + + CPUQuotaPeriodSec= + + + Assign the duration over which the CPU time quota specified by CPUQuota= is measured. + Takes a time duration value in seconds, with an optional suffix such as "ms" for milliseconds (or "s" for seconds.) + The default setting is 100ms. The period is clamped to the range supported by the kernel, which is [1ms, 1000ms]. + Additionally, the period is adjusted up so that the quota interval is also at least 1ms. + Setting CPUQuotaPeriodSec= to an empty value resets it to the default. + + This controls the second field of cpu.max attribute on the unified control group hierarchy + and cpu.cfs_period_us on legacy. For details about these control group attributes, see + cgroup-v2.txt and + sched-design-CFS.txt. + + Example: CPUQuotaPeriodSec=10ms to request that the CPU quota is measured in periods of 10ms. + + + MemoryAccounting= diff --git a/src/basic/time-util.c b/src/basic/time-util.c index 62cdc305f9..25a5c116e8 100644 --- a/src/basic/time-util.c +++ b/src/basic/time-util.c @@ -1031,6 +1031,15 @@ int parse_sec_fix_0(const char *t, usec_t *ret) { return r; } +int parse_sec_def_infinity(const char *t, usec_t *ret) { + t += strspn(t, WHITESPACE); + if (isempty(t)) { + *ret = USEC_INFINITY; + return 0; + } + return parse_sec(t, ret); +} + static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) { static const struct { const char *suffix; diff --git a/src/basic/time-util.h b/src/basic/time-util.h index 5316305062..a238f6914d 100644 --- a/src/basic/time-util.h +++ b/src/basic/time-util.h @@ -112,6 +112,7 @@ int parse_timestamp(const char *t, usec_t *usec); int parse_sec(const char *t, usec_t *usec); int parse_sec_fix_0(const char *t, usec_t *usec); +int parse_sec_def_infinity(const char *t, usec_t *usec); int parse_time(const char *t, usec_t *usec, usec_t default_unit); int parse_nsec(const char *t, nsec_t *nsec); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 18d470b6d6..ccf06173db 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -25,7 +25,7 @@ #include "string-util.h" #include "virt.h" -#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) +#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) /* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask @@ -98,6 +98,7 @@ void cgroup_context_init(CGroupContext *c) { .cpu_weight = CGROUP_WEIGHT_INVALID, .startup_cpu_weight = CGROUP_WEIGHT_INVALID, .cpu_quota_per_sec_usec = USEC_INFINITY, + .cpu_quota_period_usec = USEC_INFINITY, .cpu_shares = CGROUP_CPU_SHARES_INVALID, .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID, @@ -206,6 +207,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { CGroupDeviceAllow *a; IPAddressAccessItem *iaai; char u[FORMAT_TIMESPAN_MAX]; + char v[FORMAT_TIMESPAN_MAX]; assert(c); assert(f); @@ -224,6 +226,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sCPUShares=%" PRIu64 "\n" "%sStartupCPUShares=%" PRIu64 "\n" "%sCPUQuotaPerSecSec=%s\n" + "%sCPUQuotaPeriodSec=%s\n" "%sIOWeight=%" PRIu64 "\n" "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" @@ -248,6 +251,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->cpu_shares, prefix, c->startup_cpu_shares, prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), + prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1), prefix, c->io_weight, prefix, c->startup_io_weight, prefix, c->blockio_weight, @@ -656,6 +660,40 @@ static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) return CGROUP_CPU_SHARES_DEFAULT; } +usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period) { + /* kernel uses a minimum resolution of 1ms, so both period and (quota * period) + * need to be higher than that boundary. quota is specified in USecPerSec. + * Additionally, period must be at most max_period. */ + assert(quota > 0); + + return MIN(MAX3(period, resolution, resolution * USEC_PER_SEC / quota), max_period); +} + +static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) { + usec_t new_period; + + if (quota == USEC_INFINITY) + /* Always use default period for infinity quota. */ + return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC; + + if (period == USEC_INFINITY) + /* Default period was requested. */ + period = CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC; + + /* Clamp to interval [1ms, 1s] */ + new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC); + + if (new_period != period) { + char v[FORMAT_TIMESPAN_MAX]; + log_unit_full(u, u->warned_clamping_cpu_quota_period ? LOG_DEBUG : LOG_WARNING, 0, + "Clamping CPU interval for cpu.max: period is now %s", + format_timespan(v, sizeof(v), new_period, 1)); + u->warned_clamping_cpu_quota_period = true; + } + + return new_period; +} + static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) { char buf[DECIMAL_STR_MAX(uint64_t) + 2]; @@ -663,14 +701,15 @@ static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) { (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf); } -static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota) { +static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) { char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1]; + period = cgroup_cpu_adjust_period_and_log(u, period, quota); if (quota != USEC_INFINITY) xsprintf(buf, USEC_FMT " " USEC_FMT "\n", - quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC); + MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC), period); else - xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + xsprintf(buf, "max " USEC_FMT "\n", period); (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf); } @@ -681,14 +720,16 @@ static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) { (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf); } -static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota) { +static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota, usec_t period) { char buf[DECIMAL_STR_MAX(usec_t) + 2]; - xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + period = cgroup_cpu_adjust_period_and_log(u, period, quota); + + xsprintf(buf, USEC_FMT "\n", period); (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf); if (quota != USEC_INFINITY) { - xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); + xsprintf(buf, USEC_FMT "\n", MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC)); (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf); } else (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n"); @@ -911,7 +952,7 @@ static void cgroup_context_apply( weight = CGROUP_WEIGHT_DEFAULT; cgroup_apply_unified_cpu_weight(u, weight); - cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec); + cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec); } else { uint64_t shares; @@ -930,7 +971,7 @@ static void cgroup_context_apply( shares = CGROUP_CPU_SHARES_DEFAULT; cgroup_apply_legacy_cpu_shares(u, shares); - cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec); + cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec); } } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 266daa20a5..42da777c29 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -83,6 +83,7 @@ struct CGroupContext { uint64_t cpu_weight; uint64_t startup_cpu_weight; usec_t cpu_quota_per_sec_usec; + usec_t cpu_quota_period_usec; uint64_t io_weight; uint64_t startup_io_weight; @@ -135,6 +136,8 @@ typedef enum CGroupIPAccountingMetric { typedef struct Unit Unit; typedef struct Manager Manager; +usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period); + void cgroup_context_init(CGroupContext *c); void cgroup_context_done(CGroupContext *c); void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 53890bcafb..7ab53a1c2a 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -330,6 +330,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), + SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0), SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0), SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0), SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0), @@ -713,6 +714,7 @@ int bus_cgroup_set_property( if (!UNIT_WRITE_FLAGS_NOOP(flags)) { c->cpu_quota_per_sec_usec = u64; + u->warned_clamping_cpu_quota_period = false; unit_invalidate_cgroup(u, CGROUP_MASK_CPU); if (c->cpu_quota_per_sec_usec == USEC_INFINITY) @@ -727,6 +729,29 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "CPUQuotaPeriodUSec")) { + uint64_t u64; + + r = sd_bus_message_read(message, "t", &u64); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + c->cpu_quota_period_usec = u64; + u->warned_clamping_cpu_quota_period = false; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + if (c->cpu_quota_period_usec == USEC_INFINITY) + unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec="); + else { + char v[FORMAT_TIMESPAN_MAX]; + unit_write_settingf(u, flags, "CPUQuotaPeriodSec", + "CPUQuotaPeriodSec=%s", + format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1)); + } + } + + return 1; + } else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) { const char *path; unsigned n = 0; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index cdbc67f885..ca3923bbb0 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -165,6 +165,7 @@ $1.StartupCPUWeight, config_parse_cg_weight, 0, $1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares) $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) +$1.CPUQuotaPeriodSec, config_parse_sec_def_infinity, 0, offsetof($1, cgroup_context.cpu_quota_period_usec) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) $1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 44f00a3046..1d4a1bd343 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -54,6 +54,7 @@ #include "unit-name.h" #include "unit-printf.h" #include "user-util.h" +#include "time-util.h" #include "web-util.h" static int parse_socket_protocol(const char *s) { diff --git a/src/core/unit.h b/src/core/unit.h index 43cf15715a..4d6e6cf467 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -349,6 +349,9 @@ typedef struct Unit { bool exported_log_rate_limit_interval:1; bool exported_log_rate_limit_burst:1; + /* Whether we warned about clamping the CPU quota period */ + bool warned_clamping_cpu_quota_period:1; + /* When writing transient unit files, stores which section we stored last. If < 0, we didn't write any yet. If * == 0 we are in the [Unit] section, if > 0 we are in the unit type-specific section. */ signed int last_section_private:2; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 9a8051d063..a74b213155 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -464,6 +464,20 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons return 1; } + if (streq(field, "CPUQuotaPeriodSec")) { + usec_t u = USEC_INFINITY; + + r = parse_sec_def_infinity(eq, &u); + if (r < 0) + return log_error_errno(r, "CPU quota period '%s' invalid.", eq); + + r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + if (streq(field, "DeviceAllow")) { if (isempty(eq)) diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c index b80c147807..aa8283313f 100644 --- a/src/shared/conf-parser.c +++ b/src/shared/conf-parser.c @@ -506,6 +506,7 @@ DEFINE_PARSER(unsigned, unsigned, safe_atou); DEFINE_PARSER(double, double, safe_atod); DEFINE_PARSER(nsec, nsec_t, parse_nsec); DEFINE_PARSER(sec, usec_t, parse_sec); +DEFINE_PARSER(sec_def_infinity, usec_t, parse_sec_def_infinity); DEFINE_PARSER(mode, mode_t, parse_mode); int config_parse_iec_size(const char* unit, diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h index 865db4278b..17b4bdf1a2 100644 --- a/src/shared/conf-parser.h +++ b/src/shared/conf-parser.h @@ -127,6 +127,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_string); CONFIG_PARSER_PROTOTYPE(config_parse_path); CONFIG_PARSER_PROTOTYPE(config_parse_strv); CONFIG_PARSER_PROTOTYPE(config_parse_sec); +CONFIG_PARSER_PROTOTYPE(config_parse_sec_def_infinity); CONFIG_PARSER_PROTOTYPE(config_parse_nsec); CONFIG_PARSER_PROTOTYPE(config_parse_mode); CONFIG_PARSER_PROTOTYPE(config_parse_warn_compat); diff --git a/src/test/meson.build b/src/test/meson.build index 08026ea8c4..7537bdae34 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -560,6 +560,11 @@ tests += [ [], '', 'manual'], + [['src/test/test-cgroup-cpu.c'], + [libcore, + libshared], + []], + [['src/test/test-cgroup-mask.c', 'src/test/test-helper.c'], [libcore, diff --git a/src/test/test-cgroup-cpu.c b/src/test/test-cgroup-cpu.c new file mode 100644 index 0000000000..a445acc955 --- /dev/null +++ b/src/test/test-cgroup-cpu.c @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "cgroup.h" +#include "log.h" + +static void test_cgroup_cpu_adjust_period(void) { + log_info("/* %s */", __func__); + + /* Period 1ms, quota 40% -> Period 2.5ms */ + assert_se(2500 == cgroup_cpu_adjust_period(USEC_PER_MSEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota 10% -> keep. */ + assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 100 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 1ms, quota 1000% -> keep. */ + assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(USEC_PER_MSEC, 10000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 100ms, quota 30% -> keep. */ + assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(100 * USEC_PER_MSEC, 300 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 5s, quota 40% -> adjust to 1s. */ + assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(5 * USEC_PER_SEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 2s, quota 250% -> adjust to 1s. */ + assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(2 * USEC_PER_SEC, 2500 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10us, quota 5,000,000% -> adjust to 1ms. */ + assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(10, 50000000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota 50,000% -> keep. */ + assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 500000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota 1% -> adjust to 100ms. */ + assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 10ms, quota .001% -> adjust to 1s. */ + assert_se(1 * USEC_PER_SEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 0ms, quota 200% -> adjust to 1ms. */ + assert_se(1 * USEC_PER_MSEC == cgroup_cpu_adjust_period(0, 2 * USEC_PER_SEC, USEC_PER_MSEC, USEC_PER_SEC)); + /* Period 0ms, quota 40% -> adjust to 2.5ms. */ + assert_se(2500 == cgroup_cpu_adjust_period(0, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC)); +} + +int main(int argc, char *argv[]) { + test_cgroup_cpu_adjust_period(); + return 0; +} diff --git a/src/test/test-time-util.c b/src/test/test-time-util.c index eb6041c152..633544ff2e 100644 --- a/src/test/test-time-util.c +++ b/src/test/test-time-util.c @@ -85,6 +85,26 @@ static void test_parse_sec_fix_0(void) { assert_se(u == USEC_INFINITY); } +static void test_parse_sec_def_infinity(void) { + usec_t u; + + log_info("/* %s */", __func__); + + assert_se(parse_sec_def_infinity("5s", &u) >= 0); + assert_se(u == 5 * USEC_PER_SEC); + assert_se(parse_sec_def_infinity("", &u) >= 0); + assert_se(u == USEC_INFINITY); + assert_se(parse_sec_def_infinity(" ", &u) >= 0); + assert_se(u == USEC_INFINITY); + assert_se(parse_sec_def_infinity("0s", &u) >= 0); + assert_se(u == 0); + assert_se(parse_sec_def_infinity("0", &u) >= 0); + assert_se(u == 0); + assert_se(parse_sec_def_infinity(" 0", &u) >= 0); + assert_se(u == 0); + assert_se(parse_sec_def_infinity("-5s", &u) < 0); +} + static void test_parse_time(void) { usec_t u; @@ -472,6 +492,7 @@ int main(int argc, char *argv[]) { test_parse_sec(); test_parse_sec_fix_0(); + test_parse_sec_def_infinity(); test_parse_time(); test_parse_nsec(); test_format_timespan(1);