core: add CPUQuotaPeriodSec=

This new setting allows configuring the CFS period on the CPU cgroup, instead
of always using the hardcoded default of 100ms.

Tested:
- Legacy cgroup + Unified cgroup
- systemctl set-property
- systemctl show
- Confirmed that the cgroup settings (such as cpu.cfs_period_us) were set
  appropriately, including updating the CPU quota (cpu.cfs_quota_us) when
  CPUQuotaPeriodSec= is updated.
- Checked that clamping works properly when either the period or the quota
  interval (quota * period) is below the 1ms resolution, or when the period
  is above the maximum of 1s.
Filipe Brandenburger 2018-11-02 09:21:57 -07:00
parent 7b61ce3c44
commit 10f2864111
10 changed files with 154 additions and 9 deletions

View file

@@ -224,6 +224,7 @@ All cgroup/resource control settings are available for transient units
✓ CPUShares=
✓ StartupCPUShares=
✓ CPUQuota=
✓ CPUQuotaPeriodSec=
✓ MemoryAccounting=
✓ MemoryMin=
✓ MemoryLow=

View file

@@ -198,6 +198,25 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>CPUQuotaPeriodSec=</varname></term>
<listitem>
<para>Assign the duration over which the CPU time quota specified by <varname>CPUQuota=</varname> is measured.
Takes a time duration value in seconds, with an optional suffix such as "ms" for milliseconds (or "s" for seconds).
The default setting is 100ms. The period is clamped to the range supported by the kernel, which is [1ms, 1000ms].
Additionally, the period is adjusted up so that the quota interval is also at least 1ms.
Setting <varname>CPUQuotaPeriodSec=</varname> to an empty value resets it to the default.</para>
<para>This controls the second field of the <literal>cpu.max</literal> attribute on the unified control group hierarchy
and the <literal>cpu.cfs_period_us</literal> attribute on the legacy hierarchy. For details about these control group attributes, see
<ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and
<ulink url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
<para>Example: <varname>CPUQuotaPeriodSec=10ms</varname> to request that the CPU quota is measured in periods of 10ms.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>MemoryAccounting=</varname></term>
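To make the clamping rule above concrete, here is a minimal standalone sketch (not part of this commit) mirroring the arithmetic of the cgroup_cpu_adjust_period() helper added to cgroup.c further down; the numbers are illustrative:

/* Sketch of the clamping rule: the period is kept within [1ms, 1s] and raised
 * so that quota * period (the CPU time available per period) stays >= 1ms. */
#include <stdio.h>

typedef unsigned long long usec_t;
#define USEC_PER_SEC  1000000ULL
#define USEC_PER_MSEC 1000ULL

static usec_t adjust_period(usec_t period, usec_t quota_per_sec) {
        usec_t p = period;
        if (p < USEC_PER_MSEC)
                p = USEC_PER_MSEC;                                 /* kernel resolution: 1ms */
        if (p < USEC_PER_MSEC * USEC_PER_SEC / quota_per_sec)
                p = USEC_PER_MSEC * USEC_PER_SEC / quota_per_sec;  /* keep the per-period quota >= 1ms */
        if (p > USEC_PER_SEC)
                p = USEC_PER_SEC;                                  /* kernel maximum: 1s */
        return p;
}

int main(void) {
        /* CPUQuota=40%, CPUQuotaPeriodSec=1ms: 40% of 1ms is only 0.4ms,
         * so the period is raised to 2.5ms. */
        printf("%llu\n", adjust_period(1 * USEC_PER_MSEC, 400 * USEC_PER_MSEC));  /* prints 2500 */
        /* CPUQuota=40%, CPUQuotaPeriodSec=5s: capped at the 1s maximum. */
        printf("%llu\n", adjust_period(5 * USEC_PER_SEC, 400 * USEC_PER_MSEC));   /* prints 1000000 */
        return 0;
}

The new test-cgroup-cpu.c added at the bottom of this commit exercises these and several more combinations against the real helper.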

View file

@@ -25,7 +25,7 @@
#include "string-util.h"
#include "virt.h"
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
/* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
* problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask
@@ -98,6 +98,7 @@ void cgroup_context_init(CGroupContext *c) {
.cpu_weight = CGROUP_WEIGHT_INVALID,
.startup_cpu_weight = CGROUP_WEIGHT_INVALID,
.cpu_quota_per_sec_usec = USEC_INFINITY,
.cpu_quota_period_usec = USEC_INFINITY,
.cpu_shares = CGROUP_CPU_SHARES_INVALID,
.startup_cpu_shares = CGROUP_CPU_SHARES_INVALID,
@@ -206,6 +207,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupDeviceAllow *a;
IPAddressAccessItem *iaai;
char u[FORMAT_TIMESPAN_MAX];
char v[FORMAT_TIMESPAN_MAX];
assert(c);
assert(f);
@@ -224,6 +226,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
"%sCPUQuotaPeriodSec=%s\n"
"%sIOWeight=%" PRIu64 "\n"
"%sStartupIOWeight=%" PRIu64 "\n"
"%sBlockIOWeight=%" PRIu64 "\n"
@@ -248,6 +251,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->cpu_shares,
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1),
prefix, c->io_weight,
prefix, c->startup_io_weight,
prefix, c->blockio_weight,
@@ -656,6 +660,39 @@ static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state)
return CGROUP_CPU_SHARES_DEFAULT;
}
usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period) {
/* kernel uses a minimum resolution of 1ms, so both period and (quota * period)
* need to be higher than that boundary. quota is specified in USecPerSec.
* Additionally, period must be at most max_period. */
assert(quota > 0);
return MIN(MAX3(period, resolution, resolution * USEC_PER_SEC / quota), max_period);
}
static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) {
usec_t new_period;
if (quota == USEC_INFINITY)
/* Always use default period for infinity quota. */
return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
if (period == USEC_INFINITY)
/* Default period was requested. */
period = CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
/* Clamp to interval [1ms, 1s] */
new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC);
if (new_period != period) {
char v[FORMAT_TIMESPAN_MAX];
log_unit_full(u, LOG_WARNING, 0,
"Clamping CPU interval for cpu.max: period is now %s",
format_timespan(v, sizeof(v), new_period, 1));
}
return new_period;
}
static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
@@ -663,14 +700,15 @@ static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
(void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf);
}
static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota) {
static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) {
char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
period = cgroup_cpu_adjust_period_and_log(u, period, quota);
if (quota != USEC_INFINITY)
xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC), period);
else
xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
xsprintf(buf, "max " USEC_FMT "\n", period);
(void) set_attribute_and_warn(u, "cpu", "cpu.max", buf);
}
@@ -681,14 +719,16 @@ static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) {
(void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf);
}
static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota) {
static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota, usec_t period) {
char buf[DECIMAL_STR_MAX(usec_t) + 2];
xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
period = cgroup_cpu_adjust_period_and_log(u, period, quota);
xsprintf(buf, USEC_FMT "\n", period);
(void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf);
if (quota != USEC_INFINITY) {
xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
xsprintf(buf, USEC_FMT "\n", MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC));
(void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf);
} else
(void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n");
@@ -911,7 +951,7 @@ static void cgroup_context_apply(
weight = CGROUP_WEIGHT_DEFAULT;
cgroup_apply_unified_cpu_weight(u, weight);
cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec);
cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
} else {
uint64_t shares;
@@ -930,7 +970,7 @@ static void cgroup_context_apply(
shares = CGROUP_CPU_SHARES_DEFAULT;
cgroup_apply_legacy_cpu_shares(u, shares);
cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec);
cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
}
}
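To see what the two apply paths above actually write, here is a minimal sketch (not part of this commit; quota and period values are illustrative) computing the attribute contents for CPUQuota=20% combined with CPUQuotaPeriodSec=10ms, using the same quota * period / USEC_PER_SEC arithmetic as cgroup_apply_unified_cpu_quota() and cgroup_apply_legacy_cpu_quota():

#include <stdio.h>

int main(void) {
        unsigned long long quota_per_sec = 200000;  /* CPUQuota=20%: 200ms of CPU time per second */
        unsigned long long period = 10000;          /* CPUQuotaPeriodSec=10ms, already within [1ms, 1s] */
        unsigned long long quota = quota_per_sec * period / 1000000;  /* 2000us of CPU time per period */

        printf("cpu.max (unified):          \"%llu %llu\"\n", quota, period);  /* "2000 10000" */
        printf("cpu.cfs_period_us (legacy): \"%llu\"\n", period);              /* "10000" */
        printf("cpu.cfs_quota_us (legacy):  \"%llu\"\n", quota);               /* "2000" */
        return 0;
}

With no CPUQuota= set (quota == USEC_INFINITY), only the period is written: "max <period>" on the unified hierarchy, and cpu.cfs_quota_us stays at -1 on legacy.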

View file

@@ -83,6 +83,7 @@ struct CGroupContext {
uint64_t cpu_weight;
uint64_t startup_cpu_weight;
usec_t cpu_quota_per_sec_usec;
usec_t cpu_quota_period_usec;
uint64_t io_weight;
uint64_t startup_io_weight;
@@ -135,6 +136,8 @@ typedef enum CGroupIPAccountingMetric {
typedef struct Unit Unit;
typedef struct Manager Manager;
usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
void cgroup_context_init(CGroupContext *c);
void cgroup_context_done(CGroupContext *c);
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix);

View file

@@ -330,6 +330,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0),
SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0),
SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0),
SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0),
@@ -727,6 +728,28 @@ int bus_cgroup_set_property(
return 1;
} else if (streq(name, "CPUQuotaPeriodUSec")) {
uint64_t u64;
r = sd_bus_message_read(message, "t", &u64);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
c->cpu_quota_period_usec = u64;
unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
if (c->cpu_quota_period_usec == USEC_INFINITY)
unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec=");
else {
char v[FORMAT_TIMESPAN_MAX];
unit_write_settingf(u, flags, "CPUQuotaPeriodSec",
"CPUQuotaPeriodSec=%s",
format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1));
}
}
return 1;
} else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) {
const char *path;
unsigned n = 0;
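The CPUQuotaPeriodUSec property handled above can also be set at runtime over D-Bus, which is roughly what systemctl set-property does under the hood. A minimal sd-bus sketch (not part of this commit), assuming a privileged caller and an illustrative unit name foo.service:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <systemd/sd-bus.h>

int main(void) {
        sd_bus_error error = SD_BUS_ERROR_NULL;
        sd_bus *bus = NULL;
        int r;

        r = sd_bus_open_system(&bus);
        if (r < 0)
                return 1;

        /* SetUnitProperties(unit, runtime, [(property, value), ...]);
         * 10000 usec corresponds to CPUQuotaPeriodSec=10ms. */
        r = sd_bus_call_method(bus,
                               "org.freedesktop.systemd1",
                               "/org/freedesktop/systemd1",
                               "org.freedesktop.systemd1.Manager",
                               "SetUnitProperties",
                               &error, NULL,
                               "sba(sv)", "foo.service", 0,
                               1, "CPUQuotaPeriodUSec", "t", (uint64_t) 10000);
        if (r < 0)
                fprintf(stderr, "Failed to set property: %s\n",
                        error.message ? error.message : strerror(-r));

        sd_bus_error_free(&error);
        sd_bus_unref(bus);
        return r < 0;
}

Sending USEC_INFINITY (UINT64_MAX) resets the period to the default; the handler above then serializes it back as an empty CPUQuotaPeriodSec= assignment.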

View file

@@ -165,6 +165,7 @@ $1.StartupCPUWeight, config_parse_cg_weight, 0,
$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
$1.CPUQuotaPeriodSec, config_parse_sec_def_infinity, 0, offsetof($1, cgroup_context.cpu_quota_period_usec)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
$1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)

View file

@@ -54,6 +54,7 @@
#include "unit-name.h"
#include "unit-printf.h"
#include "user-util.h"
#include "time-util.h"
#include "web-util.h"
static int parse_socket_protocol(const char *s) {

View file

@@ -464,6 +464,20 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return 1;
}
if (streq(field, "CPUQuotaPeriodSec")) {
usec_t u = USEC_INFINITY;
r = parse_sec_def_infinity(eq, &u);
if (r < 0)
return log_error_errno(r, "CPU quota period '%s' invalid.", eq);
r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u);
if (r < 0)
return bus_log_create_error(r);
return 1;
}
if (streq(field, "DeviceAllow")) {
if (isempty(eq))
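On the client side, the string given after CPUQuotaPeriodSec= is parsed with parse_sec_def_infinity() and shipped as CPUQuotaPeriodUSec, as the hunk above shows. A minimal sketch of the expected results (not part of this commit, built inside the systemd tree; the empty-string case is an assumption based on the reset semantics in the man page hunk):

#include "macro.h"
#include "time-util.h"

int main(void) {
        usec_t u;

        assert_se(parse_sec_def_infinity("10ms", &u) >= 0 && u == 10 * USEC_PER_MSEC);
        assert_se(parse_sec_def_infinity("1", &u) >= 0 && u == USEC_PER_SEC);  /* bare numbers are seconds */
        assert_se(parse_sec_def_infinity("", &u) >= 0 && u == USEC_INFINITY);  /* assumed: empty resets to the default period */
        return 0;
}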

View file

@@ -560,6 +560,11 @@ tests += [
[],
'', 'manual'],
[['src/test/test-cgroup-cpu.c'],
[libcore,
libshared],
[]],
[['src/test/test-cgroup-mask.c',
'src/test/test-helper.c'],
[libcore,

View file

@@ -0,0 +1,38 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "cgroup.h"
#include "log.h"
static void test_cgroup_cpu_adjust_period(void) {
log_info("/* %s */", __func__);
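/* Note: the quota argument is CPU time per second (the same unit as
 * CPUQuotaPerSecUSec=), so e.g. 400 * USEC_PER_MSEC below corresponds to
 * CPUQuota=40%. */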
/* Period 1ms, quota 40% -> Period 2.5ms */
assert_se(2500 == cgroup_cpu_adjust_period(USEC_PER_MSEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 10ms, quota 10% -> keep. */
assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 100 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 1ms, quota 1000% -> keep. */
assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(USEC_PER_MSEC, 10000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 100ms, quota 30% -> keep. */
assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(100 * USEC_PER_MSEC, 300 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 5s, quota 40% -> adjust to 1s. */
assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(5 * USEC_PER_SEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 2s, quota 250% -> adjust to 1s. */
assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(2 * USEC_PER_SEC, 2500 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 10us, quota 5,000,000% -> adjust to 1ms. */
assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(10, 50000000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 10ms, quota 50,000% -> keep. */
assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 500000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 10ms, quota 1% -> adjust to 100ms. */
assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 10ms, quota .001% -> adjust to 1s. */
assert_se(1 * USEC_PER_SEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 0ms, quota 200% -> adjust to 1ms. */
assert_se(1 * USEC_PER_MSEC == cgroup_cpu_adjust_period(0, 2 * USEC_PER_SEC, USEC_PER_MSEC, USEC_PER_SEC));
/* Period 0ms, quota 40% -> adjust to 2.5ms. */
assert_se(2500 == cgroup_cpu_adjust_period(0, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
}
int main(int argc, char *argv[]) {
test_cgroup_cpu_adjust_period();
return 0;
}