core: expose CFS CPU time quota as high-level unit properties

This commit is contained in:
Lennart Poettering 2014-04-25 13:27:25 +02:00
parent 3051f1871e
commit b2f8b02ec2
14 changed files with 340 additions and 7 deletions

View file

@ -301,6 +301,18 @@
above.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>DefaultCPUQuotaPeriodSec=</varname></term>
<listitem><para>Sets the default CPU
quota period. Defaults to 100ms. This
controls the global default for the
<varname>CPUQuotaPeriodSec=</varname>
setting of units, see
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>DefaultTimeoutStartSec=</varname></term>
<term><varname>DefaultTimeoutStopSec=</varname></term>

View file

@ -117,11 +117,53 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>.
<term><varname>CPUShares=<replaceable>weight</replaceable></varname></term>
<listitem>
<para>Assign the specified overall CPU time share weight to
the processes executed. Takes an integer value. This
controls the <literal>cpu.shares</literal> control group
attribute, which defaults to 1024. For details about this
control group attribute, see <ulink
<para>Assign the specified CPU time share weight to the
processes executed. Takes an integer value. This controls
the <literal>cpu.shares</literal> control group attribute,
which defaults to 1024. For details about this control group
attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
The available CPU time is split up among all units within a
slice relative to their CPU time share weight.</para>
<para>Implies <literal>CPUAccounting=true</literal>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>CPUQuota=</varname></term>
<listitem>
<para>Assign the specified CPU time quota to the processes
executed. Takes a percentage value (suffixed with "%") or an
absolute time (suffixed by one of the common time units, us,
ms, s, ...). The percentage specifies how much CPU time the
unit shall get at maximum, relative to the total CPU time
available on one CPU. Use values > 100% for allotting CPU
time on more than one CPU. If an absolute time is specified
the processes of this unit will get this much absolute time
within each quota period, at maximum. This controls the
<literal>cpu.cfs_quota_us</literal> control group
attribute. For details about this control group attribute,
see <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
<para>Example: <varname>CPUQuota=20%</varname> ensures that
the executed processes will never get more than 20% CPU time
on one CPU.</para>
<para>Implies <literal>CPUAccounting=true</literal>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>CPUQuotaPeriodSec=</varname></term>
<listitem>
<para>Specify the CPU quota period to use. Defaults to
100ms. This controls the <literal>cpu.cfs_period_us</literal>
control group attribute. For details about this control
group attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
<para>Implies <literal>CPUAccounting=true</literal>.</para>

View file

@ -36,6 +36,10 @@ void cgroup_context_init(CGroupContext *c) {
c->cpu_shares = 1024;
c->memory_limit = (uint64_t) -1;
c->blockio_weight = 1000;
c->cpu_quota_per_sec_usec = (usec_t) -1;
c->cpu_quota_usec = (usec_t) -1;
c->cpu_quota_period_usec = 100*USEC_PER_MSEC;
}
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
@ -78,10 +82,37 @@ void cgroup_context_done(CGroupContext *c) {
cgroup_context_free_device_allow(c, c->device_allow);
}
/* Returns the absolute CPU quota, i.e. the CPU time granted within one
 * quota period. An explicitly configured absolute quota wins; otherwise a
 * per-second quota is scaled by the configured period. Returns (usec_t) -1
 * when neither form of the quota is set. */
usec_t cgroup_context_get_cpu_quota_usec(CGroupContext *c) {
        const usec_t unset = (usec_t) -1;

        assert(c);

        if (c->cpu_quota_usec != unset)
                return c->cpu_quota_usec;

        if (c->cpu_quota_per_sec_usec == unset)
                return unset;

        /* Scale the per-second value down (or up) to the length of one period */
        return c->cpu_quota_per_sec_usec*c->cpu_quota_period_usec/USEC_PER_SEC;
}
/* Returns the CPU quota relative to 1s, i.e. how much CPU time the unit may
 * consume per second of wall-clock time.
 *
 * If an absolute quota is configured it is normalized using the quota
 * period; if a per-second quota is configured it is returned as-is.
 * Returns (usec_t) -1 when no quota is set, or when an absolute quota is
 * set but the period is zero and hence cannot be used for normalization. */
usec_t cgroup_context_get_cpu_quota_per_sec_usec(CGroupContext *c) {
        assert(c);

        if (c->cpu_quota_usec != (usec_t) -1) {
                /* The period is parsed with the generic time span parser,
                 * which does not reject 0. Never divide by it in that
                 * case, report the relative quota as unset instead. */
                if (c->cpu_quota_period_usec <= 0)
                        return (usec_t) -1;

                return c->cpu_quota_usec*USEC_PER_SEC/c->cpu_quota_period_usec;
        }

        if (c->cpu_quota_per_sec_usec != (usec_t) -1)
                return c->cpu_quota_per_sec_usec;

        return (usec_t) -1;
}
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupBlockIODeviceBandwidth *b;
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
char t[FORMAT_TIMESPAN_MAX], s[FORMAT_TIMESPAN_MAX], u[FORMAT_TIMESPAN_MAX];
assert(c);
assert(f);
@ -93,6 +124,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sCPUShares=%lu\n"
"%sCPUQuota=%s\n"
"%sCPUQuotaPerSecSec=%s\n"
"%sCPUQuotaPeriodSec=%s\n"
"%sBlockIOWeight=%lu\n"
"%sMemoryLimit=%" PRIu64 "\n"
"%sDevicePolicy=%s\n",
@ -100,6 +134,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, c->cpu_shares,
prefix, strna(format_timespan(u, sizeof(u), cgroup_context_get_cpu_quota_usec(c), 1)),
prefix, strna(format_timespan(t, sizeof(t), cgroup_context_get_cpu_quota_per_sec_usec(c), 1)),
prefix, strna(format_timespan(s, sizeof(s), c->cpu_quota_period_usec, 1)),
prefix, c->blockio_weight,
prefix, c->memory_limit,
prefix, cgroup_device_policy_to_string(c->device_policy));
@ -284,12 +321,27 @@ void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const cha
is_root = isempty(path) || path_equal(path, "/");
if ((mask & CGROUP_CPU) && !is_root) {
char buf[DECIMAL_STR_MAX(unsigned long) + 1];
char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];
usec_t q;
sprintf(buf, "%lu\n", c->cpu_shares);
r = cg_set_attribute("cpu", path, "cpu.shares", buf);
if (r < 0)
log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
sprintf(buf, USEC_FMT "\n", c->cpu_quota_period_usec);
r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
if (r < 0)
log_warning("Failed to set cpu.cfs_period_us on %s: %s", path, strerror(-r));
q = cgroup_context_get_cpu_quota_usec(c);
if (q != (usec_t) -1) {
sprintf(buf, USEC_FMT "\n", q);
r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
} else
r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
if (r < 0)
log_warning("Failed to set cpu.cfs_quota_us on %s: %s", path, strerror(-r));
}
if (mask & CGROUP_BLKIO) {
@ -415,7 +467,10 @@ CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
/* Figure out which controllers we need */
if (c->cpu_accounting || c->cpu_shares != 1024)
if (c->cpu_accounting ||
c->cpu_shares != 1024 ||
c->cpu_quota_usec != (usec_t) -1 ||
c->cpu_quota_per_sec_usec != (usec_t) -1)
mask |= CGROUP_CPUACCT | CGROUP_CPU;
if (c->blockio_accounting ||

View file

@ -71,6 +71,9 @@ struct CGroupContext {
bool memory_accounting;
unsigned long cpu_shares;
usec_t cpu_quota_per_sec_usec;
usec_t cpu_quota_usec;
usec_t cpu_quota_period_usec;
unsigned long blockio_weight;
LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights);
@ -97,6 +100,9 @@ void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
usec_t cgroup_context_get_cpu_quota_usec(CGroupContext *c);
usec_t cgroup_context_get_cpu_quota_per_sec_usec(CGroupContext *c);
CGroupControllerMask unit_get_cgroup_mask(Unit *u);
CGroupControllerMask unit_get_siblings_mask(Unit *u);
CGroupControllerMask unit_get_members_mask(Unit *u);

View file

@ -133,10 +133,49 @@ static int property_get_device_allow(
return sd_bus_message_close_container(reply);
}
/* D-Bus property getter: appends the absolute CPU quota of the cgroup
 * context passed as userdata to the reply, as a 64-bit unsigned ("t"). */
static int property_get_cpu_quota_usec(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        CGroupContext *context = userdata;
        usec_t quota;

        assert(bus);
        assert(reply);
        assert(context);

        quota = cgroup_context_get_cpu_quota_usec(context);

        return sd_bus_message_append(reply, "t", quota);
}
/* D-Bus property getter: appends the per-second CPU quota of the cgroup
 * context passed as userdata to the reply, as a 64-bit unsigned ("t"). */
static int property_get_cpu_quota_per_sec_usec(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        CGroupContext *context = userdata;
        usec_t quota_per_sec;

        assert(bus);
        assert(reply);
        assert(context);

        quota_per_sec = cgroup_context_get_cpu_quota_per_sec_usec(context);

        return sd_bus_message_append(reply, "t", quota_per_sec);
}
const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
SD_BUS_PROPERTY("CPUShares", "t", bus_property_get_ulong, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", property_get_cpu_quota_per_sec_usec, 0, 0),
SD_BUS_PROPERTY("CPUQuotaUSec", "t", property_get_cpu_quota_usec, 0, 0),
SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0),
SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0),
SD_BUS_PROPERTY("BlockIOWeight", "t", bus_property_get_ulong, offsetof(CGroupContext, blockio_weight), 0),
SD_BUS_PROPERTY("BlockIODeviceWeight", "a(st)", property_get_blockio_device_weight, 0, 0),
@ -199,6 +238,63 @@ int bus_cgroup_set_property(
return 1;
} else if (streq(name, "CPUQuotaPerSecUSec")) {
uint64_t u64;
r = sd_bus_message_read(message, "t", &u64);
if (r < 0)
return r;
if (u64 <= 0)
return sd_bus_error_set_errnof(error, EINVAL, "CPUQuotaPerSecUSec value out of range");
if (mode != UNIT_CHECK) {
c->cpu_quota_per_sec_usec = u64;
c->cpu_quota_usec = (uint64_t) -1;
u->cgroup_realized_mask &= ~CGROUP_CPU;
unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%0.f%%", (double) (c->cpu_quota_per_sec_usec / 10000));
}
return 1;
} else if (streq(name, "CPUQuotaUSec")) {
uint64_t u64;
r = sd_bus_message_read(message, "t", &u64);
if (r < 0)
return r;
if (u64 <= 0)
return sd_bus_error_set_errnof(error, EINVAL, "CPUQuotaUSec value out of range");
if (mode != UNIT_CHECK) {
c->cpu_quota_usec = u64;
c->cpu_quota_per_sec_usec = (uint64_t) -1;
u->cgroup_realized_mask &= ~CGROUP_CPU;
unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%" PRIu64 "us", u64);
}
return 1;
} else if (streq(name, "CPUQuotaPeriodUSec")) {
uint64_t u64;
r = sd_bus_message_read(message, "t", &u64);
if (r < 0)
return r;
if (u64 <= 0 || u64 >= (usec_t) -1)
return sd_bus_error_set_errnof(error, EINVAL, "CPUQuotaPeriodUSec value out of range");
if (mode != UNIT_CHECK) {
c->cpu_quota_period_usec = u64;
u->cgroup_realized_mask &= ~CGROUP_CPU;
unit_write_drop_in_private_format(u, mode, name, "CPUQuotaPeriodSec=%" PRIu64 "us", c->cpu_quota_period_usec);
}
return 1;
} else if (streq(name, "BlockIOAccounting")) {
int b;

View file

@ -106,6 +106,8 @@ m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
`$1.Slice, config_parse_unit_slice, 0, 0
$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting)
$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
$1.CPUQuotaPeriodSec, config_parse_sec, 0, offsetof($1, cgroup_context.cpu_quota_period_usec)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)

View file

@ -2455,6 +2455,54 @@ int config_parse_cpu_shares(
return 0;
}
int config_parse_cpu_quota(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
CGroupContext *c = data;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
if (isempty(rvalue)) {
c->cpu_quota_per_sec_usec = (usec_t) -1;
c->cpu_quota_usec = (usec_t) -1;
return 0;
}
if (endswith(rvalue, "%")) {
double percent;
if (sscanf(rvalue, "%lf%%", &percent) != 1 || percent <= 0) {
log_syntax(unit, LOG_ERR, filename, line, EINVAL, "CPU quota '%s' invalid. Ignoring.", rvalue);
return 0;
}
c->cpu_quota_per_sec_usec = (usec_t) (percent * USEC_PER_SEC / 100);
c->cpu_quota_usec = (usec_t) -1;
} else {
r = parse_sec(rvalue, &c->cpu_quota_usec);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, EINVAL, "CPU quota '%s' invalid. Ignoring.", rvalue);
return 0;
}
c->cpu_quota_per_sec_usec = (usec_t) -1;
}
return 0;
}
int config_parse_memory_limit(
const char *unit,
const char *filename,

View file

@ -96,6 +96,7 @@ int config_parse_runtime_directory(const char *unit, const char *filename, unsig
int config_parse_set_status(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_no_new_priviliges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);

View file

@ -109,6 +109,7 @@ static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
static uint64_t arg_capability_bounding_set_drop = 0;
static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
static usec_t arg_default_cpu_quota_period_usec = 100 * USEC_PER_MSEC;
static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL;
static bool arg_default_cpu_accounting = false;
@ -683,6 +684,7 @@ static int parse_config_file(void) {
#endif
{ "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
{ "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
{ "Manager", "DefaultCPUQuotaPeriodSec", config_parse_sec, 0, &arg_default_cpu_quota_period_usec },
{ "Manager", "DefaultStandardOutput", config_parse_output, 0, &arg_default_std_output },
{ "Manager", "DefaultStandardError", config_parse_output, 0, &arg_default_std_error },
{ "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
@ -1633,6 +1635,7 @@ int main(int argc, char *argv[]) {
m->confirm_spawn = arg_confirm_spawn;
m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
m->default_cpu_quota_period_usec = arg_default_cpu_quota_period_usec;
m->default_std_output = arg_default_std_output;
m->default_std_error = arg_default_std_error;
m->default_restart_usec = arg_default_restart_usec;

View file

@ -429,6 +429,7 @@ int manager_new(SystemdRunningAs running_as, Manager **_m) {
m->running_as = running_as;
m->exit_code = _MANAGER_EXIT_CODE_INVALID;
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
m->default_cpu_quota_period_usec = 100 * USEC_PER_MSEC;
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;

View file

@ -240,6 +240,7 @@ struct Manager {
bool default_cpu_accounting;
bool default_memory_accounting;
bool default_blockio_accounting;
usec_t default_cpu_quota_period_usec;
usec_t default_timer_accuracy_usec;

View file

@ -24,6 +24,7 @@
#SystemCallArchitectures=
#TimerSlackNSec=
#DefaultTimerAccuracySec=1min
#DefaultCPUQuotaPeriodSec=100ms
#DefaultStandardOutput=journal
#DefaultStandardError=inherit
#DefaultTimeoutStartSec=90s

View file

@ -121,6 +121,7 @@ static void unit_init(Unit *u) {
cc->cpu_accounting = u->manager->default_cpu_accounting;
cc->blockio_accounting = u->manager->default_blockio_accounting;
cc->memory_accounting = u->manager->default_memory_accounting;
cc->cpu_quota_period_usec = u->manager->default_cpu_quota_period_usec;
}
ec = unit_get_exec_context(u);

View file

@ -1250,6 +1250,70 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
field = strndupa(assignment, eq - assignment);
eq ++;
if (streq(field, "CPUQuota")) {
if (isempty(eq)) {
r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, "CPUQuotaPerSecUSec");
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(m, "v", "t", (usec_t) -1);
} else if (endswith(eq, "%")) {
double percent;
if (sscanf(eq, "%lf%%", &percent) != 1 || percent <= 0) {
log_error("CPU quota '%s' invalid.", eq);
return -EINVAL;
}
r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, "CPUQuotaPerSecUSec");
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(m, "v", "t", (usec_t) percent * USEC_PER_SEC / 100);
} else {
usec_t us;
r = parse_sec(eq, &us);
if (r < 0) {
log_error("CPU quota '%s' invalid.", eq);
return -EINVAL;
}
r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, "CPUQuotaUSec");
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(m, "v", "t", us);
}
if (r < 0)
return bus_log_create_error(r);
return 0;
} else if (streq(field, "CPUQuotaPeriodSec")) {
usec_t us;
r = parse_sec(eq, &us);
if (r < 0) {
log_error("CPU period '%s' invalid.", eq);
return -EINVAL;
}
r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, "CPUQuotaPeriodUSec");
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(m, "v", "t", us);
if (r < 0)
return bus_log_create_error(r);
return 0;
}
r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
if (r < 0)
return bus_log_create_error(r);