core: add support for setting CPUAffinity= to special "numa" value

systemd will automatically derive the CPU affinity mask from the NUMA node
mask.

Fixes #13248
This commit is contained in:
Michal Sekletár 2020-02-17 13:50:31 +01:00
parent 1808f76870
commit e2b2fb7f56
9 changed files with 125 additions and 13 deletions

View File

@ -774,10 +774,11 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
<term><varname>CPUAffinity=</varname></term>
<listitem><para>Controls the CPU affinity of the executed processes. Takes a list of CPU indices or ranges
separated by either whitespace or commas. CPU ranges are specified by the lower and upper CPU indices separated
by a dash. This option may be specified more than once, in which case the specified CPU affinity masks are
merged. If the empty string is assigned, the mask is reset, all assignments prior to this will have no
effect. See
separated by either whitespace or commas. Alternatively, takes the special value "numa", in which case systemd
automatically derives the allowed CPU range from the value of the <varname>NUMAMask=</varname> option. CPU ranges
are specified by the lower and upper CPU indices separated by a dash. This option may be specified more than
once, in which case the specified CPU affinity masks are merged. If the empty string is assigned, the mask
is reset and all assignments prior to this will have no effect. See
<citerefentry><refentrytitle>sched_setaffinity</refentrytitle><manvolnum>2</manvolnum></citerefentry> for
details.</para></listitem>
</varlistentry>

View File

@ -56,6 +56,8 @@ static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext,
static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa);
static int property_get_environment_files(
sd_bus *bus,
@ -213,6 +215,7 @@ static int property_get_cpu_affinity(
sd_bus_error *error) {
ExecContext *c = userdata;
_cleanup_(cpu_set_reset) CPUSet s = {};
_cleanup_free_ uint8_t *array = NULL;
size_t allocated;
@ -220,7 +223,16 @@ static int property_get_cpu_affinity(
assert(reply);
assert(c);
(void) cpu_set_to_dbus(&c->cpu_set, &array, &allocated);
if (c->cpu_affinity_from_numa) {
int r;
r = numa_to_cpu_set(&c->numa_policy, &s);
if (r < 0)
return r;
}
(void) cpu_set_to_dbus(c->cpu_affinity_from_numa ? &s : &c->cpu_set, &array, &allocated);
return sd_bus_message_append_array(reply, 'y', array, allocated);
}
@ -741,6 +753,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("CPUAffinityFromNUMA", "b", property_get_cpu_affinity_from_numa, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@ -1770,6 +1783,20 @@ int bus_exec_context_set_transient_property(
return 1;
} else if (streq(name, "CPUAffinityFromNUMA")) {
int q;
r = sd_bus_message_read_basic(message, 'b', &q);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
c->cpu_affinity_from_numa = q;
unit_write_settingf(u, flags, name, "%s=%s", "CPUAffinity", "numa");
}
return 1;
} else if (streq(name, "NUMAPolicy")) {
int32_t type;
@ -1784,6 +1811,7 @@ int bus_exec_context_set_transient_property(
c->numa_policy.type = type;
return 1;
} else if (streq(name, "Nice")) {
int32_t q;

View File

@ -3021,6 +3021,33 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **
return using_subcgroup;
}
/* Derives a CPU set from the NUMA node mask stored in the execution context and stores it in *ret.
 *
 * Returns 0 and leaves *ret untouched if the context has no NUMA node mask configured (a debug message
 * is logged in that case). Returns the negative error from numa_to_cpu_set() if the conversion fails.
 * Otherwise *ret is reset, filled with the derived CPUs, and the result of cpu_set_add_all() is returned. */
static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
        _cleanup_(cpu_set_reset) CPUSet derived = {};
        int k;

        assert(c);
        assert(ret);

        if (c->numa_policy.nodes.set) {
                k = numa_to_cpu_set(&c->numa_policy, &derived);
                if (k < 0)
                        return k;

                /* Drop whatever the caller passed in before copying the derived CPUs over. */
                cpu_set_reset(ret);

                return cpu_set_add_all(ret, &derived);
        }

        log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
        return 0;
}
/* Accessor for the cpu_affinity_from_numa flag of the execution context. */
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
        bool derive_from_numa;

        assert(c);

        derive_from_numa = c->cpu_affinity_from_numa;
        return derive_from_numa;
}
static int exec_child(
Unit *unit,
const ExecCommand *command,
@ -3318,11 +3345,26 @@ static int exec_child(
}
}
if (context->cpu_set.set)
if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
if (context->cpu_affinity_from_numa || context->cpu_set.set) {
_cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
const CPUSet *cpu_set;
if (context->cpu_affinity_from_numa) {
r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
if (r < 0) {
*exit_status = EXIT_CPUAFFINITY;
return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
}
cpu_set = &converted_cpu_set;
} else
cpu_set = &context->cpu_set;
if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
*exit_status = EXIT_CPUAFFINITY;
return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
}
}
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
r = apply_numa_policy(&context->numa_policy);

View File

@ -182,6 +182,7 @@ struct ExecContext {
CPUSet cpu_set;
NUMAPolicy numa_policy;
bool cpu_affinity_from_numa;
ExecInput std_input;
ExecOutput std_output;
@ -406,6 +407,8 @@ void exec_runtime_vacuum(Manager *m);
void exec_params_clear(ExecParameters *p);
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
const char* exec_output_to_string(ExecOutput i) _const_;
ExecOutput exec_output_from_string(const char *s) _pure_;

View File

@ -1330,13 +1330,25 @@ int config_parse_exec_cpu_affinity(const char *unit,
void *userdata) {
ExecContext *c = data;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
return parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
if (streq(rvalue, "numa")) {
c->cpu_affinity_from_numa = true;
cpu_set_reset(&c->cpu_set);
return 0;
}
r = parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
if (r >= 0)
c->cpu_affinity_from_numa = false;
return r;
}
int config_parse_capability_set(

View File

@ -29,6 +29,7 @@
#include "signal-util.h"
#include "socket-util.h"
#include "sort-util.h"
#include "stdio-util.h"
#include "string-util.h"
#include "syslog-util.h"
#include "terminal-util.h"
@ -1103,6 +1104,13 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
_cleanup_free_ uint8_t *array = NULL;
size_t allocated;
if (eq && streq(eq, "numa")) {
r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true);
if (r < 0)
return bus_log_create_error(r);
return r;
}
r = parse_cpu_set(eq, &cpuset);
if (r < 0)
return log_error_errno(r, "Failed to parse %s value: %s", field, eq);

View File

@ -131,7 +131,7 @@ int cpu_set_add_all(CPUSet *a, const CPUSet *b) {
return r;
}
return 0;
return 1;
}
int parse_cpu_set_full(
@ -216,7 +216,7 @@ int parse_cpu_set_extend(
if (!old->set) {
*old = cpuset;
cpuset = (CPUSet) {};
return 0;
return 1;
}
return cpu_set_add_all(old, &cpuset);

View File

@ -216,12 +216,12 @@ static void test_parse_cpu_set_extend(void) {
log_info("/* %s */", __func__);
assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
assert_se(CPU_COUNT_S(c.allocated, c.set) == 2);
assert_se(s1 = cpu_set_to_string(&c));
log_info("cpu_set_to_string: %s", s1);
assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
assert_se(CPU_COUNT_S(c.allocated, c.set) == 3);
assert_se(s2 = cpu_set_to_string(&c));
log_info("cpu_set_to_string: %s", s2);
@ -238,7 +238,7 @@ static void test_cpu_set_to_from_dbus(void) {
log_info("/* %s */", __func__);
assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
assert_se(s = cpu_set_to_string(&c));
log_info("cpu_set_to_string: %s", s);
assert_se(CPU_COUNT_S(c.allocated, c.set) == 104);

View File

@ -279,6 +279,18 @@ else
# Mask must be ignored
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" $straceLog
echo "Unit file CPUAffinity=NUMA support"
# Configure the test unit with the "bind" policy on node 0 (presumably via NUMAPolicy=/NUMAMask= in the
# drop-in written by the helper; confirm against writeTestUnitNUMAPolicy) and request that the CPU
# affinity be derived from the NUMA mask.
writeTestUnitNUMAPolicy "bind" "0"
echo "CPUAffinity=numa" >> $testUnitNUMAConf
systemctl daemon-reload
systemctl start $testUnit
systemctlCheckNUMAProperties $testUnit "bind" "0"
# NOTE(review): pid is not used in this section; kept so the MainPID query still runs.
pid=$(systemctl show --value -p MainPID $testUnit)
# The CPUAffinity property reported by systemd must match the kernel's CPU list for NUMA node 0.
cpulist=$(cat /sys/devices/system/node/node0/cpulist)
affinity_systemd=$(systemctl show --value -p CPUAffinity $testUnit)
# Quote both operands: an empty or whitespace-containing value would otherwise turn this into a
# malformed test expression instead of a plain string comparison.
[ "$cpulist" = "$affinity_systemd" ]
pid1StopUnit $testUnit
echo "systemd-run NUMAPolicy support"
runUnit='numa-systemd-run-test.service'
@ -309,6 +321,12 @@ else
systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit $runUnit sleep 1000
systemctlCheckNUMAProperties $runUnit "local" ""
pid1StopUnit $runUnit
systemd-run -p NUMAPolicy=local -p NUMAMask=0 -p CPUAffinity=numa --unit $runUnit sleep 1000
systemctlCheckNUMAProperties $runUnit "local" ""
systemctl cat $runUnit | grep -q 'CPUAffinity=numa'
pid1StopUnit $runUnit
fi
# Cleanup