core: add support for setting CPUAffinity= to special "numa" value
systemd will automatically derive the CPU affinity mask from the NUMA node mask. Fixes #13248
This commit is contained in:
parent
1808f76870
commit
e2b2fb7f56
|
@ -774,10 +774,11 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
|
||||||
<term><varname>CPUAffinity=</varname></term>
|
<term><varname>CPUAffinity=</varname></term>
|
||||||
|
|
||||||
<listitem><para>Controls the CPU affinity of the executed processes. Takes a list of CPU indices or ranges
|
<listitem><para>Controls the CPU affinity of the executed processes. Takes a list of CPU indices or ranges
|
||||||
separated by either whitespace or commas. CPU ranges are specified by the lower and upper CPU indices separated
|
separated by either whitespace or commas. Alternatively, takes the special value "numa", in which case systemd
|
||||||
by a dash. This option may be specified more than once, in which case the specified CPU affinity masks are
|
automatically derives the allowed CPU range from the value of the <varname>NUMAMask=</varname> option. CPU ranges
|
||||||
merged. If the empty string is assigned, the mask is reset, all assignments prior to this will have no
|
are specified by the lower and upper CPU indices separated by a dash. This option may be specified more than
|
||||||
effect. See
|
once, in which case the specified CPU affinity masks are merged. If the empty string is assigned, the mask
|
||||||
|
is reset, all assignments prior to this will have no effect. See
|
||||||
<citerefentry><refentrytitle>sched_setaffinity</refentrytitle><manvolnum>2</manvolnum></citerefentry> for
|
<citerefentry><refentrytitle>sched_setaffinity</refentrytitle><manvolnum>2</manvolnum></citerefentry> for
|
||||||
details.</para></listitem>
|
details.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
|
@ -56,6 +56,8 @@ static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext,
|
||||||
static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
|
static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
|
||||||
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
|
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
|
||||||
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
|
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
|
||||||
|
static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa);
|
||||||
|
|
||||||
|
|
||||||
static int property_get_environment_files(
|
static int property_get_environment_files(
|
||||||
sd_bus *bus,
|
sd_bus *bus,
|
||||||
|
@ -213,6 +215,7 @@ static int property_get_cpu_affinity(
|
||||||
sd_bus_error *error) {
|
sd_bus_error *error) {
|
||||||
|
|
||||||
ExecContext *c = userdata;
|
ExecContext *c = userdata;
|
||||||
|
_cleanup_(cpu_set_reset) CPUSet s = {};
|
||||||
_cleanup_free_ uint8_t *array = NULL;
|
_cleanup_free_ uint8_t *array = NULL;
|
||||||
size_t allocated;
|
size_t allocated;
|
||||||
|
|
||||||
|
@ -220,7 +223,16 @@ static int property_get_cpu_affinity(
|
||||||
assert(reply);
|
assert(reply);
|
||||||
assert(c);
|
assert(c);
|
||||||
|
|
||||||
(void) cpu_set_to_dbus(&c->cpu_set, &array, &allocated);
|
if (c->cpu_affinity_from_numa) {
|
||||||
|
int r;
|
||||||
|
|
||||||
|
r = numa_to_cpu_set(&c->numa_policy, &s);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
(void) cpu_set_to_dbus(c->cpu_affinity_from_numa ? &s : &c->cpu_set, &array, &allocated);
|
||||||
|
|
||||||
return sd_bus_message_append_array(reply, 'y', array, allocated);
|
return sd_bus_message_append_array(reply, 'y', array, allocated);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -741,6 +753,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||||
SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
|
SD_BUS_PROPERTY("CPUAffinityFromNUMA", "b", property_get_cpu_affinity_from_numa, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
|
@ -1770,6 +1783,20 @@ int bus_exec_context_set_transient_property(
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
} else if (streq(name, "CPUAffinityFromNUMA")) {
|
||||||
|
int q;
|
||||||
|
|
||||||
|
r = sd_bus_message_read_basic(message, 'b', &q);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
|
||||||
|
c->cpu_affinity_from_numa = q;
|
||||||
|
unit_write_settingf(u, flags, name, "%s=%s", "CPUAffinity", "numa");
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
|
||||||
} else if (streq(name, "NUMAPolicy")) {
|
} else if (streq(name, "NUMAPolicy")) {
|
||||||
int32_t type;
|
int32_t type;
|
||||||
|
|
||||||
|
@ -1784,6 +1811,7 @@ int bus_exec_context_set_transient_property(
|
||||||
c->numa_policy.type = type;
|
c->numa_policy.type = type;
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
} else if (streq(name, "Nice")) {
|
} else if (streq(name, "Nice")) {
|
||||||
int32_t q;
|
int32_t q;
|
||||||
|
|
||||||
|
|
|
@ -3021,6 +3021,33 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **
|
||||||
return using_subcgroup;
|
return using_subcgroup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
|
||||||
|
_cleanup_(cpu_set_reset) CPUSet s = {};
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(c);
|
||||||
|
assert(ret);
|
||||||
|
|
||||||
|
if (!c->numa_policy.nodes.set) {
|
||||||
|
log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = numa_to_cpu_set(&c->numa_policy, &s);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
cpu_set_reset(ret);
|
||||||
|
|
||||||
|
return cpu_set_add_all(ret, &s);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
|
||||||
|
assert(c);
|
||||||
|
|
||||||
|
return c->cpu_affinity_from_numa;
|
||||||
|
}
|
||||||
|
|
||||||
static int exec_child(
|
static int exec_child(
|
||||||
Unit *unit,
|
Unit *unit,
|
||||||
const ExecCommand *command,
|
const ExecCommand *command,
|
||||||
|
@ -3318,11 +3345,26 @@ static int exec_child(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (context->cpu_set.set)
|
if (context->cpu_affinity_from_numa || context->cpu_set.set) {
|
||||||
if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
|
_cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
|
||||||
|
const CPUSet *cpu_set;
|
||||||
|
|
||||||
|
if (context->cpu_affinity_from_numa) {
|
||||||
|
r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
|
||||||
|
if (r < 0) {
|
||||||
|
*exit_status = EXIT_CPUAFFINITY;
|
||||||
|
return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
|
||||||
|
}
|
||||||
|
|
||||||
|
cpu_set = &converted_cpu_set;
|
||||||
|
} else
|
||||||
|
cpu_set = &context->cpu_set;
|
||||||
|
|
||||||
|
if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
|
||||||
*exit_status = EXIT_CPUAFFINITY;
|
*exit_status = EXIT_CPUAFFINITY;
|
||||||
return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
|
return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
|
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
|
||||||
r = apply_numa_policy(&context->numa_policy);
|
r = apply_numa_policy(&context->numa_policy);
|
||||||
|
|
|
@ -182,6 +182,7 @@ struct ExecContext {
|
||||||
|
|
||||||
CPUSet cpu_set;
|
CPUSet cpu_set;
|
||||||
NUMAPolicy numa_policy;
|
NUMAPolicy numa_policy;
|
||||||
|
bool cpu_affinity_from_numa;
|
||||||
|
|
||||||
ExecInput std_input;
|
ExecInput std_input;
|
||||||
ExecOutput std_output;
|
ExecOutput std_output;
|
||||||
|
@ -406,6 +407,8 @@ void exec_runtime_vacuum(Manager *m);
|
||||||
|
|
||||||
void exec_params_clear(ExecParameters *p);
|
void exec_params_clear(ExecParameters *p);
|
||||||
|
|
||||||
|
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
|
||||||
|
|
||||||
const char* exec_output_to_string(ExecOutput i) _const_;
|
const char* exec_output_to_string(ExecOutput i) _const_;
|
||||||
ExecOutput exec_output_from_string(const char *s) _pure_;
|
ExecOutput exec_output_from_string(const char *s) _pure_;
|
||||||
|
|
||||||
|
|
|
@ -1330,13 +1330,25 @@ int config_parse_exec_cpu_affinity(const char *unit,
|
||||||
void *userdata) {
|
void *userdata) {
|
||||||
|
|
||||||
ExecContext *c = data;
|
ExecContext *c = data;
|
||||||
|
int r;
|
||||||
|
|
||||||
assert(filename);
|
assert(filename);
|
||||||
assert(lvalue);
|
assert(lvalue);
|
||||||
assert(rvalue);
|
assert(rvalue);
|
||||||
assert(data);
|
assert(data);
|
||||||
|
|
||||||
return parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
|
if (streq(rvalue, "numa")) {
|
||||||
|
c->cpu_affinity_from_numa = true;
|
||||||
|
cpu_set_reset(&c->cpu_set);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
|
||||||
|
if (r >= 0)
|
||||||
|
c->cpu_affinity_from_numa = false;
|
||||||
|
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
int config_parse_capability_set(
|
int config_parse_capability_set(
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include "signal-util.h"
|
#include "signal-util.h"
|
||||||
#include "socket-util.h"
|
#include "socket-util.h"
|
||||||
#include "sort-util.h"
|
#include "sort-util.h"
|
||||||
|
#include "stdio-util.h"
|
||||||
#include "string-util.h"
|
#include "string-util.h"
|
||||||
#include "syslog-util.h"
|
#include "syslog-util.h"
|
||||||
#include "terminal-util.h"
|
#include "terminal-util.h"
|
||||||
|
@ -1103,6 +1104,13 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
|
||||||
_cleanup_free_ uint8_t *array = NULL;
|
_cleanup_free_ uint8_t *array = NULL;
|
||||||
size_t allocated;
|
size_t allocated;
|
||||||
|
|
||||||
|
if (eq && streq(eq, "numa")) {
|
||||||
|
r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true);
|
||||||
|
if (r < 0)
|
||||||
|
return bus_log_create_error(r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
r = parse_cpu_set(eq, &cpuset);
|
r = parse_cpu_set(eq, &cpuset);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
|
return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
|
||||||
|
|
|
@ -131,7 +131,7 @@ int cpu_set_add_all(CPUSet *a, const CPUSet *b) {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int parse_cpu_set_full(
|
int parse_cpu_set_full(
|
||||||
|
@ -216,7 +216,7 @@ int parse_cpu_set_extend(
|
||||||
if (!old->set) {
|
if (!old->set) {
|
||||||
*old = cpuset;
|
*old = cpuset;
|
||||||
cpuset = (CPUSet) {};
|
cpuset = (CPUSet) {};
|
||||||
return 0;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return cpu_set_add_all(old, &cpuset);
|
return cpu_set_add_all(old, &cpuset);
|
||||||
|
|
|
@ -216,12 +216,12 @@ static void test_parse_cpu_set_extend(void) {
|
||||||
|
|
||||||
log_info("/* %s */", __func__);
|
log_info("/* %s */", __func__);
|
||||||
|
|
||||||
assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
|
assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
|
||||||
assert_se(CPU_COUNT_S(c.allocated, c.set) == 2);
|
assert_se(CPU_COUNT_S(c.allocated, c.set) == 2);
|
||||||
assert_se(s1 = cpu_set_to_string(&c));
|
assert_se(s1 = cpu_set_to_string(&c));
|
||||||
log_info("cpu_set_to_string: %s", s1);
|
log_info("cpu_set_to_string: %s", s1);
|
||||||
|
|
||||||
assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
|
assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
|
||||||
assert_se(CPU_COUNT_S(c.allocated, c.set) == 3);
|
assert_se(CPU_COUNT_S(c.allocated, c.set) == 3);
|
||||||
assert_se(s2 = cpu_set_to_string(&c));
|
assert_se(s2 = cpu_set_to_string(&c));
|
||||||
log_info("cpu_set_to_string: %s", s2);
|
log_info("cpu_set_to_string: %s", s2);
|
||||||
|
@ -238,7 +238,7 @@ static void test_cpu_set_to_from_dbus(void) {
|
||||||
|
|
||||||
log_info("/* %s */", __func__);
|
log_info("/* %s */", __func__);
|
||||||
|
|
||||||
assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
|
assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
|
||||||
assert_se(s = cpu_set_to_string(&c));
|
assert_se(s = cpu_set_to_string(&c));
|
||||||
log_info("cpu_set_to_string: %s", s);
|
log_info("cpu_set_to_string: %s", s);
|
||||||
assert_se(CPU_COUNT_S(c.allocated, c.set) == 104);
|
assert_se(CPU_COUNT_S(c.allocated, c.set) == 104);
|
||||||
|
|
|
@ -279,6 +279,18 @@ else
|
||||||
# Mask must be ignored
|
# Mask must be ignored
|
||||||
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" $straceLog
|
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" $straceLog
|
||||||
|
|
||||||
|
echo "Unit file CPUAffinity=NUMA support"
|
||||||
|
writeTestUnitNUMAPolicy "bind" "0"
|
||||||
|
echo "CPUAffinity=numa" >> $testUnitNUMAConf
|
||||||
|
systemctl daemon-reload
|
||||||
|
systemctl start $testUnit
|
||||||
|
systemctlCheckNUMAProperties $testUnit "bind" "0"
|
||||||
|
pid=$(systemctl show --value -p MainPID $testUnit)
|
||||||
|
cpulist=$(cat /sys/devices/system/node/node0/cpulist)
|
||||||
|
affinity_systemd=$(systemctl show --value -p CPUAffinity $testUnit)
|
||||||
|
[ $cpulist = $affinity_systemd ]
|
||||||
|
pid1StopUnit $testUnit
|
||||||
|
|
||||||
echo "systemd-run NUMAPolicy support"
|
echo "systemd-run NUMAPolicy support"
|
||||||
runUnit='numa-systemd-run-test.service'
|
runUnit='numa-systemd-run-test.service'
|
||||||
|
|
||||||
|
@ -309,6 +321,12 @@ else
|
||||||
systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit $runUnit sleep 1000
|
systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit $runUnit sleep 1000
|
||||||
systemctlCheckNUMAProperties $runUnit "local" ""
|
systemctlCheckNUMAProperties $runUnit "local" ""
|
||||||
pid1StopUnit $runUnit
|
pid1StopUnit $runUnit
|
||||||
|
|
||||||
|
systemd-run -p NUMAPolicy=local -p NUMAMask=0 -p CPUAffinity=numa --unit $runUnit sleep 1000
|
||||||
|
systemctlCheckNUMAProperties $runUnit "local" ""
|
||||||
|
systemctl cat $runUnit | grep -q 'CPUAffinity=numa'
|
||||||
|
pid1StopUnit $runUnit
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
|
|
Loading…
Reference in a new issue