core:sandbox: Add ProtectKernelModules= option
This is useful to turn off explicit module load and unload operations on modular kernels. This option removes CAP_SYS_MODULE from the capability bounding set for the unit, and installs a system call filter to block module system calls. This option will not prevent the kernel from loading modules using the module auto-load feature, which is a system-wide operation.
This commit is contained in:
parent
18e51a022c
commit
502d704e5e
|
@ -1404,6 +1404,23 @@
|
||||||
logging. This does not affect commands prefixed with <literal>+</literal>.</para></listitem>
|
logging. This does not affect commands prefixed with <literal>+</literal>.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><varname>ProtectKernelModules=</varname></term>
|
||||||
|
|
||||||
|
<listitem><para>Takes a boolean argument. If true, explicit module loading will
|
||||||
|
be denied. This allows module load and unload operations to be turned off on modular
|
||||||
|
kernels. It is recommended to turn this on for most services that do not need special
|
||||||
|
file systems or extra kernel modules to work. Defaults to off. Enabling this option
|
||||||
|
removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
|
||||||
|
the unit, and installs a system call filter to block module system calls.
|
||||||
|
Note that limited automatic module loading due to user configuration or kernel
|
||||||
|
mapping tables might still happen as a side effect of requested user operations,
|
||||||
|
both privileged and unprivileged. To disable the module auto-load feature, please see
|
||||||
|
<citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
|
||||||
|
<constant>kernel.modules_disabled</constant> mechanism and
|
||||||
|
<filename>/proc/sys/kernel/modules_disabled</filename> documentation.</para></listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><varname>Personality=</varname></term>
|
<term><varname>Personality=</varname></term>
|
||||||
|
|
||||||
|
|
|
@ -708,6 +708,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||||
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
|
SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
|
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||||
|
@ -1075,7 +1076,7 @@ int bus_exec_context_set_transient_property(
|
||||||
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
|
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
|
||||||
"NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
|
"NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
|
||||||
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
|
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
|
||||||
"ProtectControlGroups")) {
|
"ProtectKernelModules", "ProtectControlGroups")) {
|
||||||
int b;
|
int b;
|
||||||
|
|
||||||
r = sd_bus_message_read(message, "b", &b);
|
r = sd_bus_message_read(message, "b", &b);
|
||||||
|
@ -1111,6 +1112,8 @@ int bus_exec_context_set_transient_property(
|
||||||
c->remove_ipc = b;
|
c->remove_ipc = b;
|
||||||
else if (streq(name, "ProtectKernelTunables"))
|
else if (streq(name, "ProtectKernelTunables"))
|
||||||
c->protect_kernel_tunables = b;
|
c->protect_kernel_tunables = b;
|
||||||
|
else if (streq(name, "ProtectKernelModules"))
|
||||||
|
c->protect_kernel_modules = b;
|
||||||
else if (streq(name, "ProtectControlGroups"))
|
else if (streq(name, "ProtectControlGroups"))
|
||||||
c->protect_control_groups = b;
|
c->protect_control_groups = b;
|
||||||
|
|
||||||
|
|
|
@ -1436,6 +1436,50 @@ finish:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
|
||||||
|
static const int module_syscalls[] = {
|
||||||
|
SCMP_SYS(delete_module),
|
||||||
|
SCMP_SYS(finit_module),
|
||||||
|
SCMP_SYS(init_module),
|
||||||
|
};
|
||||||
|
|
||||||
|
scmp_filter_ctx *seccomp;
|
||||||
|
unsigned i;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(c);
|
||||||
|
|
||||||
|
/* Turn off module syscalls on ProtectKernelModules=yes */
|
||||||
|
|
||||||
|
if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
||||||
|
if (!seccomp)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
r = seccomp_add_secondary_archs(seccomp);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
for (i = 0; i < ELEMENTSOF(module_syscalls); i++) {
|
||||||
|
r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM),
|
||||||
|
module_syscalls[i], 0);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
r = seccomp_load(seccomp);
|
||||||
|
|
||||||
|
finish:
|
||||||
|
seccomp_release(seccomp);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
static int apply_private_devices(Unit *u, const ExecContext *c) {
|
static int apply_private_devices(Unit *u, const ExecContext *c) {
|
||||||
const SystemCallFilterSet *set;
|
const SystemCallFilterSet *set;
|
||||||
scmp_filter_ctx *seccomp;
|
scmp_filter_ctx *seccomp;
|
||||||
|
@ -2690,6 +2734,14 @@ static int exec_child(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (context->protect_kernel_modules) {
|
||||||
|
r = apply_protect_kernel_modules(unit, context);
|
||||||
|
if (r < 0) {
|
||||||
|
*exit_status = EXIT_SECCOMP;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (context->private_devices) {
|
if (context->private_devices) {
|
||||||
r = apply_private_devices(unit, context);
|
r = apply_private_devices(unit, context);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
|
|
|
@ -175,6 +175,7 @@ struct ExecContext {
|
||||||
ProtectSystem protect_system;
|
ProtectSystem protect_system;
|
||||||
ProtectHome protect_home;
|
ProtectHome protect_home;
|
||||||
bool protect_kernel_tunables;
|
bool protect_kernel_tunables;
|
||||||
|
bool protect_kernel_modules;
|
||||||
bool protect_control_groups;
|
bool protect_control_groups;
|
||||||
|
|
||||||
bool no_new_privileges;
|
bool no_new_privileges;
|
||||||
|
|
|
@ -90,6 +90,7 @@ $1.InaccessiblePaths, config_parse_namespace_path_strv, 0,
|
||||||
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
|
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
|
||||||
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
|
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
|
||||||
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
|
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
|
||||||
|
$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
|
||||||
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
|
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
|
||||||
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
|
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
|
||||||
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
|
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
|
||||||
|
|
|
@ -3401,6 +3401,9 @@ int unit_patch_contexts(Unit *u) {
|
||||||
if (ec->private_devices)
|
if (ec->private_devices)
|
||||||
ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
|
ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
|
||||||
|
|
||||||
|
if (ec->protect_kernel_modules)
|
||||||
|
ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
|
||||||
|
|
||||||
if (ec->dynamic_user) {
|
if (ec->dynamic_user) {
|
||||||
if (!ec->user) {
|
if (!ec->user) {
|
||||||
r = user_from_unit_name(u, &ec->user);
|
r = user_from_unit_name(u, &ec->user);
|
||||||
|
|
|
@ -204,7 +204,8 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
|
||||||
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
|
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
|
||||||
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
|
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
|
||||||
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
|
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
|
||||||
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectControlGroups")) {
|
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
|
||||||
|
"ProtectKernelModules", "ProtectControlGroups")) {
|
||||||
|
|
||||||
r = parse_boolean(eq);
|
r = parse_boolean(eq);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
|
Loading…
Reference in a new issue