core:sandbox: Add ProtectKernelModules= option

This is useful to turn off explicit module load and unload operations on modular
kernels. This option removes CAP_SYS_MODULE from the capability bounding set for
the unit, and installs a system call filter to block module system calls.

This option will not prevent the kernel from loading modules using the module
auto-load feature which is a system wide operation.
This commit is contained in:
Djalal Harouni 2016-10-12 13:31:21 +02:00
parent 18e51a022c
commit 502d704e5e
7 changed files with 80 additions and 2 deletions

View File

@ -1404,6 +1404,23 @@
logging. This does not affect commands prefixed with <literal>+</literal>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ProtectKernelModules=</varname></term>
<listitem><para>Takes a boolean argument. If true, explicit module loading will
be denied. This allows turning off module load and unload operations on modular
kernels. It is recommended to turn this on for most services that do not need special
file systems or extra kernel modules to work. Defaults to off. Enabling this option
removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
the unit, and installs a system call filter to block module system calls.
Note that limited automatic module loading due to user configuration or kernel
mapping tables might still happen as a side effect of requested user operations,
both privileged and unprivileged. To disable the module auto-load feature, please see the
<citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
<constant>kernel.modules_disabled</constant> mechanism and the
<filename>/proc/sys/kernel/modules_disabled</filename> documentation.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>Personality=</varname></term>

View File

@ -708,6 +708,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
@ -1075,7 +1076,7 @@ int bus_exec_context_set_transient_property(
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
"NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
"ProtectControlGroups")) {
"ProtectKernelModules", "ProtectControlGroups")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@ -1111,6 +1112,8 @@ int bus_exec_context_set_transient_property(
c->remove_ipc = b;
else if (streq(name, "ProtectKernelTunables"))
c->protect_kernel_tunables = b;
else if (streq(name, "ProtectKernelModules"))
c->protect_kernel_modules = b;
else if (streq(name, "ProtectControlGroups"))
c->protect_control_groups = b;

View File

@ -1436,6 +1436,50 @@ finish:
return r;
}
/* Implements ProtectKernelModules=yes: installs a seccomp filter that makes the
 * explicit module load/unload system calls fail with EPERM while leaving every
 * other system call allowed.
 *
 * Returns 0 without installing anything when skip_seccomp_unavailable() reports
 * true, -ENOMEM when the filter context cannot be created, the negative result of
 * the first failing libseccomp call otherwise, or the result of seccomp_load(). */
static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
        /* System calls that explicitly load or unload kernel modules */
        static const int blocked_syscalls[] = {
                SCMP_SYS(delete_module),
                SCMP_SYS(finit_module),
                SCMP_SYS(init_module),
        };

        scmp_filter_ctx *filter;
        unsigned idx = 0;
        int ret;

        assert(c);

        /* Turn off module syscalls on ProtectKernelModules=yes */

        if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
                return 0;

        /* Default action: allow everything that is not explicitly listed below */
        filter = seccomp_init(SCMP_ACT_ALLOW);
        if (!filter)
                return -ENOMEM;

        ret = seccomp_add_secondary_archs(filter);
        if (ret < 0)
                goto finish;

        while (idx < ELEMENTSOF(blocked_syscalls)) {
                ret = seccomp_rule_add(filter, SCMP_ACT_ERRNO(EPERM), blocked_syscalls[idx], 0);
                if (ret < 0)
                        goto finish;

                idx++;
        }

        /* Clear the NNP attribute before loading; per the libseccomp documentation this
         * attribute controls whether seccomp_load() enables no_new_privs itself. */
        ret = seccomp_attr_set(filter, SCMP_FLTATR_CTL_NNP, 0);
        if (ret < 0)
                goto finish;

        ret = seccomp_load(filter);

finish:
        /* The filter context is released on both the success and the error path */
        seccomp_release(filter);
        return ret;
}
static int apply_private_devices(Unit *u, const ExecContext *c) {
const SystemCallFilterSet *set;
scmp_filter_ctx *seccomp;
@ -2690,6 +2734,14 @@ static int exec_child(
}
}
if (context->protect_kernel_modules) {
r = apply_protect_kernel_modules(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
}
}
if (context->private_devices) {
r = apply_private_devices(unit, context);
if (r < 0) {

View File

@ -175,6 +175,7 @@ struct ExecContext {
ProtectSystem protect_system;
ProtectHome protect_home;
bool protect_kernel_tunables;
bool protect_kernel_modules;
bool protect_control_groups;
bool no_new_privileges;

View File

@ -90,6 +90,7 @@ $1.InaccessiblePaths, config_parse_namespace_path_strv, 0,
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)

View File

@ -3401,6 +3401,9 @@ int unit_patch_contexts(Unit *u) {
if (ec->private_devices)
ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
if (ec->protect_kernel_modules)
ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
if (ec->dynamic_user) {
if (!ec->user) {
r = user_from_unit_name(u, &ec->user);

View File

@ -204,7 +204,8 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectControlGroups")) {
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
"ProtectKernelModules", "ProtectControlGroups")) {
r = parse_boolean(eq);
if (r < 0)