core: Restrict mmap and mprotect with PAGE_WRITE|PAGE_EXEC (#3319) (#3379)

New exec boolean MemoryDenyWriteExecute, when set, installs
a seccomp filter to reject mmap(2) with PAGE_WRITE|PAGE_EXEC
and mprotect(2) with PAGE_EXEC.
This commit is contained in:
Topi Miettinen 2016-06-03 15:58:18 +00:00 committed by Lennart Poettering
parent de4503c8d9
commit f3e4363593
6 changed files with 75 additions and 4 deletions

View File

@ -1388,6 +1388,22 @@
<citerefentry><refentrytitle>tmpfiles.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>MemoryDenyWriteExecute=</varname></term>
<listitem><para>Takes a boolean argument. If set, attempts to create memory mappings that are writable and
executable at the same time, or to change existing memory mappings to become executable are prohibited.
Specifically, a system call filter is added that rejects
<citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry>
system calls with both <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set
and <citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry>
system calls with <constant>PROT_EXEC</constant> set. Note that this option is incompatible with programs
that generate program code dynamically at runtime, such as JIT execution engines, or programs compiled making
use of the code "trampoline" feature of various C compilers. This option improves service security, as it makes
harder for software exploits to change running code dynamically.
</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -719,6 +719,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, runtime_directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, runtime_directory), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
@ -1056,7 +1057,7 @@ int bus_exec_context_set_transient_property(
} else if (STR_IN_SET(name,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
"PrivateTmp", "PrivateDevices", "PrivateNetwork",
"NoNewPrivileges", "SyslogLevelPrefix")) {
"NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@ -1080,6 +1081,8 @@ int bus_exec_context_set_transient_property(
c->no_new_privileges = b;
else if (streq(name, "SyslogLevelPrefix"))
c->syslog_level_prefix = b;
else if (streq(name, "MemoryDenyWriteExecute"))
c->memory_deny_write_execute = b;
unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
}

View File

@ -25,6 +25,7 @@
#include <signal.h>
#include <string.h>
#include <sys/capability.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/socket.h>
@ -1190,6 +1191,45 @@ finish:
return r;
}
static int apply_memory_deny_write_execute(const ExecContext *c) {
scmp_filter_ctx *seccomp;
int r;
assert(c);
seccomp = seccomp_init(SCMP_ACT_ALLOW);
if (!seccomp)
return -ENOMEM;
r = seccomp_rule_add(
seccomp,
SCMP_ACT_KILL,
SCMP_SYS(mmap),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
if (r < 0)
goto finish;
r = seccomp_rule_add(
seccomp,
SCMP_ACT_KILL,
SCMP_SYS(mprotect),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
if (r < 0)
goto finish;
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
if (r < 0)
goto finish;
r = seccomp_load(seccomp);
finish:
seccomp_release(seccomp);
return r;
}
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@ -1912,6 +1952,13 @@ static int exec_child(
}
}
if (context->memory_deny_write_execute) {
r = apply_memory_deny_write_execute(context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
}
}
if (use_syscall_filter) {
r = apply_seccomp(context);
if (r < 0) {
@ -2371,7 +2418,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sPrivateDevices: %s\n"
"%sProtectHome: %s\n"
"%sProtectSystem: %s\n"
"%sIgnoreSIGPIPE: %s\n",
"%sIgnoreSIGPIPE: %s\n"
"%sMemoryDenyWriteExecute: %s\n",
prefix, c->umask,
prefix, c->working_directory ? c->working_directory : "/",
prefix, c->root_directory ? c->root_directory : "/",
@ -2381,7 +2429,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->private_devices),
prefix, protect_home_to_string(c->protect_home),
prefix, protect_system_to_string(c->protect_system),
prefix, yes_no(c->ignore_sigpipe));
prefix, yes_no(c->ignore_sigpipe),
prefix, yes_no(c->memory_deny_write_execute));
STRV_FOREACH(e, c->environment)
fprintf(f, "%sEnvironment: %s\n", prefix, *e);

View File

@ -197,6 +197,7 @@ struct ExecContext {
bool ioprio_set:1;
bool cpu_sched_set:1;
bool no_new_privileges_set:1;
bool memory_deny_write_execute;
};
#include "cgroup-util.h"

View File

@ -55,10 +55,12 @@ m4_ifdef(`HAVE_SECCOMP',
`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)

View File

@ -158,7 +158,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
"SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies",
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "NoNewPrivileges",
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse")) {
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute")) {
r = parse_boolean(eq);
if (r < 0)