seccomp: LockPersonality boolean (#6193)

Add LockPersonality boolean to allow locking down the personality(2)
system call so that the execution domain can't be changed.
This may be useful to improve security because odd emulations
may be poorly tested and a source of vulnerabilities, while
system services shouldn't need any weird personalities.
This commit is contained in:
Topi Miettinen 2017-07-04 15:48:18 +03:00 committed by Lennart Poettering
parent 54d564a212
commit 78e864e5b3
8 changed files with 106 additions and 3 deletions

View File

@ -1653,6 +1653,18 @@
personality of the host system's kernel.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>LockPersonality=</varname></term>
<listitem><para>Locks down the <citerefentry
project='man-pages'><refentrytitle>personality</refentrytitle><manvolnum>2</manvolnum></citerefentry> system
call so that the kernel execution domain may not be changed from the default or the personality selected with
the <varname>Personality=</varname> directive. This may be useful to improve security, because odd personality
emulations may be poorly tested and a source of vulnerabilities. If running in user mode, or in system mode, but
without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=</varname>),
<varname>NoNewPrivileges=yes</varname> is implied.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RuntimeDirectory=</varname></term>

View File

@ -853,6 +853,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST),

View File

@ -1296,7 +1296,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
c->protect_kernel_modules ||
c->private_devices ||
context_has_syscall_filters(c) ||
!set_isempty(c->syscall_archs);
!set_isempty(c->syscall_archs) ||
c->lock_personality;
}
#ifdef HAVE_SECCOMP
@ -1455,6 +1456,25 @@ static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
return seccomp_restrict_namespaces(c->restrict_namespaces);
}
/* Applies the LockPersonality= setting of the execution context by installing a seccomp filter that pins
 * the execution domain.
 *
 * Returns 0 without doing anything if c->lock_personality is false or if skip_seccomp_unavailable() tells
 * us to skip; otherwise returns whatever seccomp_lock_personality() returns. */
static int apply_lock_personality(const Unit* u, const ExecContext *c) {
        unsigned long personality;

        assert(u);
        assert(c);

        if (!c->lock_personality)
                return 0;

        if (skip_seccomp_unavailable(u, "LockPersonality="))
                return 0;

        /* Only read from the context after the assert()s above have checked the pointer */
        personality = c->personality;

        /* If personality is not specified, use the default (Linux) */
        if (personality == PERSONALITY_INVALID)
                personality = PER_LINUX;

        return seccomp_lock_personality(personality);
}
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@ -2972,6 +2992,13 @@ static int exec_child(
return r;
}
r = apply_lock_personality(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
*error_message = strdup("Failed to lock personalities");
return r;
}
/* This really should remain the last step before the execve(), to make sure our own code is affected
* by the filter as little as possible. */
r = apply_syscall_filter(unit, context, needs_ambient_hack);
@ -3733,6 +3760,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sPersonality: %s\n",
prefix, strna(personality_to_string(c->personality)));
fprintf(f,
"%sLockPersonality: %s\n",
prefix, yes_no(c->lock_personality));
if (c->syscall_filter) {
#ifdef HAVE_SECCOMP
Iterator j;

View File

@ -227,6 +227,7 @@ struct ExecContext {
bool same_pgrp;
unsigned long personality;
bool lock_personality;
unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */

View File

@ -60,14 +60,16 @@ $1.SystemCallErrorNumber, config_parse_syscall_errno, 0,
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)
$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
$1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)

View File

@ -29,6 +29,7 @@
#include "alloc-util.h"
#include "macro.h"
#include "nsflags.h"
#include "process-util.h"
#include "seccomp-util.h"
#include "set.h"
#include "string-util.h"
@ -1402,3 +1403,21 @@ int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) {
return 0;
}
int seccomp_lock_personality(unsigned long personality) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
int r;
seccomp = seccomp_init(SCMP_ACT_ALLOW);
if (!seccomp)
return -ENOMEM;
r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM),
SCMP_SYS(personality),
1,
SCMP_A0(SCMP_CMP_NE, personality));
if (r < 0)
return r;
return seccomp_load(seccomp);
}

View File

@ -78,6 +78,7 @@ int seccomp_protect_sysctl(void);
int seccomp_restrict_address_families(Set *address_families, bool whitelist);
int seccomp_restrict_realtime(void);
int seccomp_memory_deny_write_execute(void);
int seccomp_lock_personality(unsigned long personality);
extern const uint32_t seccomp_local_archs[];

View File

@ -21,6 +21,7 @@
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/poll.h>
#include <sys/shm.h>
#include <sys/types.h>
@ -565,6 +566,40 @@ static void test_load_syscall_filter_set_raw(void) {
assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid, true) == EXIT_SUCCESS);
}
/* Checks in a forked child that, once seccomp_lock_personality(PER_LINUX) is in effect, personality(2) keeps
 * accepting PER_LINUX but fails with EPERM for every other value tried. Does nothing if seccomp is not
 * available or if the effective UID is not 0. */
static void test_lock_personality(void) {
        /* Every value in here must be refused once the lock is in place */
        static const unsigned long denied[] = {
                PER_LINUX | ADDR_NO_RANDOMIZE,
                PER_LINUX | MMAP_PAGE_ZERO,
                PER_LINUX | ADDR_COMPAT_LAYOUT,
                PER_LINUX | READ_IMPLIES_EXEC,
                PER_LINUX_32BIT,
                PER_SVR4,
                PER_BSD,
                PER_LINUX32,
                PER_LINUX32_3GB,
                PER_UW7,
                0x42,
                PERSONALITY_INVALID, /* maybe remove this later */
        };
        pid_t child;
        size_t i;

        if (!is_seccomp_available() || geteuid() != 0)
                return;

        child = fork();
        assert_se(child >= 0);

        if (child == 0) {
                assert_se(seccomp_lock_personality(PER_LINUX) >= 0);

                /* The locked personality itself must remain selectable */
                assert_se(personality(PER_LINUX) == PER_LINUX);

                for (i = 0; i < sizeof(denied) / sizeof(denied[0]); i++)
                        assert_se(personality(denied[i]) == -1 && errno == EPERM);

                /* The refused calls above must not have changed the execution domain */
                assert_se(personality(PER_LINUX) == PER_LINUX);
                _exit(EXIT_SUCCESS);
        }

        assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", child, true) == EXIT_SUCCESS);
}
int main(int argc, char *argv[]) {
log_set_max_level(LOG_DEBUG);
@ -581,6 +616,7 @@ int main(int argc, char *argv[]) {
test_memory_deny_write_execute_shmat();
test_restrict_archs();
test_load_syscall_filter_set_raw();
test_lock_personality();
return 0;
}