From 78e864e5b3cc11b72ae663f49f42f158cafbfedf Mon Sep 17 00:00:00 2001 From: Topi Miettinen Date: Tue, 4 Jul 2017 15:48:18 +0300 Subject: [PATCH] seccomp: LockPersonality boolean (#6193) Add LockPersonality boolean to allow locking down the personality(2) system call so that the execution domain can't be changed. This may be useful to improve security because odd emulations may be poorly tested and a source of vulnerabilities, while system services shouldn't need any weird personalities. --- man/systemd.exec.xml | 12 +++++++++ src/core/dbus-execute.c | 1 + src/core/execute.c | 33 +++++++++++++++++++++++- src/core/execute.h | 1 + src/core/load-fragment-gperf.gperf.m4 | 6 +++-- src/shared/seccomp-util.c | 19 ++++++++++++++ src/shared/seccomp-util.h | 1 + src/test/test-seccomp.c | 36 +++++++++++++++++++++++++++ 8 files changed, 106 insertions(+), 3 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 7a7006b9a0..a9f1d8d74e 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1653,6 +1653,18 @@ personality of the host system's kernel. + + LockPersonality= + + Locks down the personality(2) system + call so that the kernel execution domain may not be changed from the default or the personality selected with the + Personality= directive. This may be useful to improve security, because odd personality + emulations may be poorly tested and a source of vulnerabilities. If running in user mode, or in system mode, but + without the CAP_SYS_ADMIN capability (e.g. setting User=), + NoNewPrivileges=yes is implied.
+ + RuntimeDirectory= diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 45497ca024..d28e8aafd6 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -853,6 +853,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/core/execute.c b/src/core/execute.c index d192134b1c..4d285ff250 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1296,7 +1296,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) { c->protect_kernel_modules || c->private_devices || context_has_syscall_filters(c) || - !set_isempty(c->syscall_archs); + !set_isempty(c->syscall_archs) || + c->lock_personality; } #ifdef HAVE_SECCOMP @@ -1455,6 +1456,25 @@ static int apply_restrict_namespaces(Unit *u, const ExecContext *c) { return seccomp_restrict_namespaces(c->restrict_namespaces); } +static int apply_lock_personality(const Unit* u, const ExecContext *c) { /* Applies LockPersonality=: returns 0 without installing anything when c->lock_personality is unset or when skip_seccomp_unavailable() reports true; otherwise passes the effective personality to seccomp_lock_personality() and returns its result. NOTE(review): c is read in the initializer below before assert(c) runs - confirm whether the assignment should move after the asserts. */ + unsigned long personality = c->personality; + + assert(u); + assert(c); + + if (!c->lock_personality) + return 0; + + if (skip_seccomp_unavailable(u, "LockPersonality=")) + return 0; + + /* If 
personality is not specified, use the default (Linux) */ + if (personality == PERSONALITY_INVALID) + personality = PER_LINUX; + + return seccomp_lock_personality(personality); +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { @@ -2972,6 +2992,13 @@ static int exec_child( return r; } + r = apply_lock_personality(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + *error_message = strdup("Failed to lock personalities"); + return r; + } + /* This really should remain the last step before the execve(), to make sure our own code is unaffected * by the filter as little as possible. */ r = apply_syscall_filter(unit, context, needs_ambient_hack); @@ -3733,6 +3760,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sPersonality: %s\n", prefix, strna(personality_to_string(c->personality))); + fprintf(f, + "%sLockPersonality: %s\n", + prefix, yes_no(c->lock_personality)); + if (c->syscall_filter) { #ifdef HAVE_SECCOMP Iterator j; diff --git a/src/core/execute.h b/src/core/execute.h index 9a28269283..8a7ce8449b 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -227,6 +227,7 @@ struct ExecContext { bool same_pgrp; unsigned long personality; + bool lock_personality; unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */ diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 9a87f0acd3..94f3d657f6 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -60,14 +60,16 @@ $1.SystemCallErrorNumber, config_parse_syscall_errno, 0, $1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute) $1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context) $1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime) -$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)', 
+$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context) +$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)', `$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 -$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0') +$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 +$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0') $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) $1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit) $1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit) diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index dd6d4fbdc7..bf2db28a82 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -29,6 +29,7 @@ #include "alloc-util.h" #include "macro.h" #include "nsflags.h" +#include "process-util.h" #include "seccomp-util.h" #include "set.h" #include "string-util.h" @@ -1402,3 +1403,21 @@ int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) { return 0; } + +int seccomp_lock_personality(unsigned long personality) { /* Builds a default-allow seccomp filter with a single rule that makes personality() fail with EPERM whenever its first argument differs from the given value, then loads it. Returns -ENOMEM if seccomp_init() fails, otherwise the negative result of seccomp_rule_add_exact() or the result of seccomp_load(). NOTE(review): the filter comes from plain seccomp_init(), so it presumably covers only the native architecture - confirm whether secondary ABIs need the same rule. */ + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int r; + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(personality), + 1, /* one argument comparison follows */ + SCMP_A0(SCMP_CMP_NE, personality)); + if (r < 0) + return r; + + return seccomp_load(seccomp); +} diff --git 
a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 0edffa116d..ca43ba8659 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -78,6 +78,7 @@ int seccomp_protect_sysctl(void); int seccomp_restrict_address_families(Set *address_families, bool whitelist); int seccomp_restrict_realtime(void); int seccomp_memory_deny_write_execute(void); +int seccomp_lock_personality(unsigned long personality); extern const uint32_t seccomp_local_archs[]; diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c index 28fe206507..7ffbc4754e 100644 --- a/src/test/test-seccomp.c +++ b/src/test/test-seccomp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -565,6 +566,40 @@ static void test_load_syscall_filter_set_raw(void) { assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid, true) == EXIT_SUCCESS); } +static void test_lock_personality(void) { /* Verifies seccomp_lock_personality(PER_LINUX) in a forked child: after the filter is loaded, personality(PER_LINUX) must still succeed while every other value tried must fail with EPERM. Returns without testing when is_seccomp_available() is false or the effective UID is not 0. */ + pid_t pid; + + if (!is_seccomp_available()) + return; + if (geteuid() != 0) + return; + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { /* Child: the filter is loaded only in this process; the parent asserts below that it exits with EXIT_SUCCESS. */ + assert_se(seccomp_lock_personality(PER_LINUX) >= 0); + /* personality() returns the previous persona on success, so this presumably relies on the child starting out as PER_LINUX - confirm */ + assert_se(personality(PER_LINUX) == PER_LINUX); + assert_se(personality(PER_LINUX | ADDR_NO_RANDOMIZE) == -1 && errno == EPERM); + assert_se(personality(PER_LINUX | MMAP_PAGE_ZERO) == -1 && errno == EPERM); + assert_se(personality(PER_LINUX | ADDR_COMPAT_LAYOUT) == -1 && errno == EPERM); + assert_se(personality(PER_LINUX | READ_IMPLIES_EXEC) == -1 && errno == EPERM); + assert_se(personality(PER_LINUX_32BIT) == -1 && errno == EPERM); + assert_se(personality(PER_SVR4) == -1 && errno == EPERM); + assert_se(personality(PER_BSD) == -1 && errno == EPERM); + assert_se(personality(PER_LINUX32) == -1 && errno == EPERM); + assert_se(personality(PER_LINUX32_3GB) == -1 && errno == EPERM); + assert_se(personality(PER_UW7) == -1 && errno == EPERM); + assert_se(personality(0x42) == -1 && errno == EPERM); + assert_se(personality(PERSONALITY_INVALID) == -1 && errno == EPERM); 
/* maybe remove this later */ + assert_se(personality(PER_LINUX) == PER_LINUX); + _exit(EXIT_SUCCESS); + } + + assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS); +} + int main(int argc, char *argv[]) { log_set_max_level(LOG_DEBUG); @@ -581,6 +616,7 @@ int main(int argc, char *argv[]) { test_memory_deny_write_execute_shmat(); test_restrict_archs(); test_load_syscall_filter_set_raw(); + test_lock_personality(); return 0; }