diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 7a7006b9a0..a9f1d8d74e 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1653,6 +1653,18 @@
personality of the host system's kernel.
+
+ LockPersonality=
+
+ Locks down the personality(2) system
+ call so that the kernel execution domain may not be changed from the default or the personality selected with
+ the Personality= directive. This may be useful to improve security, because odd personality
+ emulations may be poorly tested and a source of vulnerabilities. If running in user mode, or in system mode, but
+ without the CAP_SYS_ADMIN capability (e.g. setting User=),
+ NoNewPrivileges=yes is implied.
+
+
RuntimeDirectory=
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 45497ca024..d28e8aafd6 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -853,6 +853,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST),
diff --git a/src/core/execute.c b/src/core/execute.c
index d192134b1c..4d285ff250 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1296,7 +1296,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
c->protect_kernel_modules ||
c->private_devices ||
context_has_syscall_filters(c) ||
- !set_isempty(c->syscall_archs);
+ !set_isempty(c->syscall_archs) ||
+ c->lock_personality;
}
#ifdef HAVE_SECCOMP
@@ -1455,6 +1456,25 @@ static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
return seccomp_restrict_namespaces(c->restrict_namespaces);
}
+/* Installs the seccomp filter that backs LockPersonality= for the processes of a unit.
+ *
+ * Returns 0 without doing anything when the option is off or when skip_seccomp_unavailable() says the
+ * setting has to be skipped; otherwise returns whatever seccomp_lock_personality() returns. */
+static int apply_lock_personality(const Unit* u, const ExecContext *c) {
+        unsigned long personality;
+
+        assert(u);
+        assert(c);
+
+        if (!c->lock_personality)
+                return 0;
+
+        if (skip_seccomp_unavailable(u, "LockPersonality="))
+                return 0;
+
+        /* Only read from the context once the assertions above have checked the pointer. */
+        personality = c->personality;
+
+        /* If personality is not specified, use the default (Linux) */
+        if (personality == PERSONALITY_INVALID)
+                personality = PER_LINUX;
+
+        return seccomp_lock_personality(personality);
+}
+
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -2972,6 +2992,13 @@ static int exec_child(
return r;
}
+ r = apply_lock_personality(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ *error_message = strdup("Failed to lock personalities");
+ return r;
+ }
+
/* This really should remain the last step before the execve(), to make sure our own code is unaffected
* by the filter as little as possible. */
r = apply_syscall_filter(unit, context, needs_ambient_hack);
@@ -3733,6 +3760,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sPersonality: %s\n",
prefix, strna(personality_to_string(c->personality)));
+ fprintf(f,
+ "%sLockPersonality: %s\n",
+ prefix, yes_no(c->lock_personality));
+
if (c->syscall_filter) {
#ifdef HAVE_SECCOMP
Iterator j;
diff --git a/src/core/execute.h b/src/core/execute.h
index 9a28269283..8a7ce8449b 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -227,6 +227,7 @@ struct ExecContext {
bool same_pgrp;
unsigned long personality;
+ bool lock_personality;
unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 9a87f0acd3..94f3d657f6 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -60,14 +60,16 @@ $1.SystemCallErrorNumber, config_parse_syscall_errno, 0,
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
-$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
+$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)
+$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
-$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
$1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index dd6d4fbdc7..bf2db28a82 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -29,6 +29,7 @@
#include "alloc-util.h"
#include "macro.h"
#include "nsflags.h"
+#include "process-util.h"
#include "seccomp-util.h"
#include "set.h"
#include "string-util.h"
@@ -1402,3 +1403,21 @@ int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) {
return 0;
}
+
+/* Loads a seccomp filter that pins the execution domain to the given personality: a personality() call
+ * whose first argument differs from it fails with EPERM, every other system call stays allowed.
+ *
+ * Returns -EINVAL for values at or above PERSONALITY_INVALID, -ENOMEM if no filter context could be
+ * set up, and otherwise the result of seccomp_rule_add_exact() or seccomp_load(). */
+int seccomp_lock_personality(unsigned long personality) {
+        _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+        int r;
+
+        /* Refuse the "not set" sentinel and anything beyond it, so that we never bake a value into the
+         * filter that is not an actual personality. */
+        if (personality >= PERSONALITY_INVALID)
+                return -EINVAL;
+
+        /* NOTE(review): seccomp_init() presumably sets up the native architecture only; confirm whether
+         * secondary ABIs need a filter of their own. */
+        seccomp = seccomp_init(SCMP_ACT_ALLOW);
+        if (!seccomp)
+                return -ENOMEM;
+
+        /* Compare the whole first argument: extra flag bits make the value "different" and are hence
+         * refused as well. */
+        r = seccomp_rule_add_exact(
+                        seccomp,
+                        SCMP_ACT_ERRNO(EPERM),
+                        SCMP_SYS(personality),
+                        1,
+                        SCMP_A0(SCMP_CMP_NE, personality));
+        if (r < 0)
+                return r;
+
+        return seccomp_load(seccomp);
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 0edffa116d..ca43ba8659 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -78,6 +78,7 @@ int seccomp_protect_sysctl(void);
int seccomp_restrict_address_families(Set *address_families, bool whitelist);
int seccomp_restrict_realtime(void);
int seccomp_memory_deny_write_execute(void);
+int seccomp_lock_personality(unsigned long personality);
extern const uint32_t seccomp_local_archs[];
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
index 28fe206507..7ffbc4754e 100644
--- a/src/test/test-seccomp.c
+++ b/src/test/test-seccomp.c
@@ -21,6 +21,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -565,6 +566,40 @@ static void test_load_syscall_filter_set_raw(void) {
assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid, true) == EXIT_SUCCESS);
}
+static void test_lock_personality(void) { /* Checks in a forked child that seccomp_lock_personality(PER_LINUX) lets only PER_LINUX through */
+ pid_t pid;
+
+ if (!is_seccomp_available()) /* nothing to test without seccomp */
+ return;
+ if (geteuid() != 0) /* the test is only run as root */
+ return;
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) { /* child: the filter is loaded in here only, the parent does not load it */
+ assert_se(seccomp_lock_personality(PER_LINUX) >= 0);
+
+ assert_se(personality(PER_LINUX) == PER_LINUX); /* the locked value itself must still be accepted */
+ assert_se(personality(PER_LINUX | ADDR_NO_RANDOMIZE) == -1 && errno == EPERM); /* PER_LINUX plus a flag bit is a different argument, expect EPERM */
+ assert_se(personality(PER_LINUX | MMAP_PAGE_ZERO) == -1 && errno == EPERM);
+ assert_se(personality(PER_LINUX | ADDR_COMPAT_LAYOUT) == -1 && errno == EPERM);
+ assert_se(personality(PER_LINUX | READ_IMPLIES_EXEC) == -1 && errno == EPERM);
+ assert_se(personality(PER_LINUX_32BIT) == -1 && errno == EPERM); /* other execution domains, expect EPERM for each */
+ assert_se(personality(PER_SVR4) == -1 && errno == EPERM);
+ assert_se(personality(PER_BSD) == -1 && errno == EPERM);
+ assert_se(personality(PER_LINUX32) == -1 && errno == EPERM);
+ assert_se(personality(PER_LINUX32_3GB) == -1 && errno == EPERM);
+ assert_se(personality(PER_UW7) == -1 && errno == EPERM);
+ assert_se(personality(0x42) == -1 && errno == EPERM); /* an arbitrary value is expected to fail the same way */
+ assert_se(personality(PERSONALITY_INVALID) == -1 && errno == EPERM); /* differs from the locked value, hence expected to be refused as well; maybe remove this later */
+ assert_se(personality(PER_LINUX) == PER_LINUX); /* still accepted after all the refused attempts */
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS); /* parent: expect the child to have reached its _exit(EXIT_SUCCESS) */
+}
+
int main(int argc, char *argv[]) {
log_set_max_level(LOG_DEBUG);
@@ -581,6 +616,7 @@ int main(int argc, char *argv[]) {
test_memory_deny_write_execute_shmat();
test_restrict_archs();
test_load_syscall_filter_set_raw();
+ test_lock_personality();
return 0;
}