From 19c0b0b9a5039b842cf9e6c3e7ece75fb8725602 Mon Sep 17 00:00:00 2001 From: Ronny Chevalier Date: Sat, 30 Jan 2016 17:26:39 +0100 Subject: [PATCH] core: set NoNewPrivileges for seccomp if we don't have CAP_SYS_ADMIN The manpage of seccomp specify that using seccomp with SECCOMP_SET_MODE_FILTER will return EACCES if the caller do not have CAP_SYS_ADMIN set, or if the no_new_privileges bit is not set. Hence, without NoNewPrivilege set, it is impossible to use a SystemCall* directive with a User directive set in system mode. Now, NoNewPrivileges is set if we are in user mode, or if we are in system mode and we don't have CAP_SYS_ADMIN, and SystemCall* directives are used. --- Makefile.am | 1 + man/systemd.exec.xml | 16 ++++-- src/core/execute.c | 16 ++++-- src/test/test-execute.c | 57 +++++++++++++------ .../exec-systemcallfilter-system-user.service | 11 ++++ 5 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 test/test-execute/exec-systemcallfilter-system-user.service diff --git a/Makefile.am b/Makefile.am index 7bd98dddf6..02557ef46a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1556,6 +1556,7 @@ EXTRA_DIST += \ test/test-execute/exec-systemcallfilter-failing.service \ test/test-execute/exec-systemcallfilter-not-failing2.service \ test/test-execute/exec-systemcallfilter-not-failing.service \ + test/test-execute/exec-systemcallfilter-system-user.service \ test/test-execute/exec-user.service \ test/test-execute/exec-workingdirectory.service \ test/test-execute/exec-umask-0177.service \ diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index c1f47e84e6..3e1a2cb224 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1155,7 +1155,9 @@ first character of the list is ~, the effect is inverted: only the listed system calls will result in immediate process termination (blacklisting). If running in - user mode and this option is used, + user mode, or in system mode, but without the + CAP_SYS_ADMIN capabiblity (e.g. setting + User=nobody), NoNewPrivileges=yes is implied. This feature makes use of the Secure Computing Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for enforcing a @@ -1214,8 +1216,10 @@ systems. The special native identifier implicitly maps to the native architecture of the system (or more strictly: to the architecture the system manager is - compiled for). If running in user mode and this option is - used, NoNewPrivileges=yes is implied. Note + compiled for). If running in user mode, or in system mode, + but without the CAP_SYS_ADMIN + capabiblity (e.g. setting User=nobody), + NoNewPrivileges=yes is implied. Note that setting this option to a non-empty list implies that native is included too. By default, this option is set to the empty list, i.e. no architecture system @@ -1244,8 +1248,10 @@ socketpair() (which creates connected AF_UNIX sockets only) are unaffected. Note that this option has no effect on 32-bit x86 and is ignored (but works - correctly on x86-64). If running in user mode and this option - is used, NoNewPrivileges=yes is implied. By + correctly on x86-64). If running in user mode, or in system + mode, but without the CAP_SYS_ADMIN + capabiblity (e.g. setting User=nobody), + NoNewPrivileges=yes is implied. By default, no restriction applies, all address families are accessible to processes. If assigned the empty string, any previous list changes are undone. diff --git a/src/core/execute.c b/src/core/execute.c index 8ede9e9afb..0c311ec330 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1824,6 +1825,11 @@ static int exec_child( if (params->apply_permissions) { + bool use_address_families = context->address_families_whitelist || + !set_isempty(context->address_families); + bool use_syscall_filter = context->syscall_whitelist || + !set_isempty(context->syscall_filter) || + !set_isempty(context->syscall_archs); int secure_bits = context->secure_bits; for (i = 0; i < _RLIMIT_MAX; i++) { @@ -1890,15 +1896,15 @@ static int exec_child( return -errno; } - if (context->no_new_privileges) + if (context->no_new_privileges || + (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || use_syscall_filter))) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { *exit_status = EXIT_NO_NEW_PRIVILEGES; return -errno; } #ifdef HAVE_SECCOMP - if (context->address_families_whitelist || - !set_isempty(context->address_families)) { + if (use_address_families) { r = apply_address_families(context); if (r < 0) { *exit_status = EXIT_ADDRESS_FAMILIES; @@ -1906,9 +1912,7 @@ static int exec_child( } } - if (context->syscall_whitelist || - !set_isempty(context->syscall_filter) || - !set_isempty(context->syscall_archs)) { + if (use_syscall_filter) { r = apply_seccomp(context); if (r < 0) { *exit_status = EXIT_SECCOMP; diff --git a/src/test/test-execute.c b/src/test/test-execute.c index 0d2e4bfc15..5645f5c086 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -130,6 +130,15 @@ static void test_exec_systemcallerrornumber(Manager *m) { #endif } +static void test_exec_systemcall_system_mode_with_user(Manager *m) { +#ifdef HAVE_SECCOMP + if (getpwnam("nobody")) + test(m, "exec-systemcallfilter-system-user.service", 0, CLD_EXITED); + else + log_error_errno(errno, "Skipping test_exec_systemcall_system_mode_with_user, could not find nobody user: %m"); +#endif +} + static void test_exec_user(Manager *m) { if (getpwnam("nobody")) test(m, "exec-user.service", 0, CLD_EXITED); @@ -267,8 +276,31 @@ static void test_exec_spec_interpolation(Manager *m) { test(m, "exec-spec-interpolation.service", 0, CLD_EXITED); } +static int run_tests(ManagerRunningAs running_as, test_function_t *tests) { + test_function_t *test = NULL; + Manager *m = NULL; + int r; + + assert_se(tests); + + r = manager_new(running_as, true, &m); + if (MANAGER_SKIP_TEST(r)) { + printf("Skipping test: manager_new: %s\n", strerror(-r)); + return EXIT_TEST_SKIP; + } + assert_se(r >= 0); + assert_se(manager_startup(m, NULL, NULL) >= 0); + + for (test = tests; test && *test; test++) + (*test)(m); + + manager_free(m); + + return 0; +} + int main(int argc, char *argv[]) { - test_function_t tests[] = { + test_function_t user_tests[] = { test_exec_workingdirectory, test_exec_personality, test_exec_ignoresigpipe, @@ -291,8 +323,10 @@ int main(int argc, char *argv[]) { test_exec_spec_interpolation, NULL, }; - test_function_t *test = NULL; - Manager *m = NULL; + test_function_t system_tests[] = { + test_exec_systemcall_system_mode_with_user, + NULL, + }; int r; log_parse_environment(); @@ -317,18 +351,9 @@ int main(int argc, char *argv[]) { assert_se(unsetenv("VAR2") == 0); assert_se(unsetenv("VAR3") == 0); - r = manager_new(MANAGER_USER, true, &m); - if (MANAGER_SKIP_TEST(r)) { - printf("Skipping test: manager_new: %s\n", strerror(-r)); - return EXIT_TEST_SKIP; - } - assert_se(r >= 0); - assert_se(manager_startup(m, NULL, NULL) >= 0); + r = run_tests(MANAGER_USER, user_tests); + if (r != 0) + return r; - for (test = tests; test && *test; test++) - (*test)(m); - - manager_free(m); - - return 0; + return run_tests(MANAGER_SYSTEM, system_tests); } diff --git a/test/test-execute/exec-systemcallfilter-system-user.service b/test/test-execute/exec-systemcallfilter-system-user.service new file mode 100644 index 0000000000..462f94133d --- /dev/null +++ b/test/test-execute/exec-systemcallfilter-system-user.service @@ -0,0 +1,11 @@ +[Unit] +Description=Test for SystemCallFilter in system mode with User set + +[Service] +ExecStart=/bin/echo "Foo bar" +Type=oneshot +User=nobody +SystemCallFilter=~read write open execve ioperm +SystemCallFilter=ioctl +SystemCallFilter=read write open execve +SystemCallFilter=~ioperm