core: set NoNewPrivileges for seccomp if we don't have CAP_SYS_ADMIN

The manpage of seccomp specifies that using seccomp with
SECCOMP_SET_MODE_FILTER will return EACCES if the caller does not have
CAP_SYS_ADMIN set, or if the no_new_privileges bit is not set. Hence,
without NoNewPrivileges set, it is impossible to use a SystemCall*
directive with a User directive set in system mode.

Now, NoNewPrivileges is set if we are in user mode, or if we are in
system mode and we don't have CAP_SYS_ADMIN, and SystemCall*
directives are used.
This commit is contained in:
Ronny Chevalier 2016-01-30 17:26:39 +01:00
parent 06fb28b16e
commit 19c0b0b9a5
5 changed files with 74 additions and 27 deletions

View File

@ -1556,6 +1556,7 @@ EXTRA_DIST += \
test/test-execute/exec-systemcallfilter-failing.service \
test/test-execute/exec-systemcallfilter-not-failing2.service \
test/test-execute/exec-systemcallfilter-not-failing.service \
test/test-execute/exec-systemcallfilter-system-user.service \
test/test-execute/exec-user.service \
test/test-execute/exec-workingdirectory.service \
test/test-execute/exec-umask-0177.service \

View File

@ -1155,7 +1155,9 @@
first character of the list is <literal>~</literal>, the
effect is inverted: only the listed system calls will result
in immediate process termination (blacklisting). If running in
user mode and this option is used,
user mode, or in system mode, but without the
<constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
<varname>User=nobody</varname>),
<varname>NoNewPrivileges=yes</varname> is implied. This
feature makes use of the Secure Computing Mode 2 interfaces of
the kernel ('seccomp filtering') and is useful for enforcing a
@ -1214,8 +1216,10 @@
systems. The special <constant>native</constant> identifier
implicitly maps to the native architecture of the system (or
more strictly: to the architecture the system manager is
compiled for). If running in user mode and this option is
used, <varname>NoNewPrivileges=yes</varname> is implied. Note
compiled for). If running in user mode, or in system mode,
but without the <constant>CAP_SYS_ADMIN</constant>
capability (e.g. setting <varname>User=nobody</varname>),
<varname>NoNewPrivileges=yes</varname> is implied. Note
that setting this option to a non-empty list implies that
<constant>native</constant> is included too. By default, this
option is set to the empty list, i.e. no architecture system
@ -1244,8 +1248,10 @@
<function>socketpair()</function> (which creates connected
AF_UNIX sockets only) are unaffected. Note that this option
has no effect on 32-bit x86 and is ignored (but works
correctly on x86-64). If running in user mode and this option
is used, <varname>NoNewPrivileges=yes</varname> is implied. By
correctly on x86-64). If running in user mode, or in system
mode, but without the <constant>CAP_SYS_ADMIN</constant>
capability (e.g. setting <varname>User=nobody</varname>),
<varname>NoNewPrivileges=yes</varname> is implied. By
default, no restriction applies, all address families are
accessible to processes. If assigned the empty string, any
previous list changes are undone.</para>

View File

@ -24,6 +24,7 @@
#include <poll.h>
#include <signal.h>
#include <string.h>
#include <sys/capability.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/socket.h>
@ -1824,6 +1825,11 @@ static int exec_child(
if (params->apply_permissions) {
bool use_address_families = context->address_families_whitelist ||
!set_isempty(context->address_families);
bool use_syscall_filter = context->syscall_whitelist ||
!set_isempty(context->syscall_filter) ||
!set_isempty(context->syscall_archs);
int secure_bits = context->secure_bits;
for (i = 0; i < _RLIMIT_MAX; i++) {
@ -1890,15 +1896,15 @@ static int exec_child(
return -errno;
}
if (context->no_new_privileges)
if (context->no_new_privileges ||
(!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || use_syscall_filter)))
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
*exit_status = EXIT_NO_NEW_PRIVILEGES;
return -errno;
}
#ifdef HAVE_SECCOMP
if (context->address_families_whitelist ||
!set_isempty(context->address_families)) {
if (use_address_families) {
r = apply_address_families(context);
if (r < 0) {
*exit_status = EXIT_ADDRESS_FAMILIES;
@ -1906,9 +1912,7 @@ static int exec_child(
}
}
if (context->syscall_whitelist ||
!set_isempty(context->syscall_filter) ||
!set_isempty(context->syscall_archs)) {
if (use_syscall_filter) {
r = apply_seccomp(context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;

View File

@ -130,6 +130,15 @@ static void test_exec_systemcallerrornumber(Manager *m) {
#endif
}
/* Runs the exec-systemcallfilter-system-user.service test unit through
 * test(), passing 0 and CLD_EXITED as the expected results. The whole body
 * is compiled out when HAVE_SECCOMP is not defined. When getpwnam() cannot
 * find a "nobody" user the unit is not run and a message is logged
 * instead. */
static void test_exec_systemcall_system_mode_with_user(Manager *m) {
#ifdef HAVE_SECCOMP
        if (!getpwnam("nobody")) {
                /* The unit sets User=nobody, so without that account there is nothing to run. */
                log_error_errno(errno, "Skipping test_exec_systemcall_system_mode_with_user, could not find nobody user: %m");
                return;
        }

        test(m, "exec-systemcallfilter-system-user.service", 0, CLD_EXITED);
#endif
}
static void test_exec_user(Manager *m) {
if (getpwnam("nobody"))
test(m, "exec-user.service", 0, CLD_EXITED);
@ -267,8 +276,31 @@ static void test_exec_spec_interpolation(Manager *m) {
test(m, "exec-spec-interpolation.service", 0, CLD_EXITED);
}
/* Creates a manager for the given running mode, starts it up, and invokes
 * every function of the NULL-terminated array 'tests' with that manager.
 *
 * Returns EXIT_TEST_SKIP (after printing the reason) when manager_new()
 * fails with an error that MANAGER_SKIP_TEST() accepts, and 0 once all
 * test functions have been called and the manager has been freed. Any
 * other manager_new() or manager_startup() failure trips assert_se(). */
static int run_tests(ManagerRunningAs running_as, test_function_t *tests) {
        Manager *manager = NULL;
        test_function_t *current;
        int ret;

        assert_se(tests);

        ret = manager_new(running_as, true, &manager);
        if (MANAGER_SKIP_TEST(ret)) {
                printf("Skipping test: manager_new: %s\n", strerror(-ret));
                return EXIT_TEST_SKIP;
        }

        assert_se(ret >= 0);
        assert_se(manager_startup(manager, NULL, NULL) >= 0);

        /* Walk the array until the terminating NULL entry. */
        current = tests;
        while (current && *current) {
                (*current)(manager);
                current++;
        }

        manager_free(manager);

        return 0;
}
int main(int argc, char *argv[]) {
test_function_t tests[] = {
test_function_t user_tests[] = {
test_exec_workingdirectory,
test_exec_personality,
test_exec_ignoresigpipe,
@ -291,8 +323,10 @@ int main(int argc, char *argv[]) {
test_exec_spec_interpolation,
NULL,
};
test_function_t *test = NULL;
Manager *m = NULL;
test_function_t system_tests[] = {
test_exec_systemcall_system_mode_with_user,
NULL,
};
int r;
log_parse_environment();
@ -317,18 +351,9 @@ int main(int argc, char *argv[]) {
assert_se(unsetenv("VAR2") == 0);
assert_se(unsetenv("VAR3") == 0);
r = manager_new(MANAGER_USER, true, &m);
if (MANAGER_SKIP_TEST(r)) {
printf("Skipping test: manager_new: %s\n", strerror(-r));
return EXIT_TEST_SKIP;
}
assert_se(r >= 0);
assert_se(manager_startup(m, NULL, NULL) >= 0);
r = run_tests(MANAGER_USER, user_tests);
if (r != 0)
return r;
for (test = tests; test && *test; test++)
(*test)(m);
manager_free(m);
return 0;
return run_tests(MANAGER_SYSTEM, system_tests);
}

View File

@ -0,0 +1,11 @@
[Unit]
Description=Test for SystemCallFilter in system mode with User set
[Service]
ExecStart=/bin/echo "Foo bar"
Type=oneshot
User=nobody
SystemCallFilter=~read write open execve ioperm
SystemCallFilter=ioctl
SystemCallFilter=read write open execve
SystemCallFilter=~ioperm