Merge pull request #12430 from poettering/seccomp-kill-process

use SCMP_ACT_KILL_PROCESS for SystemCallFilters=
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2019-05-24 12:17:53 +02:00 committed by GitHub
commit de26d715e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 64 additions and 15 deletions

16
NEWS
View File

@ -2,6 +2,22 @@ systemd System and Service Manager
CHANGES WITH 243 in spe:
* Previously, filters defined with SystemCallFilter= would have the
effect that an calling an offending system call would terminate the
calling thread. This behaviour never made much sense, since killing
individual threads of unexpecting processes is likely to create more
problems than it solves. With this release the default action changed
from killing the thread to killing the whole process. For this to
work correctly both a kernel version (>= 4.14) and a libseccomp
version (>= 2.4.0) supporting this new seccomp action is required. If
an older kernel or libseccomp is used the old behaviour continues to
be used. This change does not affect any services that have no system
call filters defined, or that use SystemCallErrorNumber= (and thus
see EPERM or another error instead of being killed when calling an
offending system call). Note that systemd documentation always
claimed that the whole process is killed. With this change behaviour
is thus adjusted to match the documentation.
* The "kernel.pid_max" sysctl is now bumped to 4194304 by default,
i.e. the full 22bit range the kernel allows, up from the old 16bit
range. This should improve security and robustness a bit, as PID

View File

@ -1439,7 +1439,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
if (skip_seccomp_unavailable(u, "SystemCallFilter="))
return 0;
negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
if (c->syscall_whitelist) {
default_action = negative_action;

View File

@ -1656,13 +1656,19 @@ static int oci_seccomp_action_from_string(const char *name, uint32_t *ret) {
const char *name;
uint32_t action;
} table[] = {
{ "SCMP_ACT_ALLOW", SCMP_ACT_ALLOW },
{ "SCMP_ACT_ERRNO", SCMP_ACT_ERRNO(EPERM) }, /* the OCI spec doesn't document the error, but it appears EPERM is supposed to be used */
{ "SCMP_ACT_KILL", SCMP_ACT_KILL },
#ifdef SCMP_ACT_LOG
{ "SCMP_ACT_LOG", SCMP_ACT_LOG },
{ "SCMP_ACT_ALLOW", SCMP_ACT_ALLOW },
{ "SCMP_ACT_ERRNO", SCMP_ACT_ERRNO(EPERM) }, /* the OCI spec doesn't document the error, but it appears EPERM is supposed to be used */
{ "SCMP_ACT_KILL", SCMP_ACT_KILL },
#ifdef SCMP_ACT_KILL_PROCESS
{ "SCMP_ACT_KILL_PROCESS", SCMP_ACT_KILL_PROCESS },
#endif
{ "SCMP_ACT_TRAP", SCMP_ACT_TRAP },
#ifdef SCMP_ACT_KILL_THREAD
{ "SCMP_ACT_KILL_THREAD", SCMP_ACT_KILL_THREAD },
#endif
#ifdef SCMP_ACT_LOG
{ "SCMP_ACT_LOG", SCMP_ACT_LOG },
#endif
{ "SCMP_ACT_TRAP", SCMP_ACT_TRAP },
/* We don't support SCMP_ACT_TRACE because that requires a tracer, and that doesn't really make sense
* here */

View File

@ -1964,3 +1964,18 @@ int seccomp_restrict_suid_sgid(void) {
return 0;
}
uint32_t scmp_act_kill_process(void) {
/* Returns SCMP_ACT_KILL_PROCESS if it's supported, and SCMP_ACT_KILL_THREAD otherwise. We never
* actually want to use SCMP_ACT_KILL_THREAD as its semantics are nuts (killing arbitrary threads of
* a program is just a bad idea), but on old kernels/old libseccomp it is all we have, and at least
* for single-threaded apps does the right thing. */
#ifdef SCMP_ACT_KILL_PROCESS
if (seccomp_api_get() >= 3)
return SCMP_ACT_KILL_PROCESS;
#endif
return SCMP_ACT_KILL; /* same as SCMP_ACT_KILL_THREAD */
}

View File

@ -104,3 +104,5 @@ extern const uint32_t seccomp_local_archs[];
DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
int parse_syscall_archs(char **l, Set **archs);
uint32_t scmp_act_kill_process(void);

View File

@ -33,6 +33,12 @@ static bool can_unshare;
typedef void (*test_function_t)(Manager *m);
static int cld_dumped_to_killed(int code) {
/* Depending on the system, seccomp version, … some signals might result in dumping, others in plain
* killing. Let's ignore the difference here, and map both cases to CLD_KILLED */
return code == CLD_DUMPED ? CLD_KILLED : code;
}
static void check(const char *func, Manager *m, Unit *unit, int status_expected, int code_expected) {
Service *service = NULL;
usec_t ts;
@ -62,18 +68,20 @@ static void check(const char *func, Manager *m, Unit *unit, int status_expected,
}
}
exec_status_dump(&service->main_exec_status, stdout, "\t");
if (cld_dumped_to_killed(service->main_exec_status.code) != cld_dumped_to_killed(code_expected)) {
log_error("%s: %s: exit code %d, expected %d",
func, unit->id,
service->main_exec_status.code, code_expected);
abort();
}
if (service->main_exec_status.status != status_expected) {
log_error("%s: %s: exit status %d, expected %d",
func, unit->id,
service->main_exec_status.status, status_expected);
abort();
}
if (service->main_exec_status.code != code_expected) {
log_error("%s: %s: exit code %d, expected %d",
func, unit->id,
service->main_exec_status.code, code_expected);
abort();
}
}
static bool check_nobody_user_and_group(void) {

View File

@ -635,7 +635,7 @@ static void test_load_syscall_filter_set_raw(void) {
assert_se(access("/", F_OK) >= 0);
assert_se(poll(NULL, 0, 0) == 0);
assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, SCMP_ACT_KILL, true) >= 0);
assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, scmp_act_kill_process(), true) >= 0);
assert_se(access("/", F_OK) >= 0);
assert_se(poll(NULL, 0, 0) == 0);

View File

@ -4,6 +4,7 @@ Description=Test for SystemCallFilter
[Service]
ExecStart=/bin/sh -c 'echo "This should not be seen"'
Type=oneshot
LimitCORE=0
SystemCallFilter=ioperm
SystemCallFilter=~ioperm
SystemCallFilter=ioperm

View File

@ -4,4 +4,5 @@ Description=Test for SystemCallFilter
[Service]
ExecStart=/bin/sh -c 'echo "This should not be seen"'
Type=oneshot
LimitCORE=0
SystemCallFilter=~write open execve exit_group close mmap munmap fstat DONOTEXIST

View File

@ -8,7 +8,7 @@
# (at your option) any later version.
[Unit]
Description=Remove the Offline System Updates symlink
Description=Remove the Offline System Updates Symlink
Documentation=man:systemd.special(7) man:systemd.offline-updates(7)
After=system-update.target
DefaultDependencies=no