Merge pull request #12430 from poettering/seccomp-kill-process
use SCMP_ACT_KILL_PROCESS for SystemCallFilters=
This commit is contained in:
commit
de26d715e3
16
NEWS
16
NEWS
|
@ -2,6 +2,22 @@ systemd System and Service Manager
|
||||||
|
|
||||||
CHANGES WITH 243 in spe:
|
CHANGES WITH 243 in spe:
|
||||||
|
|
||||||
|
* Previously, filters defined with SystemCallFilter= would have the
|
||||||
|
effect that an calling an offending system call would terminate the
|
||||||
|
calling thread. This behaviour never made much sense, since killing
|
||||||
|
individual threads of unexpecting processes is likely to create more
|
||||||
|
problems than it solves. With this release the default action changed
|
||||||
|
from killing the thread to killing the whole process. For this to
|
||||||
|
work correctly both a kernel version (>= 4.14) and a libseccomp
|
||||||
|
version (>= 2.4.0) supporting this new seccomp action is required. If
|
||||||
|
an older kernel or libseccomp is used the old behaviour continues to
|
||||||
|
be used. This change does not affect any services that have no system
|
||||||
|
call filters defined, or that use SystemCallErrorNumber= (and thus
|
||||||
|
see EPERM or another error instead of being killed when calling an
|
||||||
|
offending system call). Note that systemd documentation always
|
||||||
|
claimed that the whole process is killed. With this change behaviour
|
||||||
|
is thus adjusted to match the documentation.
|
||||||
|
|
||||||
* The "kernel.pid_max" sysctl is now bumped to 4194304 by default,
|
* The "kernel.pid_max" sysctl is now bumped to 4194304 by default,
|
||||||
i.e. the full 22bit range the kernel allows, up from the old 16bit
|
i.e. the full 22bit range the kernel allows, up from the old 16bit
|
||||||
range. This should improve security and robustness a bit, as PID
|
range. This should improve security and robustness a bit, as PID
|
||||||
|
|
|
@ -1439,7 +1439,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
|
||||||
if (skip_seccomp_unavailable(u, "SystemCallFilter="))
|
if (skip_seccomp_unavailable(u, "SystemCallFilter="))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
|
negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
|
||||||
|
|
||||||
if (c->syscall_whitelist) {
|
if (c->syscall_whitelist) {
|
||||||
default_action = negative_action;
|
default_action = negative_action;
|
||||||
|
|
|
@ -1656,13 +1656,19 @@ static int oci_seccomp_action_from_string(const char *name, uint32_t *ret) {
|
||||||
const char *name;
|
const char *name;
|
||||||
uint32_t action;
|
uint32_t action;
|
||||||
} table[] = {
|
} table[] = {
|
||||||
{ "SCMP_ACT_ALLOW", SCMP_ACT_ALLOW },
|
{ "SCMP_ACT_ALLOW", SCMP_ACT_ALLOW },
|
||||||
{ "SCMP_ACT_ERRNO", SCMP_ACT_ERRNO(EPERM) }, /* the OCI spec doesn't document the error, but it appears EPERM is supposed to be used */
|
{ "SCMP_ACT_ERRNO", SCMP_ACT_ERRNO(EPERM) }, /* the OCI spec doesn't document the error, but it appears EPERM is supposed to be used */
|
||||||
{ "SCMP_ACT_KILL", SCMP_ACT_KILL },
|
{ "SCMP_ACT_KILL", SCMP_ACT_KILL },
|
||||||
#ifdef SCMP_ACT_LOG
|
#ifdef SCMP_ACT_KILL_PROCESS
|
||||||
{ "SCMP_ACT_LOG", SCMP_ACT_LOG },
|
{ "SCMP_ACT_KILL_PROCESS", SCMP_ACT_KILL_PROCESS },
|
||||||
#endif
|
#endif
|
||||||
{ "SCMP_ACT_TRAP", SCMP_ACT_TRAP },
|
#ifdef SCMP_ACT_KILL_THREAD
|
||||||
|
{ "SCMP_ACT_KILL_THREAD", SCMP_ACT_KILL_THREAD },
|
||||||
|
#endif
|
||||||
|
#ifdef SCMP_ACT_LOG
|
||||||
|
{ "SCMP_ACT_LOG", SCMP_ACT_LOG },
|
||||||
|
#endif
|
||||||
|
{ "SCMP_ACT_TRAP", SCMP_ACT_TRAP },
|
||||||
|
|
||||||
/* We don't support SCMP_ACT_TRACE because that requires a tracer, and that doesn't really make sense
|
/* We don't support SCMP_ACT_TRACE because that requires a tracer, and that doesn't really make sense
|
||||||
* here */
|
* here */
|
||||||
|
|
|
@ -1964,3 +1964,18 @@ int seccomp_restrict_suid_sgid(void) {
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t scmp_act_kill_process(void) {
|
||||||
|
|
||||||
|
/* Returns SCMP_ACT_KILL_PROCESS if it's supported, and SCMP_ACT_KILL_THREAD otherwise. We never
|
||||||
|
* actually want to use SCMP_ACT_KILL_THREAD as its semantics are nuts (killing arbitrary threads of
|
||||||
|
* a program is just a bad idea), but on old kernels/old libseccomp it is all we have, and at least
|
||||||
|
* for single-threaded apps does the right thing. */
|
||||||
|
|
||||||
|
#ifdef SCMP_ACT_KILL_PROCESS
|
||||||
|
if (seccomp_api_get() >= 3)
|
||||||
|
return SCMP_ACT_KILL_PROCESS;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return SCMP_ACT_KILL; /* same as SCMP_ACT_KILL_THREAD */
|
||||||
|
}
|
||||||
|
|
|
@ -104,3 +104,5 @@ extern const uint32_t seccomp_local_archs[];
|
||||||
DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
|
DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
|
||||||
|
|
||||||
int parse_syscall_archs(char **l, Set **archs);
|
int parse_syscall_archs(char **l, Set **archs);
|
||||||
|
|
||||||
|
uint32_t scmp_act_kill_process(void);
|
||||||
|
|
|
@ -33,6 +33,12 @@ static bool can_unshare;
|
||||||
|
|
||||||
typedef void (*test_function_t)(Manager *m);
|
typedef void (*test_function_t)(Manager *m);
|
||||||
|
|
||||||
|
static int cld_dumped_to_killed(int code) {
|
||||||
|
/* Depending on the system, seccomp version, … some signals might result in dumping, others in plain
|
||||||
|
* killing. Let's ignore the difference here, and map both cases to CLD_KILLED */
|
||||||
|
return code == CLD_DUMPED ? CLD_KILLED : code;
|
||||||
|
}
|
||||||
|
|
||||||
static void check(const char *func, Manager *m, Unit *unit, int status_expected, int code_expected) {
|
static void check(const char *func, Manager *m, Unit *unit, int status_expected, int code_expected) {
|
||||||
Service *service = NULL;
|
Service *service = NULL;
|
||||||
usec_t ts;
|
usec_t ts;
|
||||||
|
@ -62,18 +68,20 @@ static void check(const char *func, Manager *m, Unit *unit, int status_expected,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
exec_status_dump(&service->main_exec_status, stdout, "\t");
|
exec_status_dump(&service->main_exec_status, stdout, "\t");
|
||||||
|
|
||||||
|
if (cld_dumped_to_killed(service->main_exec_status.code) != cld_dumped_to_killed(code_expected)) {
|
||||||
|
log_error("%s: %s: exit code %d, expected %d",
|
||||||
|
func, unit->id,
|
||||||
|
service->main_exec_status.code, code_expected);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
if (service->main_exec_status.status != status_expected) {
|
if (service->main_exec_status.status != status_expected) {
|
||||||
log_error("%s: %s: exit status %d, expected %d",
|
log_error("%s: %s: exit status %d, expected %d",
|
||||||
func, unit->id,
|
func, unit->id,
|
||||||
service->main_exec_status.status, status_expected);
|
service->main_exec_status.status, status_expected);
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
if (service->main_exec_status.code != code_expected) {
|
|
||||||
log_error("%s: %s: exit code %d, expected %d",
|
|
||||||
func, unit->id,
|
|
||||||
service->main_exec_status.code, code_expected);
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool check_nobody_user_and_group(void) {
|
static bool check_nobody_user_and_group(void) {
|
||||||
|
|
|
@ -635,7 +635,7 @@ static void test_load_syscall_filter_set_raw(void) {
|
||||||
assert_se(access("/", F_OK) >= 0);
|
assert_se(access("/", F_OK) >= 0);
|
||||||
assert_se(poll(NULL, 0, 0) == 0);
|
assert_se(poll(NULL, 0, 0) == 0);
|
||||||
|
|
||||||
assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, SCMP_ACT_KILL, true) >= 0);
|
assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, scmp_act_kill_process(), true) >= 0);
|
||||||
assert_se(access("/", F_OK) >= 0);
|
assert_se(access("/", F_OK) >= 0);
|
||||||
assert_se(poll(NULL, 0, 0) == 0);
|
assert_se(poll(NULL, 0, 0) == 0);
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ Description=Test for SystemCallFilter
|
||||||
[Service]
|
[Service]
|
||||||
ExecStart=/bin/sh -c 'echo "This should not be seen"'
|
ExecStart=/bin/sh -c 'echo "This should not be seen"'
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
|
LimitCORE=0
|
||||||
SystemCallFilter=ioperm
|
SystemCallFilter=ioperm
|
||||||
SystemCallFilter=~ioperm
|
SystemCallFilter=~ioperm
|
||||||
SystemCallFilter=ioperm
|
SystemCallFilter=ioperm
|
||||||
|
|
|
@ -4,4 +4,5 @@ Description=Test for SystemCallFilter
|
||||||
[Service]
|
[Service]
|
||||||
ExecStart=/bin/sh -c 'echo "This should not be seen"'
|
ExecStart=/bin/sh -c 'echo "This should not be seen"'
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
|
LimitCORE=0
|
||||||
SystemCallFilter=~write open execve exit_group close mmap munmap fstat DONOTEXIST
|
SystemCallFilter=~write open execve exit_group close mmap munmap fstat DONOTEXIST
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
# (at your option) any later version.
|
# (at your option) any later version.
|
||||||
|
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Remove the Offline System Updates symlink
|
Description=Remove the Offline System Updates Symlink
|
||||||
Documentation=man:systemd.special(7) man:systemd.offline-updates(7)
|
Documentation=man:systemd.special(7) man:systemd.offline-updates(7)
|
||||||
After=system-update.target
|
After=system-update.target
|
||||||
DefaultDependencies=no
|
DefaultDependencies=no
|
||||||
|
|
Loading…
Reference in a new issue