core: rework syscall filter

- Allow configuration of an errno error to return from blacklisted
  syscalls, instead of immediately terminating a process.

- Fix parsing logic when libseccomp support is turned off

- Only keep the actual syscall set in the ExecContext, and generate the
  string version only on demand.
This commit is contained in:
Lennart Poettering 2014-02-12 18:28:21 +01:00
parent c0467cf387
commit 17df7223be
8 changed files with 274 additions and 127 deletions

View File

@ -327,7 +327,9 @@ have_seccomp=no
AC_ARG_ENABLE(seccomp, AS_HELP_STRING([--disable-seccomp], [Disable optional SECCOMP support]))
if test "x$enable_seccomp" != "xno"; then
PKG_CHECK_MODULES(SECCOMP, [libseccomp >= 1.0.0],
[AC_DEFINE(HAVE_SECCOMP, 1, [Define if seccomp is available]) have_seccomp=yes],
[AC_DEFINE(HAVE_SECCOMP, 1, [Define if seccomp is available])
have_seccomp=yes
M4_DEFINES="$M4_DEFINES -DHAVE_SECCOMP"],
[have_seccomp=no])
if test "x$have_seccomp" = "xno" -a "x$enable_seccomp" = "xyes"; then
AC_MSG_ERROR([*** seccomp support requested but libraries not found])

View File

@ -1001,7 +1001,7 @@
list of system call
names. If this setting is used, all
system calls executed by the unit
process except for the listed ones
processes except for the listed ones
will result in immediate process
termination with the
<constant>SIGSYS</constant> signal
@ -1031,23 +1031,47 @@
prior assignments will have no
effect.</para>
<para>If you specify both types of this option
(i.e. whitelisting and blacklisting) the first
encountered will take precedence and will
dictate the default action (termination
or approval of a system call). Then the
next occurrences of this option will add or
delete the listed system calls from the set
of the filtered system calls, depending of
its type and the default action (e.g. You
have started with a whitelisting of <function>
read</function> and <function>write</function>
and right after it add a blacklisting of
<function>write</function>, then <function>
write</function> will be removed from the set)
<para>If you specify both types of
this option (i.e. whitelisting and
blacklisting), the first encountered
will take precedence and will dictate
the default action (termination or
approval of a system call). Then the
next occurrences of this option will
add or delete the listed system calls
from the set of the filtered system
calls, depending on its type and the
default action. (For example, if you
have started with a whitelisting of
<function>read</function> and
<function>write</function>, and right
after it add a blacklisting of
<function>write</function>, then
<function>write</function> will be
removed from the set.)
</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>SystemCallErrorNumber=</varname></term>
<listitem><para>Takes an
<literal>errno</literal> error number
name to return when the system call
filter configured with
<varname>SystemCallFilter=</varname>
is triggered, instead of terminating
the process immediately. Takes an
error name such as
<literal>EPERM</literal>,
<literal>EACCES</literal> or
<literal>EUCLEAN</literal>. When this
setting is not used, or when the empty
string is assigned, the process will be
terminated immediately when the filter
is triggered.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -348,12 +348,66 @@ static int property_get_syscall_filter(
sd_bus_error *error) {
ExecContext *c = userdata;
_cleanup_strv_free_ char **l = NULL;
_cleanup_free_ char *t = NULL;
Iterator i;
void *id;
int r;
assert(bus);
assert(reply);
assert(c);
return sd_bus_message_append(reply, "s", c->syscall_filter_string);
SET_FOREACH(id, c->syscall_filter, i) {
char *name;
name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
if (!name)
continue;
r = strv_push(&l, name);
if (r < 0) {
free(name);
return -ENOMEM;
}
}
strv_sort(l);
t = strv_join(l, " ");
if (!t)
return -ENOMEM;
if (!c->syscall_whitelist) {
char *d;
d = strappend("~", t);
if (!d)
return -ENOMEM;
free(t);
t = d;
}
return sd_bus_message_append(reply, "s", t);
}
/* D-Bus property getter for "SystemCallErrorNumber".
 *
 * Reads syscall_errno from the ExecContext passed as userdata and appends it
 * to the reply message as a signed 32-bit integer (type "i"). Returns the
 * result of sd_bus_message_append(). */
static int property_get_syscall_errno(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        ExecContext *c = userdata;
        int32_t value;

        assert(bus);
        assert(reply);
        assert(c);

        /* Narrow to the wire type explicitly before handing it to the
         * variadic append call. */
        value = (int32_t) c->syscall_errno;

        return sd_bus_message_append(reply, "i", value);
}
const sd_bus_vtable bus_exec_vtable[] = {
@ -419,6 +473,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NoNewPrivileges", "b", bus_property_get_bool, offsetof(ExecContext, no_new_privileges), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallFilter", "s", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};

View File

@ -40,11 +40,6 @@
#include <sys/poll.h>
#include <glob.h>
#include <libgen.h>
#ifdef HAVE_SECCOMP
#include <seccomp.h>
#include "set.h"
#endif
#undef basename
#ifdef HAVE_PAM
@ -55,6 +50,10 @@
#include <selinux/selinux.h>
#endif
#ifdef HAVE_SECCOMP
#include <seccomp.h>
#endif
#include "execute.h"
#include "strv.h"
#include "macro.h"
@ -76,6 +75,7 @@
#include "unit.h"
#include "async.h"
#include "selinux-util.h"
#include "errno-list.h"
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
@ -937,29 +937,36 @@ static void rename_process_from_path(const char *path) {
}
#ifdef HAVE_SECCOMP
static int apply_seccomp(ExecContext *c) {
uint32_t action = SCMP_ACT_ALLOW;
uint32_t negative_action, action;
scmp_filter_ctx *seccomp;
Iterator i;
void *id;
int r;
assert(c);
c->syscall_filter = seccomp_init(c->syscall_filter_default_action);
if (!c->syscall_filter)
return -1;
negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
if (c->syscall_filter_default_action == SCMP_ACT_ALLOW)
action = SCMP_ACT_KILL;
seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
if (!seccomp)
return -ENOMEM;
SET_FOREACH(id, c->filtered_syscalls, i) {
int r = seccomp_rule_add(c->syscall_filter, action, PTR_TO_INT(id) - 1, 0);
action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
SET_FOREACH(id, c->syscall_filter, i) {
r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
if (r < 0) {
log_error("Failed to add syscall filter");
seccomp_release(seccomp);
return r;
}
}
return seccomp_load(c->syscall_filter);
r = seccomp_load(seccomp);
seccomp_release(seccomp);
return r;
}
#endif
@ -1541,7 +1548,7 @@ int exec_spawn(ExecCommand *command,
}
#ifdef HAVE_SECCOMP
if (context->filtered_syscalls) {
if (context->syscall_filter) {
err = apply_seccomp(context);
if (err < 0) {
r = EXIT_SECCOMP;
@ -1549,6 +1556,7 @@ int exec_spawn(ExecCommand *command,
}
}
#endif
#ifdef HAVE_SELINUX
if (context->selinux_context && use_selinux()) {
bool ignore;
@ -1729,19 +1737,9 @@ void exec_context_done(ExecContext *c) {
free(c->selinux_context);
c->selinux_context = NULL;
free(c->syscall_filter);
c->syscall_filter = NULL;
free(c->syscall_filter_string);
c->syscall_filter_string = NULL;
#ifdef HAVE_SECCOMP
if (c->syscall_filter) {
seccomp_release(c->syscall_filter);
c->syscall_filter = NULL;
}
set_free(c->filtered_syscalls);
c->filtered_syscalls = NULL;
set_free(c->syscall_filter);
c->syscall_filter = NULL;
#endif
}
@ -2115,6 +2113,38 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
fprintf(f,
"%sSELinuxContext: %s\n",
prefix, c->selinux_context);
#ifdef HAVE_SECCOMP
        /* Dump the system call filter on a single line: the key, an optional
         * '~' prefix when the set is a blacklist, then the space-separated
         * system call names. Guarded by HAVE_SECCOMP because resolving the
         * names needs libseccomp, whose header is only included under that
         * guard. */
        if (c->syscall_filter) {
                Iterator j;
                void *id;
                bool first = true;

                /* No newline here: the list follows on the same line. */
                fprintf(f,
                        "%sSystemCallFilter: ",
                        prefix);

                if (!c->syscall_whitelist)
                        fputc('~', f);

                SET_FOREACH(id, c->syscall_filter, j) {
                        _cleanup_free_ char *name = NULL;

                        if (first)
                                first = false;
                        else
                                fputc(' ', f);

                        /* Set entries hold the syscall number + 1 (see the
                         * INT_TO_PTR(id + 1) insertions in the parser), so
                         * undo the offset. The architecture token is the
                         * first argument, the syscall number the second. */
                        name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
                        fputs(strna(name), f);
                }

                fputc('\n', f);
        }
#endif
if (c->syscall_errno != 0)
fprintf(f,
"%sSystemCallErrorNumber: %s\n",
prefix, strna(errno_to_name(c->syscall_errno)));
}
void exec_status_start(ExecStatus *s, pid_t pid) {

View File

@ -167,12 +167,9 @@ struct ExecContext {
* don't enter a trigger loop. */
bool same_pgrp;
#ifdef HAVE_SECCOMP
scmp_filter_ctx syscall_filter;
Set *filtered_syscalls;
uint32_t syscall_filter_default_action;
#endif
char *syscall_filter_string;
Set *syscall_filter;
int syscall_errno;
bool syscall_whitelist:1;
bool oom_score_adjust_set:1;
bool nice_set:1;

View File

@ -49,7 +49,11 @@ $1.SecureBits, config_parse_exec_secure_bits, 0,
$1.CapabilityBoundingSet, config_parse_bounding_set, 0, offsetof($1, exec_context.capability_bounding_set_drop)
$1.TimerSlackNSec, config_parse_nsec, 0, offsetof($1, exec_context.timer_slack_nsec)
$1.NoNewPrivileges, config_parse_bool, 0, offsetof($1, exec_context.no_new_privileges)
$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
m4_ifdef(`HAVE_SECCOMP',
`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)',
`$1.SystemCallFilter, config_parse_warn_compat, 0, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, 0, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
$1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)

View File

@ -33,10 +33,9 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/resource.h>
#ifdef HAVE_SECCOMP
#include <seccomp.h>
#include "set.h"
#endif
#include "sd-messages.h"
@ -56,18 +55,20 @@
#include "cgroup.h"
#include "bus-util.h"
#include "bus-error.h"
#include "errno-list.h"
#if !defined(HAVE_SYSV_COMPAT) || !defined(HAVE_SECCOMP)
int config_parse_warn_compat(const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
int config_parse_warn_compat(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
log_syntax(unit, LOG_DEBUG, filename, line, EINVAL,
"Support for option %s= has been disabled at compile time and is ignored",
@ -1921,33 +1922,31 @@ int config_parse_documentation(const char *unit,
}
#ifdef HAVE_SECCOMP
int config_parse_syscall_filter(const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
int config_parse_syscall_filter(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
static const char default_syscalls[] =
"execve\0"
"exit\0"
"exit_group\0"
"rt_sigreturn\0"
"sigreturn\0";
ExecContext *c = data;
Unit *u = userdata;
bool invert = false;
char *w;
char *w, *state;
size_t l;
char *state;
_cleanup_strv_free_ char **syscalls = strv_new(NULL, NULL);
_cleanup_free_ char *sorted_syscalls = NULL;
uint32_t action = SCMP_ACT_ALLOW;
Iterator i;
void *e;
static char const *default_syscalls[] = {"execve",
"exit",
"exit_group",
"rt_sigreturn",
"sigreturn",
NULL};
int r;
assert(filename);
assert(lvalue);
@ -1956,42 +1955,51 @@ int config_parse_syscall_filter(const char *unit,
if (isempty(rvalue)) {
/* Empty assignment resets the list */
set_free(c->filtered_syscalls);
c->filtered_syscalls= NULL;
free(c->syscall_filter_string);
c->syscall_filter_string = NULL;
set_free(c->syscall_filter);
c->syscall_filter = NULL;
c->syscall_whitelist = false;
return 0;
}
if (rvalue[0] == '~') {
invert = true;
action = SCMP_ACT_KILL;
rvalue++;
}
if (!c->filtered_syscalls) {
c->filtered_syscalls = set_new(trivial_hash_func, trivial_compare_func);
if (!c->syscall_filter) {
c->syscall_filter = set_new(trivial_hash_func, trivial_compare_func);
if (!c->syscall_filter)
return log_oom();
if (invert)
c->syscall_filter_default_action = SCMP_ACT_ALLOW;
/* Allow everything but the ones listed */
c->syscall_whitelist = false;
else {
char const **syscall;
const char *i;
c->syscall_filter_default_action = SCMP_ACT_KILL;
/* Allow nothing but the ones listed */
c->syscall_whitelist = true;
/* accept default syscalls if we are on a whitelist */
STRV_FOREACH(syscall, default_syscalls) {
int id = seccomp_syscall_resolve_name(*syscall);
/* Accept default syscalls if we are on a whitelist */
NULSTR_FOREACH(i, default_syscalls) {
int id;
id = seccomp_syscall_resolve_name(i);
if (id < 0)
continue;
set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1));
r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
if (r == -EEXIST)
continue;
if (r < 0)
return log_oom();
}
}
}
FOREACH_WORD_QUOTED(w, l, rvalue, state) {
int id;
_cleanup_free_ char *t = NULL;
int id;
t = strndup(w, l);
if (!t)
@ -1999,36 +2007,62 @@ int config_parse_syscall_filter(const char *unit,
id = seccomp_syscall_resolve_name(t);
if (id < 0) {
log_syntax(unit, LOG_ERR, filename, line, EINVAL,
"Failed to parse syscall, ignoring: %s", t);
log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse system call, ignoring: %s", t);
continue;
}
/* If we previously wanted to forbid a syscall
* and now we want to allow it, then remove it from the list
* libseccomp will also return -EPERM if we try to add
* a rule with the same action as the default
/* If we previously wanted to forbid a syscall and now
* we want to allow it, then remove it from the list
*/
if (action == c->syscall_filter_default_action)
set_remove(c->filtered_syscalls, INT_TO_PTR(id + 1));
else
set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1));
if (!invert == c->syscall_whitelist) {
r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
if (r == -EEXIST)
continue;
if (r < 0)
return log_oom();
} else
set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
}
SET_FOREACH(e, c->filtered_syscalls, i) {
char *name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(e) - 1);
strv_push(&syscalls, name);
}
sorted_syscalls = strv_join(strv_sort(syscalls), " ");
if (invert)
c->syscall_filter_string = strv_join(STRV_MAKE("~", sorted_syscalls, NULL), "");
else
c->syscall_filter_string = strdup(sorted_syscalls);
c->no_new_privileges = true;
return 0;
}
/* Config parser callback for SystemCallErrorNumber=.
 *
 * rvalue is an errno name (e.g. "EPERM"); data points to the ExecContext
 * whose syscall_errno field is updated. An empty rvalue resets the field
 * to 0. A name that cannot be resolved is logged and ignored, leaving the
 * previous value in place.
 *
 * Always returns 0, so a bad value never aborts loading of the unit file. */
int config_parse_syscall_errno(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        ExecContext *c = data;
        int e;

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        if (isempty(rvalue)) {
                /* Empty assignment resets to KILL */
                c->syscall_errno = 0;
                return 0;
        }

        e = errno_from_name(rvalue);

        /* 0 is reserved to mean "no errno configured, kill the process", so
         * a non-empty name must never resolve to it silently. Treat both a
         * negative result and 0 as a parse failure.
         * NOTE(review): whether errno_from_name() reports unknown names as 0
         * or as a negative value is not visible here; this check covers
         * both. */
        if (e <= 0) {
                log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse error number, ignoring: %s", rvalue);
                return 0;
        }

        c->syscall_errno = e;
        return 0;
}
#endif
int config_parse_unit_slice(
@ -2742,6 +2776,9 @@ void unit_dump_config_items(FILE *f) {
const ConfigParserCallback callback;
const char *rvalue;
} table[] = {
#if !defined(HAVE_SYSV_COMPAT) || !defined(HAVE_SECCOMP)
{ config_parse_warn_compat, "NOTSUPPORTED" },
#endif
{ config_parse_int, "INTEGER" },
{ config_parse_unsigned, "UNSIGNED" },
{ config_parse_bytes_size, "SIZE" },
@ -2773,8 +2810,6 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_service_restart, "SERVICERESTART" },
#ifdef HAVE_SYSV_COMPAT
{ config_parse_sysv_priority, "SYSVPRIORITY" },
#else
{ config_parse_warn_compat, "NOTSUPPORTED" },
#endif
{ config_parse_kill_mode, "KILLMODE" },
{ config_parse_kill_signal, "SIGNAL" },
@ -2803,9 +2838,8 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_service_sockets, "SOCKETS" },
{ config_parse_environ, "ENVIRON" },
#ifdef HAVE_SECCOMP
{ config_parse_syscall_filter, "SYSCALL" },
#else
{ config_parse_warn_compat, "NOTSUPPORTED" },
{ config_parse_syscall_filter, "SYSCALLS" },
{ config_parse_syscall_errno, "ERRNO" },
#endif
{ config_parse_cpu_shares, "SHARES" },
{ config_parse_memory_limit, "LIMIT" },

View File

@ -74,6 +74,7 @@ int config_parse_notify_access(const char *unit, const char *filename, unsigned
int config_parse_start_limit_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_syscall_errno(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);