exec: SystemCallLog= directive

With the new SystemCallLog= directive it's possible to list system calls to be
logged. This can be used for auditing, or temporarily while constructing system
call filters.

---
v5: drop intermediary, update HASHMAP_FOREACH_KEY() use
v4: skip useless debug messages, actually parse directive
v3: don't declare unused variables with old libseccomp
v2: fix build without seccomp or old libseccomp
This commit is contained in:
Topi Miettinen 2020-08-30 21:56:13 +03:00
parent 005bfaf118
commit 9df2cdd8ec
11 changed files with 280 additions and 2 deletions

View File

@ -156,6 +156,7 @@ All execution-related settings are available for transient units.
✓ SystemCallFilter=
✓ SystemCallArchitectures=
✓ SystemCallErrorNumber=
✓ SystemCallLog=
✓ MemoryDenyWriteExecute=
✓ RestrictNamespaces=
✓ RestrictRealtime=

View File

@ -2136,6 +2136,21 @@ SystemCallErrorNumber=EPERM</programlisting>
details.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>SystemCallLog=</varname></term>
<listitem><para>Takes a space-separated list of system call names. If this setting is used, the
listed system calls will be logged whenever they are executed by the unit's processes. If the first
character of the list is <literal>~</literal>, the effect is inverted: all system calls except the
listed system calls will be logged. If running in user mode, or in system mode, but without the
<constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=nobody</varname>),
<varname>NoNewPrivileges=yes</varname> is implied. This feature makes use of the Secure Computing
Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for auditing or setting up a
minimal sandboxing environment. This option may be specified more than once, in which case the filter
masks are merged. If the empty string is assigned, the filter is reset and all prior assignments will
have no effect. This does not affect commands prefixed with <literal>+</literal>.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -415,6 +415,58 @@ static int property_get_syscall_filter(
return sd_bus_message_close_container(reply);
}
/* D-Bus property getter for "SystemCallLog" (signature "(bas)").
 *
 * Serializes the syscall log configuration of the ExecContext passed in userdata as a struct made of
 * the syscall_log_allow_list flag followed by a sorted array of syscall names. When built without
 * seccomp support the array is always empty.
 *
 * Returns the result of closing the container on success, or the first negative value returned by
 * one of the message/strv calls. */
static int property_get_syscall_log(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        _cleanup_strv_free_ char **names = NULL;
        ExecContext *c = userdata;
        int r;

#if HAVE_SECCOMP
        void *key, *value;
#endif

        assert(bus);
        assert(reply);
        assert(c);

        r = sd_bus_message_open_container(reply, 'r', "bas");
        if (r < 0)
                return r;

        /* First struct member: whether the list names the calls to log (as opposed to the exceptions) */
        r = sd_bus_message_append(reply, "b", c->syscall_log_allow_list);
        if (r < 0)
                return r;

#if HAVE_SECCOMP
        HASHMAP_FOREACH_KEY(value, key, c->syscall_log) {
                /* The key decodes to the syscall number plus one, hence the "- 1" (presumably so that
                 * syscall 0 does not end up as a NULL key — TODO confirm against the parser). */
                char *resolved = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(key) - 1);

                /* Numbers that cannot be resolved to a name are silently left out */
                if (resolved) {
                        r = strv_consume(&names, resolved);
                        if (r < 0)
                                return r;
                }
        }
#endif

        /* Hashmap iteration order is not meaningful to clients, so report the names sorted */
        strv_sort(names);

        r = sd_bus_message_append_strv(reply, names);
        if (r < 0)
                return r;

        return sd_bus_message_close_container(reply);
}
static int property_get_syscall_archs(
sd_bus *bus,
const char *path,
@ -1068,6 +1120,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("SystemCallFilter", "(bas)", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", bus_property_get_int, offsetof(ExecContext, syscall_errno), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallLog", "(bas)", property_get_syscall_log, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Personality", "s", property_get_personality, offsetof(ExecContext, personality), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@ -2230,6 +2283,76 @@ int bus_exec_context_set_transient_property(
return 1;
} else if (streq(name, "SystemCallLog")) {
int allow_list;
_cleanup_strv_free_ char **l = NULL;
r = sd_bus_message_enter_container(message, 'r', "bas");
if (r < 0)
return r;
r = sd_bus_message_read(message, "b", &allow_list);
if (r < 0)
return r;
r = sd_bus_message_read_strv(message, &l);
if (r < 0)
return r;
r = sd_bus_message_exit_container(message);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
_cleanup_free_ char *joined = NULL;
SeccompParseFlags invert_flag = allow_list ? 0 : SECCOMP_PARSE_INVERT;
char **s;
if (strv_isempty(l)) {
c->syscall_log_allow_list = false;
c->syscall_log = hashmap_free(c->syscall_log);
unit_write_settingf(u, flags, name, "SystemCallLog=");
return 1;
}
if (!c->syscall_log) {
c->syscall_log = hashmap_new(NULL);
if (!c->syscall_log)
return log_oom();
c->syscall_log_allow_list = allow_list;
}
STRV_FOREACH(s, l) {
_cleanup_free_ char *n = NULL;
int e;
r = parse_syscall_and_errno(*s, &n, &e);
if (r < 0)
return r;
r = seccomp_parse_syscall_filter(n,
0, /* errno not used */
c->syscall_log,
SECCOMP_PARSE_LOG | SECCOMP_PARSE_PERMISSIVE |
invert_flag |
(c->syscall_log_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
u->id,
NULL, 0);
if (r < 0)
return r;
}
joined = strv_join(l, " ");
if (!joined)
return -ENOMEM;
unit_write_settingf(u, flags, name, "SystemCallLog=%s%s", allow_list ? "" : "~", joined);
}
return 1;
} else if (streq(name, "SystemCallArchitectures")) {
_cleanup_strv_free_ char **l = NULL;

View File

@ -1407,6 +1407,13 @@ static bool context_has_syscall_filters(const ExecContext *c) {
!hashmap_isempty(c->syscall_filter);
}
/* Returns true if the context carries any SystemCallLog= configuration: either the allow-list flag is
 * set, or the syscall_log hashmap has at least one entry. */
static bool context_has_syscall_logs(const ExecContext *c) {
        assert(c);

        if (c->syscall_log_allow_list)
                return true;

        return !hashmap_isempty(c->syscall_log);
}
static bool context_has_no_new_privileges(const ExecContext *c) {
assert(c);
@ -1428,6 +1435,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
c->protect_kernel_logs ||
c->private_devices ||
context_has_syscall_filters(c) ||
context_has_syscall_logs(c) ||
!set_isempty(c->syscall_archs) ||
c->lock_personality ||
c->protect_hostname;
@ -1484,6 +1492,39 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
}
/* Installs the seccomp filter implementing SystemCallLog= for the given unit/context.
 *
 * Returns 0 without doing anything if no syscall logging is configured, if seccomp is reported as
 * unavailable, or if libseccomp lacks SCMP_ACT_LOG; otherwise returns whatever
 * seccomp_load_syscall_filter_set_raw() returns. */
static int apply_syscall_log(const Unit* u, const ExecContext *c) {
        assert(u);
        assert(c);

        if (!context_has_syscall_logs(c))
                return 0;

#ifdef SCMP_ACT_LOG
        if (skip_seccomp_unavailable(u, "SystemCallLog="))
                return 0;

        /* In allow-list mode nothing is logged except the listed calls (default: allow, listed: log).
         * Otherwise everything is logged except the listed calls (default: log, listed: allow). */
        return seccomp_load_syscall_filter_set_raw(
                        c->syscall_log_allow_list ? SCMP_ACT_ALLOW : SCMP_ACT_LOG,
                        c->syscall_log,
                        c->syscall_log_allow_list ? SCMP_ACT_LOG : SCMP_ACT_ALLOW,
                        false);
#else
        /* old libseccomp */
        log_unit_debug(u, "SECCOMP feature SCMP_ACT_LOG not available, skipping SystemCallLog=");
        return 0;
#endif
}
static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
assert(u);
assert(c);
@ -4438,6 +4479,12 @@ static int exec_child(
return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
}
r = apply_syscall_log(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return log_unit_error_errno(unit, r, "Failed to apply system call log filters: %m");
}
/* This really should remain the last step before the execve(), to make sure our own code is unaffected
* by the filter as little as possible. */
r = apply_syscall_filter(unit, context, needs_ambient_hack);

View File

@ -302,6 +302,9 @@ struct ExecContext {
int syscall_errno;
bool syscall_allow_list:1;
Hashmap *syscall_log;
bool syscall_log_allow_list:1; /* Log listed system calls */
bool address_families_allow_list:1;
Set *address_families;

View File

@ -79,6 +79,7 @@ m4_ifdef(`HAVE_SECCOMP',
`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
$1.SystemCallLog, config_parse_syscall_log, 0, offsetof($1, exec_context)
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
@ -88,6 +89,7 @@ $1.LockPersonality, config_parse_bool, 0,
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallLog, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0

View File

@ -3197,6 +3197,86 @@ int config_parse_syscall_filter(
}
}
int config_parse_syscall_log(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
ExecContext *c = data;
_unused_ const Unit *u = userdata;
bool invert = false;
const char *p;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(u);
if (isempty(rvalue)) {
/* Empty assignment resets the list */
c->syscall_log = hashmap_free(c->syscall_log);
c->syscall_log_allow_list = false;
return 0;
}
if (rvalue[0] == '~') {
invert = true;
rvalue++;
}
if (!c->syscall_log) {
c->syscall_log = hashmap_new(NULL);
if (!c->syscall_log)
return log_oom();
if (invert)
/* Log everything but the ones listed */
c->syscall_log_allow_list = false;
else
/* Log nothing but the ones listed */
c->syscall_log_allow_list = true;
}
p = rvalue;
for (;;) {
_cleanup_free_ char *word = NULL, *name = NULL;
int num;
r = extract_first_word(&p, &word, NULL, 0);
if (r == 0)
return 0;
if (r == -ENOMEM)
return log_oom();
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
return 0;
}
r = parse_syscall_and_errno(word, &name, &num);
if (r < 0 || num >= 0) { /* errno code not allowed */
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse syscall, ignoring: %s", word);
continue;
}
r = seccomp_parse_syscall_filter(
name, 0, c->syscall_log,
SECCOMP_PARSE_LOG|SECCOMP_PARSE_PERMISSIVE|
(invert ? SECCOMP_PARSE_INVERT : 0)|
(c->syscall_log_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
unit, filename, line);
if (r < 0)
return r;
}
}
int config_parse_syscall_archs(
const char *unit,
const char *filename,
@ -5444,6 +5524,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_syscall_filter, "SYSCALLS" },
{ config_parse_syscall_archs, "ARCHS" },
{ config_parse_syscall_errno, "ERRNO" },
{ config_parse_syscall_log, "SYSCALLS" },
{ config_parse_address_families, "FAMILIES" },
{ config_parse_restrict_namespaces, "NAMESPACES" },
#endif

View File

@ -65,6 +65,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_unit_requires_mounts_for);
CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter);
CONFIG_PARSER_PROTOTYPE(config_parse_syscall_archs);
CONFIG_PARSER_PROTOTYPE(config_parse_syscall_errno);
CONFIG_PARSER_PROTOTYPE(config_parse_syscall_log);
CONFIG_PARSER_PROTOTYPE(config_parse_environ);
CONFIG_PARSER_PROTOTYPE(config_parse_pass_environ);
CONFIG_PARSER_PROTOTYPE(config_parse_unset_environ);

View File

@ -1299,7 +1299,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
}
if (STR_IN_SET(field, "RestrictAddressFamilies",
"SystemCallFilter")) {
"SystemCallFilter",
"SystemCallLog")) {
int allow_list = 1;
const char *p = eq;

View File

@ -1073,6 +1073,10 @@ int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, u
if (error == SECCOMP_ERROR_NUMBER_KILL)
a = scmp_act_kill_process();
#ifdef SCMP_ACT_LOG
else if (action == SCMP_ACT_LOG)
a = SCMP_ACT_LOG;
#endif
else if (action != SCMP_ACT_ALLOW && error >= 0)
a = SCMP_ACT_ERRNO(error);

View File

@ -4822,7 +4822,7 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m
return 1;
} else if (STR_IN_SET(name, "SystemCallFilter", "RestrictAddressFamilies")) {
} else if (STR_IN_SET(name, "SystemCallFilter", "SystemCallLog", "RestrictAddressFamilies")) {
_cleanup_strv_free_ char **l = NULL;
int allow_list;