From 9df2cdd8ec1cc223d74fd06eeb89d822f72f61c3 Mon Sep 17 00:00:00 2001 From: Topi Miettinen Date: Sun, 30 Aug 2020 21:56:13 +0300 Subject: [PATCH] exec: SystemCallLog= directive With new directive SystemCallLog= it's possible to list system calls to be logged. This can be used for auditing or temporarily when constructing system call filters. --- v5: drop intermediary, update HASHMAP_FOREACH_KEY() use v4: skip useless debug messages, actually parse directive v3: don't declare unused variables with old libseccomp v2: fix build without seccomp or old libseccomp --- docs/TRANSIENT-SETTINGS.md | 1 + man/systemd.exec.xml | 15 ++++ src/core/dbus-execute.c | 123 ++++++++++++++++++++++++++ src/core/execute.c | 47 ++++++++++ src/core/execute.h | 3 + src/core/load-fragment-gperf.gperf.m4 | 2 + src/core/load-fragment.c | 81 +++++++++++++++++ src/core/load-fragment.h | 1 + src/shared/bus-unit-util.c | 3 +- src/shared/seccomp-util.c | 4 + src/systemctl/systemctl.c | 2 +- 11 files changed, 280 insertions(+), 2 deletions(-) diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md index 89f0a7e80d..f8ff413d28 100644 --- a/docs/TRANSIENT-SETTINGS.md +++ b/docs/TRANSIENT-SETTINGS.md @@ -156,6 +156,7 @@ All execution-related settings are available for transient units. ✓ SystemCallFilter= ✓ SystemCallArchitectures= ✓ SystemCallErrorNumber= +✓ SystemCallLog= ✓ MemoryDenyWriteExecute= ✓ RestrictNamespaces= ✓ RestrictRealtime= diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 46fa900894..d0bb5fc962 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -2136,6 +2136,21 @@ SystemCallErrorNumber=EPERM details. + + SystemCallLog= + + Takes a space-separated list of system call names. If this setting is used, the + listed system calls will be logged whenever they are executed by the unit processes. If the first + character of the list is ~, the effect is inverted: all system calls except the + listed system calls will be logged. 
If running in user mode, or in system mode, but without the + CAP_SYS_ADMIN capability (e.g. setting User=nobody), + NoNewPrivileges=yes is implied. This feature makes use of the Secure Computing + Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for auditing or setting up a + minimal sandboxing environment. This option may be specified more than once, in which case the filter + masks are merged. If the empty string is assigned, the filter is reset, all prior assignments will + have no effect. This does not affect commands prefixed with +. + + diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 05d46520af..b3d417cac7 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -415,6 +415,58 @@ static int property_get_syscall_filter( return sd_bus_message_close_container(reply); } +static int property_get_syscall_log( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + _cleanup_strv_free_ char **l = NULL; + int r; + +#if HAVE_SECCOMP + void *id, *val; +#endif + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'r', "bas"); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "b", c->syscall_log_allow_list); + if (r < 0) + return r; + +#if HAVE_SECCOMP + HASHMAP_FOREACH_KEY(val, id, c->syscall_log) { + char *name = NULL; + + name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1); + if (!name) + continue; + + r = strv_consume(&l, name); + if (r < 0) + return r; + } +#endif + + strv_sort(l); + + r = sd_bus_message_append_strv(reply, l); + if (r < 0) + return r; + + return sd_bus_message_close_container(reply); +} + static int property_get_syscall_archs( sd_bus *bus, const char *path, @@ -1068,6 +1120,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("SystemCallFilter", "(bas)", property_get_syscall_filter, 0, 
SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SystemCallErrorNumber", "i", bus_property_get_int, offsetof(ExecContext, syscall_errno), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("SystemCallLog", "(bas)", property_get_syscall_log, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Personality", "s", property_get_personality, offsetof(ExecContext, personality), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -2230,6 +2283,76 @@ int bus_exec_context_set_transient_property( return 1; + } else if (streq(name, "SystemCallLog")) { + int allow_list; + _cleanup_strv_free_ char **l = NULL; + + r = sd_bus_message_enter_container(message, 'r', "bas"); + if (r < 0) + return r; + + r = sd_bus_message_read(message, "b", &allow_list); + if (r < 0) + return r; + + r = sd_bus_message_read_strv(message, &l); + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + _cleanup_free_ char *joined = NULL; + SeccompParseFlags invert_flag = allow_list ? 
0 : SECCOMP_PARSE_INVERT; + char **s; + + if (strv_isempty(l)) { + c->syscall_log_allow_list = false; + c->syscall_log = hashmap_free(c->syscall_log); + + unit_write_settingf(u, flags, name, "SystemCallLog="); + return 1; + } + + if (!c->syscall_log) { + c->syscall_log = hashmap_new(NULL); + if (!c->syscall_log) + return log_oom(); + + c->syscall_log_allow_list = allow_list; + } + + STRV_FOREACH(s, l) { + _cleanup_free_ char *n = NULL; + int e; + + r = parse_syscall_and_errno(*s, &n, &e); + if (r < 0) + return r; + + r = seccomp_parse_syscall_filter(n, + 0, /* errno not used */ + c->syscall_log, + SECCOMP_PARSE_LOG | SECCOMP_PARSE_PERMISSIVE | + invert_flag | + (c->syscall_log_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0), + u->id, + NULL, 0); + if (r < 0) + return r; + } + + joined = strv_join(l, " "); + if (!joined) + return -ENOMEM; + + unit_write_settingf(u, flags, name, "SystemCallLog=%s%s", allow_list ? "" : "~", joined); + } + + return 1; + } else if (streq(name, "SystemCallArchitectures")) { _cleanup_strv_free_ char **l = NULL; diff --git a/src/core/execute.c b/src/core/execute.c index d9fdebcd70..be35093969 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1407,6 +1407,13 @@ static bool context_has_syscall_filters(const ExecContext *c) { !hashmap_isempty(c->syscall_filter); } +static bool context_has_syscall_logs(const ExecContext *c) { + assert(c); + + return c->syscall_log_allow_list || + !hashmap_isempty(c->syscall_log); +} + static bool context_has_no_new_privileges(const ExecContext *c) { assert(c); @@ -1428,6 +1435,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) { c->protect_kernel_logs || c->private_devices || context_has_syscall_filters(c) || + context_has_syscall_logs(c) || !set_isempty(c->syscall_archs) || c->lock_personality || c->protect_hostname; @@ -1484,6 +1492,39 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ return seccomp_load_syscall_filter_set_raw(default_action, 
c->syscall_filter, action, false); } +static int apply_syscall_log(const Unit* u, const ExecContext *c) { +#ifdef SCMP_ACT_LOG + uint32_t default_action, action; +#endif + + assert(u); + assert(c); + + if (!context_has_syscall_logs(c)) + return 0; + +#ifdef SCMP_ACT_LOG + if (skip_seccomp_unavailable(u, "SystemCallLog=")) + return 0; + + if (c->syscall_log_allow_list) { + /* Log nothing but the ones listed */ + default_action = SCMP_ACT_ALLOW; + action = SCMP_ACT_LOG; + } else { + /* Log everything but the ones listed */ + default_action = SCMP_ACT_LOG; + action = SCMP_ACT_ALLOW; + } + + return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_log, action, false); +#else + /* old libseccomp */ + log_unit_debug(u, "SECCOMP feature SCMP_ACT_LOG not available, skipping SystemCallLog="); + return 0; +#endif +} + static int apply_syscall_archs(const Unit *u, const ExecContext *c) { assert(u); assert(c); @@ -4438,6 +4479,12 @@ static int exec_child( return log_unit_error_errno(unit, r, "Failed to lock personalities: %m"); } + r = apply_syscall_log(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return log_unit_error_errno(unit, r, "Failed to apply system call log filters: %m"); + } + /* This really should remain the last step before the execve(), to make sure our own code is unaffected * by the filter as little as possible. 
*/ r = apply_syscall_filter(unit, context, needs_ambient_hack); diff --git a/src/core/execute.h b/src/core/execute.h index 810e585fa8..02a2c8d1e7 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -302,6 +302,9 @@ struct ExecContext { int syscall_errno; bool syscall_allow_list:1; + Hashmap *syscall_log; + bool syscall_log_allow_list:1; /* Log listed system calls */ + bool address_families_allow_list:1; Set *address_families; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 4bad8314dc..c60d565eb4 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -79,6 +79,7 @@ m4_ifdef(`HAVE_SECCOMP', `$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context) $1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs) $1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context) +$1.SystemCallLog, config_parse_syscall_log, 0, offsetof($1, exec_context) $1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute) $1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context) $1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime) @@ -88,6 +89,7 @@ $1.LockPersonality, config_parse_bool, 0, `$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 +$1.SystemCallLog, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index ae361b6020..a4240225f1 
100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -3197,6 +3197,86 @@ int config_parse_syscall_filter( } } +int config_parse_syscall_log( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + _unused_ const Unit *u = userdata; + bool invert = false; + const char *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + if (isempty(rvalue)) { + /* Empty assignment resets the list */ + c->syscall_log = hashmap_free(c->syscall_log); + c->syscall_log_allow_list = false; + return 0; + } + + if (rvalue[0] == '~') { + invert = true; + rvalue++; + } + + if (!c->syscall_log) { + c->syscall_log = hashmap_new(NULL); + if (!c->syscall_log) + return log_oom(); + + if (invert) + /* Log everything but the ones listed */ + c->syscall_log_allow_list = false; + else + /* Log nothing but the ones listed */ + c->syscall_log_allow_list = true; + } + + p = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL, *name = NULL; + int num; + + r = extract_first_word(&p, &word, NULL, 0); + if (r == 0) + return 0; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + return 0; + } + + r = parse_syscall_and_errno(word, &name, &num); + if (r < 0 || num >= 0) { /* errno code not allowed */ + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse syscall, ignoring: %s", word); + continue; + } + + r = seccomp_parse_syscall_filter( + name, 0, c->syscall_log, + SECCOMP_PARSE_LOG|SECCOMP_PARSE_PERMISSIVE| + (invert ? SECCOMP_PARSE_INVERT : 0)| + (c->syscall_log_allow_list ? 
SECCOMP_PARSE_ALLOW_LIST : 0), + unit, filename, line); + if (r < 0) + return r; + } +} + int config_parse_syscall_archs( const char *unit, const char *filename, @@ -5444,6 +5524,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_syscall_filter, "SYSCALLS" }, { config_parse_syscall_archs, "ARCHS" }, { config_parse_syscall_errno, "ERRNO" }, + { config_parse_syscall_log, "SYSCALLS" }, { config_parse_address_families, "FAMILIES" }, { config_parse_restrict_namespaces, "NAMESPACES" }, #endif diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index e90953b80f..3504227cae 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -65,6 +65,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_unit_requires_mounts_for); CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter); CONFIG_PARSER_PROTOTYPE(config_parse_syscall_archs); CONFIG_PARSER_PROTOTYPE(config_parse_syscall_errno); +CONFIG_PARSER_PROTOTYPE(config_parse_syscall_log); CONFIG_PARSER_PROTOTYPE(config_parse_environ); CONFIG_PARSER_PROTOTYPE(config_parse_pass_environ); CONFIG_PARSER_PROTOTYPE(config_parse_unset_environ); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index eb62e1231b..3ae3c12f92 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -1299,7 +1299,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con } if (STR_IN_SET(field, "RestrictAddressFamilies", - "SystemCallFilter")) { + "SystemCallFilter", + "SystemCallLog")) { int allow_list = 1; const char *p = eq; diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 0b7cdbaadf..358960d5c4 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -1073,6 +1073,10 @@ int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, u if (error == SECCOMP_ERROR_NUMBER_KILL) a = scmp_act_kill_process(); +#ifdef SCMP_ACT_LOG + else if (action == SCMP_ACT_LOG) + a = SCMP_ACT_LOG; +#endif else if (action != 
SCMP_ACT_ALLOW && error >= 0) a = SCMP_ACT_ERRNO(error); diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index aa046c6ada..115983f98b 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -4822,7 +4822,7 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m return 1; - } else if (STR_IN_SET(name, "SystemCallFilter", "RestrictAddressFamilies")) { + } else if (STR_IN_SET(name, "SystemCallFilter", "SystemCallLog", "RestrictAddressFamilies")) { _cleanup_strv_free_ char **l = NULL; int allow_list;