From ec2ebfd524447caf14e63b49b3f63117b93c7c78 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 18:26:30 +0200 Subject: [PATCH 01/10] update-done: minor clean-ups This is a follow-up for fb8b0869a7bc30e23be175cf978df23192d59118, and makes a couple of minor clean-up changes: - The field name in the timestamp file is changed from "TimestampNSec=" to "TIMESTAMP_NSEC=". This is done simply to reflect the fact that we parse the file with the env var file parser, and hence the contents should better follow the usual capitalization of env vars, i.e. be all uppercase. - Needless negation of the errno parameter log_error_errno() and friends has been removed. - Instead of manually calculating the nsec remainder of the timestamp, use timespec_store(). - We now check whether we were able to write the timestamp file in full with fflush_and_check() the way we usually do it. --- src/shared/condition.c | 9 ++++----- src/update-done/update-done.c | 15 +++++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/shared/condition.c b/src/shared/condition.c index f13fa6a9fd..69b4837e1f 100644 --- a/src/shared/condition.c +++ b/src/shared/condition.c @@ -329,9 +329,9 @@ static int condition_test_needs_update(Condition *c) { uint64_t timestamp; int r; - r = parse_env_file(p, NULL, "TimestampNSec", ×tamp_str, NULL); + r = parse_env_file(p, NULL, "TIMESTAMP_NSEC", ×tamp_str, NULL); if (r < 0) { - log_error_errno(-r, "Failed to parse timestamp file '%s', using mtime: %m", p); + log_error_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p); return true; } else if (r == 0) { log_debug("No data in timestamp file '%s', using mtime", p); @@ -340,12 +340,11 @@ static int condition_test_needs_update(Condition *c) { r = safe_atou64(timestamp_str, ×tamp); if (r < 0) { - log_error_errno(-r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", - timestamp_str, p); + log_error_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p); return true; } - other.st_mtim.tv_nsec = timestamp % NSEC_PER_SEC; + timespec_store(&other.st_mtim, timestamp); } return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec; diff --git a/src/update-done/update-done.c b/src/update-done/update-done.c index 5cc5abfddf..48c2a3fff4 100644 --- a/src/update-done/update-done.c +++ b/src/update-done/update-done.c @@ -18,6 +18,7 @@ ***/ #include "fd-util.h" +#include "fileio.h" #include "io-util.h" #include "selinux-util.h" #include "util.h" @@ -32,8 +33,8 @@ static int apply_timestamp(const char *path, struct timespec *ts) { *ts, *ts }; - int fd = -1; _cleanup_fclose_ FILE *f = NULL; + int fd = -1; int r; assert(path); @@ -59,18 +60,20 @@ static int apply_timestamp(const char *path, struct timespec *ts) { return log_error_errno(errno, "Failed to create/open timestamp file %s: %m", path); } - f = fdopen(fd, "w"); + f = fdopen(fd, "we"); if (!f) { safe_close(fd); return log_error_errno(errno, "Failed to fdopen() timestamp file %s: %m", path); } (void) fprintf(f, - "%s" - "TimestampNSec=" NSEC_FMT "\n", - MESSAGE, timespec_load_nsec(ts)); + MESSAGE + "TIMESTAMP_NSEC=" NSEC_FMT "\n", + timespec_load_nsec(ts)); - fflush(f); + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write timestamp file: %m"); if (futimens(fd, twice) < 0) return log_error_errno(errno, "Failed to update timestamp on %s: %m", path); From f673b62df67dfa67ddfa4f5a72d78ea3c002278f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:03:51 +0200 Subject: [PATCH 02/10] core: simplify skip_seccomp_unavailable() a bit Let's prefer early-exit over deep-indented if blocks. Not behavioural change. --- src/core/execute.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index 53356c3c06..b69297241b 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1185,13 +1185,14 @@ static void rename_process_from_path(const char *path) { #ifdef HAVE_SECCOMP static bool skip_seccomp_unavailable(const Unit* u, const char* msg) { - if (!is_seccomp_available()) { - log_open(); - log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); - log_close(); - return true; - } - return false; + + if (is_seccomp_available()) + return false; + + log_open(); + log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); + log_close(); + return true; } static int apply_seccomp(const Unit* u, const ExecContext *c) { From e0f3720e399573134657458f4c8bd20c68fc092a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:05:49 +0200 Subject: [PATCH 03/10] core: move misplaced comment to the right place --- src/core/execute.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index b69297241b..e63a12f934 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1891,9 +1891,9 @@ static int setup_private_users(uid_t uid, gid_t gid) { asprintf(&uid_map, "0 0 1\n" /* Map root → root */ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */ - uid, uid); /* The case where the above is the same */ + uid, uid); else - uid_map = strdup("0 0 1\n"); + uid_map = strdup("0 0 1\n"); /* The case where the above is the same */ if (!uid_map) return -ENOMEM; From 8130926d32d76193e98ba783ba932816f276bfad Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:50:05 +0200 Subject: [PATCH 04/10] core: rework syscall filter set handling A variety of fixes: - rename the SystemCallFilterSet structure to SyscallFilterSet. So far the main instance of it (the syscall_filter_sets[] array) used to abbreviate "SystemCall" as "Syscall". Let's stick to one of the two syntaxes, and not mix and match too wildly. Let's pick the shorter name in this case, as it is sufficiently well established to not confuse hackers reading this. - Export explicit indexes into the syscall_filter_sets[] array via an enum. This way, code that wants to make use of a specific filter set, can index it directly via the enum, instead of having to search for it. This makes apply_private_devices() in particular a lot simpler. - Provide two new helper calls in seccomp-util.c: syscall_filter_set_find() to find a set by its name, seccomp_add_syscall_filter_set() to add a set to a seccomp object. - Update SystemCallFilter= parser to use extract_first_word(). Let's work on deprecating FOREACH_WORD_QUOTED(). - Simplify apply_private_devices() using this functionality --- src/core/execute.c | 41 +----------- src/core/load-fragment.c | 54 +++++++++------- src/shared/seccomp-util.c | 128 ++++++++++++++++++++++++++++---------- src/shared/seccomp-util.h | 30 +++++++-- 4 files changed, 154 insertions(+), 99 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index e63a12f934..18bb67cda9 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1578,10 +1578,7 @@ finish: } static int apply_private_devices(Unit *u, const ExecContext *c) { - const SystemCallFilterSet *set; scmp_filter_ctx *seccomp; - const char *sys; - bool syscalls_found = false; int r; assert(c); @@ -1599,43 +1596,9 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - for (set = syscall_filter_sets; set->set_name; set++) - if (streq(set->set_name, "@raw-io")) { - syscalls_found = true; - break; - } - - /* We should never fail here */ - if (!syscalls_found) { - r = -EOPNOTSUPP; + r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); + if (r < 0) goto finish; - } - - NULSTR_FOREACH(sys, set->value) { - int id; - bool add = true; - -#ifndef __NR_s390_pci_mmio_read - if (streq(sys, "s390_pci_mmio_read")) - add = false; -#endif -#ifndef __NR_s390_pci_mmio_write - if (streq(sys, "s390_pci_mmio_write")) - add = false; -#endif - - if (!add) - continue; - - id = seccomp_syscall_resolve_name(sys); - - r = seccomp_rule_add( - seccomp, - SCMP_ACT_ERRNO(EPERM), - id, 0); - if (r < 0) - goto finish; - } r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); if (r < 0) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 6f68e23340..118b39c1cf 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2618,6 +2618,7 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP + static int syscall_filter_parse_one( const char *unit, const char *filename, @@ -2628,27 +2629,29 @@ static int syscall_filter_parse_one( bool warn) { int r; - if (*t == '@') { - const SystemCallFilterSet *set; + if (t[0] == '@') { + const SyscallFilterSet *set; + const char *i; - for (set = syscall_filter_sets; set->set_name; set++) - if (streq(set->set_name, t)) { - const char *sys; + set = syscall_filter_set_find(t); + if (!set) { + if (warn) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t); + return 0; + } - NULSTR_FOREACH(sys, set->value) { - r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false); - if (r < 0) - return r; - } - break; - } + NULSTR_FOREACH(i, set->value) { + r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false); + if (r < 0) + return r; + } } else { int id; id = seccomp_syscall_resolve_name(t); if (id == __NR_SCMP_ERROR) { if (warn) - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t); return 0; } @@ -2662,8 +2665,9 @@ static int syscall_filter_parse_one( if (r < 0) return log_oom(); } else - set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); } + return 0; } @@ -2682,8 +2686,7 @@ int config_parse_syscall_filter( ExecContext *c = data; Unit *u = userdata; bool invert = false; - const char *word, *state; - size_t l; + const char *p; int r; assert(filename); @@ -2722,19 +2725,24 @@ int config_parse_syscall_filter( } } - FOREACH_WORD_QUOTED(word, l, rvalue, state) { - _cleanup_free_ char *t = NULL; + p = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL; - t = strndup(word, l); - if (!t) + r = extract_first_word(&p, &word, NULL, 0); + if (r == 0) + break; + if (r == -ENOMEM) return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + break; + } - r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true); + r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true); if (r < 0) return r; } - if (!isempty(state)) - log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); /* Turn on NNP, but only if it wasn't configured explicitly * before, and only if we are in user mode. */ diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 8116c7671f..1d51f3fd1f 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -26,6 +26,7 @@ #include "macro.h" #include "seccomp-util.h" #include "string-util.h" +#include "util.h" const char* seccomp_arch_to_string(uint32_t c) { @@ -132,28 +133,30 @@ bool is_seccomp_available(void) { return cached_enabled; } -const SystemCallFilterSet syscall_filter_sets[] = { - { +const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { + [SYSCALL_FILTER_SET_CLOCK] = { /* Clock */ - .set_name = "@clock", + .name = "@clock", .value = "adjtimex\0" "clock_adjtime\0" "clock_settime\0" "settimeofday\0" "stime\0" - }, { + }, + [SYSCALL_FILTER_SET_CPU_EMULATION] = { /* CPU emulation calls */ - .set_name = "@cpu-emulation", + .name = "@cpu-emulation", .value = "modify_ldt\0" "subpage_prot\0" "switch_endian\0" "vm86\0" "vm86old\0" - }, { + }, + [SYSCALL_FILTER_SET_DEBUG] = { /* Debugging/Performance Monitoring/Tracing */ - .set_name = "@debug", + .name = "@debug", .value = "lookup_dcookie\0" "perf_event_open\0" @@ -161,11 +164,14 @@ const SystemCallFilterSet syscall_filter_sets[] = { "process_vm_writev\0" "ptrace\0" "rtas\0" +#ifdef __NR_s390_runtime_instr "s390_runtime_instr\0" +#endif "sys_debug_setcontext\0" - }, { + }, + [SYSCALL_FILTER_SET_DEFAULT] = { /* Default list */ - .set_name = "@default", + .name = "@default", .value = "execve\0" "exit\0" @@ -173,9 +179,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "getrlimit\0" /* make sure processes can query stack size and such */ "rt_sigreturn\0" "sigreturn\0" - }, { + }, + [SYSCALL_FILTER_SET_IO_EVENT] = { /* Event loop use */ - .set_name = "@io-event", + .name = "@io-event", .value = "_newselect\0" "epoll_create1\0" @@ -191,9 +198,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "ppoll\0" "pselect6\0" "select\0" - }, { + }, + [SYSCALL_FILTER_SET_IPC] = { /* Message queues, SYSV IPC or other IPC: unusual */ - .set_name = "@ipc", + .name = "@ipc", .value = "ipc\0" "mq_getsetattr\0" "mq_notify\0" @@ -215,23 +223,26 @@ const SystemCallFilterSet syscall_filter_sets[] = { "shmctl\0" "shmdt\0" "shmget\0" - }, { + }, + [SYSCALL_FILTER_SET_KEYRING] = { /* Keyring */ - .set_name = "@keyring", + .name = "@keyring", .value = "add_key\0" "keyctl\0" "request_key\0" - }, { + }, + [SYSCALL_FILTER_SET_MODULE] = { /* Kernel module control */ - .set_name = "@module", + .name = "@module", .value = "delete_module\0" "finit_module\0" "init_module\0" - }, { + }, + [SYSCALL_FILTER_SET_MOUNT] = { /* Mounting */ - .set_name = "@mount", + .name = "@mount", .value = "chroot\0" "mount\0" @@ -239,9 +250,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "pivot_root\0" "umount2\0" "umount\0" - }, { + }, + [SYSCALL_FILTER_SET_NETWORK_IO] = { /* Network or Unix socket IO, should not be needed if not network facing */ - .set_name = "@network-io", + .name = "@network-io", .value = "accept4\0" "accept\0" @@ -264,9 +276,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "socket\0" "socketcall\0" "socketpair\0" - }, { + }, + [SYSCALL_FILTER_SET_OBSOLETE] = { /* Unusual, obsolete or unimplemented, some unknown even to libseccomp */ - .set_name = "@obsolete", + .name = "@obsolete", .value = "_sysctl\0" "afs_syscall\0" @@ -292,9 +305,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "uselib\0" "ustat\0" "vserver\0" - }, { + }, + [SYSCALL_FILTER_SET_PRIVILEGED] = { /* Nice grab-bag of all system calls which need superuser capabilities */ - .set_name = "@privileged", + .name = "@privileged", .value = "@clock\0" "@module\0" @@ -333,9 +347,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "swapon\0" "sysctl\0" "vhangup\0" - }, { + }, + [SYSCALL_FILTER_SET_PROCESS] = { /* Process control, execution, namespaces */ - .set_name = "@process", + .name = "@process", .value = "arch_prctl\0" "clone\0" @@ -349,19 +364,66 @@ const SystemCallFilterSet syscall_filter_sets[] = { "tkill\0" "unshare\0" "vfork\0" - }, { + }, + [SYSCALL_FILTER_SET_RAW_IO] = { /* Raw I/O ports */ - .set_name = "@raw-io", + .name = "@raw-io", .value = "ioperm\0" "iopl\0" "pciconfig_iobase\0" "pciconfig_read\0" "pciconfig_write\0" +#ifdef __NR_s390_pci_mmio_read "s390_pci_mmio_read\0" +#endif +#ifdef __NR_s390_pci_mmio_write "s390_pci_mmio_write\0" - }, { - .set_name = NULL, - .value = NULL - } +#endif + }, }; + +const SyscallFilterSet *syscall_filter_set_find(const char *name) { + unsigned i; + + if (isempty(name) || name[0] != '@') + return NULL; + + for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) + if (streq(syscall_filter_sets[i].name, name)) + return syscall_filter_sets + i; + + return NULL; +} + +int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) { + const char *sys; + int r; + + assert(seccomp); + assert(set); + + NULSTR_FOREACH(sys, set->value) { + int id; + + if (sys[0] == '@') { + const SyscallFilterSet *other; + + other = syscall_filter_set_find(sys); + if (!other) + return -EINVAL; + + r = seccomp_add_syscall_filter_set(seccomp, other, action); + } else { + id = seccomp_syscall_resolve_name(sys); + if (id == __NR_SCMP_ERROR) + return -EINVAL; + + r = seccomp_rule_add(seccomp, action, id, 0); + } + if (r < 0) + return r; + } + + return 0; +} diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index cca7c17912..34fd49c122 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -29,9 +29,31 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c); bool is_seccomp_available(void); -typedef struct SystemCallFilterSet { - const char *set_name; +typedef struct SyscallFilterSet { + const char *name; const char *value; -} SystemCallFilterSet; +} SyscallFilterSet; -extern const SystemCallFilterSet syscall_filter_sets[]; +enum { + SYSCALL_FILTER_SET_CLOCK, + SYSCALL_FILTER_SET_CPU_EMULATION, + SYSCALL_FILTER_SET_DEBUG, + SYSCALL_FILTER_SET_DEFAULT, + SYSCALL_FILTER_SET_IO_EVENT, + SYSCALL_FILTER_SET_IPC, + SYSCALL_FILTER_SET_KEYRING, + SYSCALL_FILTER_SET_MODULE, + SYSCALL_FILTER_SET_MOUNT, + SYSCALL_FILTER_SET_NETWORK_IO, + SYSCALL_FILTER_SET_OBSOLETE, + SYSCALL_FILTER_SET_PRIVILEGED, + SYSCALL_FILTER_SET_PROCESS, + SYSCALL_FILTER_SET_RAW_IO, + _SYSCALL_FILTER_SET_MAX +}; + +extern const SyscallFilterSet syscall_filter_sets[]; + +const SyscallFilterSet *syscall_filter_set_find(const char *name); + +int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action); From 25a8d8a0cb297f75b6b9fd3cc15747ba7f56031e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:12:33 +0200 Subject: [PATCH 05/10] core: rework apply_protect_kernel_modules() to use seccomp_add_syscall_filter_set() Let's simplify this call, by making use of the new infrastructure. This is actually more in line with Djalal's original patch but instead of search the filter set in the array by its name we can now use the set index and jump directly to it. --- src/core/execute.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index 18bb67cda9..f435a079c7 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1534,19 +1534,14 @@ finish: } static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { - static const int module_syscalls[] = { - SCMP_SYS(delete_module), - SCMP_SYS(finit_module), - SCMP_SYS(init_module), - }; scmp_filter_ctx *seccomp; - unsigned i; + const char *sys; int r; assert(c); - /* Turn of module syscalls on ProtectKernelModules=yes */ + /* Turn off module syscalls on ProtectKernelModules=yes */ if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; @@ -1559,12 +1554,9 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - for (i = 0; i < ELEMENTSOF(module_syscalls); i++) { - r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), - module_syscalls[i], 0); - if (r < 0) - goto finish; - } + r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); + if (r < 0) + goto finish; r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); if (r < 0) From 8d7b0c8fd780e88ab5a6d1d79e09e27247245bee Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:28:05 +0200 Subject: [PATCH 06/10] seccomp: add new seccomp_init_conservative() helper This adds a new seccomp_init_conservative() helper call that is mostly just a wrapper around seccomp_init(), but turns off NNP and adds in all secondary archs, for best compatibility with everything else. Pretty much all of our code used the very same constructs for these three steps, hence unifying this in one small function makes things a lot shorter. This also changes incorrect usage of the "scmp_filter_ctx" type at various places. libseccomp defines it as typedef to "void*", i.e. it is a pointer type (pretty poor choice already!) that casts implicitly to and from all other pointer types (even poorer choice: you defined a confusing type now, and don't even gain any bit of type safety through it...). A lot of the code assumed the type would refer to a structure, and hence aded additional "*" here and there. Remove that. --- src/core/execute.c | 88 ++++++++----------------------------- src/nspawn/nspawn-seccomp.c | 18 ++------ src/shared/seccomp-util.c | 30 ++++++++++++- src/shared/seccomp-util.h | 4 +- 4 files changed, 53 insertions(+), 87 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index f435a079c7..668504c5cf 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1197,7 +1197,7 @@ static bool skip_seccomp_unavailable(const Unit* u, const char* msg) { static int apply_seccomp(const Unit* u, const ExecContext *c) { uint32_t negative_action, action; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; void *id; int r; @@ -1248,7 +1248,7 @@ finish: } static int apply_address_families(const Unit* u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; int r; @@ -1257,13 +1257,9 @@ static int apply_address_families(const Unit* u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "RestrictAddressFamilies=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; if (c->address_families_whitelist) { int af, first = 0, last = 0; @@ -1360,10 +1356,6 @@ static int apply_address_families(const Unit* u, const ExecContext *c) { } } - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1372,7 +1364,7 @@ finish: } static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1380,13 +1372,9 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_rule_add( seccomp, @@ -1406,10 +1394,6 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1424,7 +1408,7 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { SCHED_IDLE, }; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; unsigned i; int r, p, max_policy = 0; @@ -1433,13 +1417,9 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "RestrictRealtime=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; /* Determine the highest policy constant we want to allow */ for (i = 0; i < ELEMENTSOF(permitted_policies); i++) @@ -1483,10 +1463,6 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1495,7 +1471,7 @@ finish: } static int apply_protect_sysctl(Unit *u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1506,13 +1482,9 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "ProtectKernelTunables=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_rule_add( seccomp, @@ -1522,10 +1494,6 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1534,9 +1502,7 @@ finish: } static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { - - scmp_filter_ctx *seccomp; - const char *sys; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1546,22 +1512,14 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1570,7 +1528,7 @@ finish: } static int apply_private_devices(Unit *u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1580,22 +1538,14 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "PrivateDevices=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c index 44a0b397ab..03a397d30c 100644 --- a/src/nspawn/nspawn-seccomp.c +++ b/src/nspawn/nspawn-seccomp.c @@ -135,15 +135,9 @@ int setup_seccomp(uint64_t cap_list_retain) { return 0; } - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return log_oom(); - - r = seccomp_add_secondary_archs(seccomp); - if (r < 0) { - log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m"); - goto finish; - } + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return log_error_errno(r, "Failed to allocate seccomp object: %m"); r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain); if (r < 0) @@ -171,12 +165,6 @@ int setup_seccomp(uint64_t cap_list_retain) { goto finish; } - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) { - log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m"); - goto finish; - } - r = seccomp_load(seccomp); if (r < 0) { log_error_errno(r, "Failed to install seccomp audit filter: %m"); diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 1d51f3fd1f..0b9fa47c44 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -74,7 +74,34 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret) { return 0; } -int seccomp_add_secondary_archs(scmp_filter_ctx *c) { +int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action) { + scmp_filter_ctx seccomp; + int r; + + /* Much like seccomp_init(), but tries to be a bit more conservative in its defaults: all secondary archs are + * added by default, and NNP is turned off. */ + + seccomp = seccomp_init(default_action); + if (!seccomp) + return -ENOMEM; + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + *ret = seccomp; + return 0; + +finish: + seccomp_release(seccomp); + return r; +} + +int seccomp_add_secondary_archs(scmp_filter_ctx c) { #if defined(__i386__) || defined(__x86_64__) int r; @@ -111,7 +138,6 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) { #endif return 0; - } static bool is_basic_seccomp_available(void) { diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 34fd49c122..2de429a772 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -25,7 +25,9 @@ const char* seccomp_arch_to_string(uint32_t c); int seccomp_arch_from_string(const char *n, uint32_t *ret); -int seccomp_add_secondary_archs(scmp_filter_ctx *c); +int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action); + +int seccomp_add_secondary_archs(scmp_filter_ctx c); bool is_seccomp_available(void); From 60f547cf684d27e8c0e7ff44663650e90f9e0bcf Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:15:43 +0200 Subject: [PATCH 07/10] seccomp: two fixes for the syscall set tables "oldumount()" is not a syscall, but simply a wrapper for it, the actual syscall nr is called "umount" (and the nr of umount() is called umount2 internally). "sysctl()" is not a syscall, but "_syscall()" is. Fix this in the table. Without these changes libseccomp cannot actually translate the tables in full. This wasn't noticed before as the code was written defensively for this case. --- src/shared/seccomp-util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 0b9fa47c44..f1e9de05b2 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -272,7 +272,6 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { .value = "chroot\0" "mount\0" - "oldumount\0" "pivot_root\0" "umount2\0" "umount\0" @@ -371,7 +370,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "setuid\0" "swapoff\0" "swapon\0" - "sysctl\0" + "_sysctl\0" "vhangup\0" }, [SYSCALL_FILTER_SET_PROCESS] = { From a3be2849b2570482757f83181b999febbfc7bbef Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:18:46 +0200 Subject: [PATCH 08/10] seccomp: add new helper call seccomp_load_filter_set() This allows us to unify most of the code in apply_protect_kernel_modules() and apply_private_devices(). --- src/core/execute.c | 34 ++-------------------------------- src/shared/seccomp-util.c | 24 ++++++++++++++++++++++++ src/shared/seccomp-util.h | 2 ++ 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index 668504c5cf..5e7d7c25d7 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1502,9 +1502,6 @@ finish: } static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { - scmp_filter_ctx seccomp; - int r; - assert(c); /* Turn off module syscalls on ProtectKernelModules=yes */ @@ -1512,25 +1509,10 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; - r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); - if (r < 0) - return r; - - r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); - if (r < 0) - goto finish; - - r = seccomp_load(seccomp); - -finish: - seccomp_release(seccomp); - return r; + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); } static int apply_private_devices(Unit *u, const ExecContext *c) { - scmp_filter_ctx seccomp; - int r; - assert(c); /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ @@ -1538,19 +1520,7 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "PrivateDevices=")) return 0; - r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); - if (r < 0) - return r; - - r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); - if (r < 0) - goto finish; - - r = seccomp_load(seccomp); - -finish: - seccomp_release(seccomp); - return r; + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); } #endif diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index f1e9de05b2..6252cd16a6 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -452,3 +452,27 @@ int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterS return 0; } + +int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) { + scmp_filter_ctx seccomp; + int r; + + assert(set); + + /* The one-stop solution: allocate a seccomp object, add a filter to it, and apply it */ + + r = seccomp_init_conservative(&seccomp, default_action); + if (r < 0) + return r; + + r = seccomp_add_syscall_filter_set(seccomp, set, action); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; + +} diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 2de429a772..667687b14f 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -59,3 +59,5 @@ extern const SyscallFilterSet syscall_filter_sets[]; const SyscallFilterSet *syscall_filter_set_find(const char *name); int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action); + +int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action); From f6281133def1da2d7ac875b8cf5af5c32bc63fd8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:48:10 +0200 Subject: [PATCH 09/10] seccomp: add test-seccomp test tool This validates the system call set table and many of our seccomp-util.c APIs. --- Makefile.am | 11 ++++ src/shared/seccomp-util.h | 1 + src/test/test-seccomp.c | 103 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 src/test/test-seccomp.c diff --git a/Makefile.am b/Makefile.am index 18a5f4a82a..8ef3b42c41 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1570,6 +1570,11 @@ tests += \ test-acl-util endif +if HAVE_SECCOMP +tests += \ + test-seccomp +endif + EXTRA_DIST += \ test/a.service \ test/basic.target \ @@ -2038,6 +2043,12 @@ test_acl_util_SOURCES = \ test_acl_util_LDADD = \ libsystemd-shared.la +test_seccomp_SOURCES = \ + src/test/test-seccomp.c + +test_seccomp_LDADD = \ + libsystemd-shared.la + test_namespace_LDADD = \ libcore.la diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 667687b14f..8050fc6fbf 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -20,6 +20,7 @@ ***/ #include +#include #include const char* seccomp_arch_to_string(uint32_t c); diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c new file mode 100644 index 0000000000..0060ecdf02 --- /dev/null +++ b/src/test/test-seccomp.c @@ -0,0 +1,103 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include + +#include "fd-util.h" +#include "macro.h" +#include "process-util.h" +#include "seccomp-util.h" + +static void test_seccomp_arch_to_string(void) { + uint32_t a, b; + const char *name; + + a = seccomp_arch_native(); + assert_se(a > 0); + name = seccomp_arch_to_string(a); + assert_se(name); + assert_se(seccomp_arch_from_string(name, &b) >= 0); + assert_se(a == b); +} + +static void test_syscall_filter_set_find(void) { + assert_se(!syscall_filter_set_find(NULL)); + assert_se(!syscall_filter_set_find("")); + assert_se(!syscall_filter_set_find("quux")); + assert_se(!syscall_filter_set_find("@quux")); + + assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK); + assert_se(syscall_filter_set_find("@default") == syscall_filter_sets + SYSCALL_FILTER_SET_DEFAULT); + assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO); +} + +static void test_filter_sets(void) { + unsigned i; + int r; + + if (!is_seccomp_available()) + return; + + if (geteuid() != 0) + return; + + for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) { + pid_t pid; + + log_info("Testing %s", syscall_filter_sets[i].name); + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { /* Child? */ + int fd; + + if (i == SYSCALL_FILTER_SET_DEFAULT) /* if we look at the default set, whitelist instead of blacklist */ + r = seccomp_load_filter_set(SCMP_ACT_ERRNO(EPERM), syscall_filter_sets + i, SCMP_ACT_ALLOW); + else + r = seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EPERM)); + if (r < 0) + _exit(EXIT_FAILURE); + + /* Test the sycall filter with one random system call */ + fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC); + if (IN_SET(i, SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_DEFAULT)) + assert_se(fd < 0 && errno == EPERM); + else { + assert_se(fd >= 0); + safe_close(fd); + } + + _exit(EXIT_SUCCESS); + } + + assert_se(wait_for_terminate_and_warn(syscall_filter_sets[i].name, pid, true) == EXIT_SUCCESS); + } +} + +int main(int argc, char *argv[]) { + + test_seccomp_arch_to_string(); + test_syscall_filter_set_find(); + test_filter_sets(); + + return 0; +} From 171ae2cd86390c17d51121f9dff607911b888c5a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:15:18 +0200 Subject: [PATCH 10/10] Various additions to NEWS --- NEWS | 97 ++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 26 deletions(-) diff --git a/NEWS b/NEWS index 87cc4f48c0..2a1edbe766 100644 --- a/NEWS +++ b/NEWS @@ -35,14 +35,14 @@ CHANGES WITH 232 in spe ProtectSystem=strict enabled, so they are not able to make any permanent modifications to the system. - The nss-systemd module also always resolves root and nobody, making + * The nss-systemd module also always resolves root and nobody, making it possible to have no /etc/passwd or /etc/group files in minimal - container systems. + container or chroot environments. * Services may be started with their own user namespace using the new - PrivateUsers= option. Only root, nobody, and the uid/gid under which - the service is running are mapped. All other users are mapped to - nobody. + boolean PrivateUsers= option. Only root, nobody, and the uid/gid + under which the service is running are mapped. All other users are + mapped to nobody. * Support for the cgroup namespace has been added to systemd-nspawn. If supported by kernel, the container system started by systemd-nspawn @@ -57,12 +57,22 @@ CHANGES WITH 232 in spe options. This controller requires out-of-tree patches for the kernel and the support is provisional. - * .automount units may now be transient. + * Mount and automount units may now be created transiently + (i.e. dynamically at runtime via the bus API, instead of requiring + unit files in the file system). - * systemd-mount is a new tool which wraps mount(8) to pull in - additional dependencies through transient .mount and .automount - units. For example, this automatically runs fsck on the block device - before mounting, and allows the automount logic to be used. + * systemd-mount is a new tool which may mount file systems – much like + mount(8), optionally pulling in additional dependencies through + transient .mount and .automount units. For example, this tool + automatically runs fsck on a backing block device before mounting, + and allows the automount logic to be used dynamically from the + command line for establishing mount points. This tool is particularly + useful when dealing with removable media, as it will ensure fsck is + run – if necessary – before the first access and that the file system + is quickly unmounted after each access by utilizing the automount + logic. This maximizes the chance that the file system on the + removable media stays in a clean state, and if it isn't in a clean + state is fixed automatically. * LazyUnmount=yes option for mount units has been added to expose the umount --lazy option. Similarly, ForceUnmount=yes exposes the --force @@ -75,6 +85,12 @@ CHANGES WITH 232 in spe mount the EFI partition on systems where /boot is used for something else. + * When operating on GPT disk images for containers, systemd-nspawn will + now mount the ESP to /boot or /efi according to the same rules as PID + 1 running on a host. This allows tools like "bootctl" to operate + correctly within such containers, in order to make container images + bootable on physical systems. + * disk/by-id and disk/by-path symlinks are now created for NVMe drives. * Two new user session targets have been added to support running @@ -95,7 +111,7 @@ CHANGES WITH 232 in spe the top of the process hierarchy (which is usually the init process of the container). - * systemd-journal-gatewayd learned the --directory option to serve + * systemd-journal-gatewayd learned the --directory= option to serve files from the specified location. * journalctl --root=… can be used to peruse the journal in the @@ -112,23 +128,26 @@ CHANGES WITH 232 in spe a click rate that is different than the one for the vertical wheel. * systemd-run gained a new --wait option that makes service execution - synchronous. + synchronous. (Specifically, the command will not return until the + specified service binary exited.) - systemctl gained a new --wait option that causes the start command to + * systemctl gained a new --wait option that causes the start command to wait until the units being started have terminated again. - * A new journal output mode "short-full" has been added which uses + * A new journal output mode "short-full" has been added which displays timestamps with abbreviated English day names and adds a timezone - suffix. Those timestamps include more information and can be parsed - by journalctl. + suffix. Those timestamps include more information than the default + "short" output mode, and can be passed directly to journalctl's + --since= and --until= options. * /etc/resolv.conf will be bind-mounted into containers started by systemd-nspawn, if possible, so any changes to resolv.conf contents are automatically propagated to the container. * The number of instances for socket-activated services originating - from a single IP can be limited with MaxConnectionsPerSource=, - extending the existing setting of MaxConnections. + from a single IP address can be limited with + MaxConnectionsPerSource=, extending the existing setting of + MaxConnections=. * systemd-networkd gained support for vcan ("Virtual CAN") interface configuration. @@ -143,21 +162,23 @@ CHANGES WITH 232 in spe GenericReceiveOffload=, LargeReceiveOffload= options in the [Link] section of .link files. - Spanning Tree Protocol enablement, Priority, Aging Time, and the - Default Port VLAN ID can be configured for bridge devices using the - new STP=, Priority=, AgeingTimeSec=, and DefaultPVID= settings in the - [Bridge] section of .netdev files. + * The Spanning Tree Protocol, Priority, Aging Time, and the Default + Port VLAN ID can be configured for bridge devices using the new STP=, + Priority=, AgeingTimeSec=, and DefaultPVID= settings in the [Bridge] + section of .netdev files. - The route table to which routes received over DHCP or RA should be + * The route table to which routes received over DHCP or RA should be added can be configured with the new RouteTable= option in the [DHCP] and [IPv6AcceptRA] sections of .network files. - Address Resolution Protocol can be disabled on links managed by + * The Address Resolution Protocol can be disabled on links managed by systemd-networkd using the ARP=no setting in the [Link] section of .network files. - * $SERVICE_RESULT, $EXIT_CODE, $EXIT_STATUS are set for ExecStop= and - ExecStopPost= commands. + * New environment variables $SERVICE_RESULT, $EXIT_CODE and + $EXIT_STATUS are set for ExecStop= and ExecStopPost= commands, and + encode information about the result and exit codes of the current + service runtime cycle. * systemd-sysctl will now configure kernel parameters in the order they occur in the configuration files. This matches what sysctl @@ -184,6 +205,30 @@ CHANGES WITH 232 in spe $SYSTEMD_NSPAWN_SHARE_NS_UTS may be used to control the unsharing of individual namespaces. + * "machinectl list" now shows the IP address of running containers in + the output, as well as OS release information. + + * "loginctl list" now shows the TTY of each session in the output. + + * sd-bus gained new API calls sd_bus_track_set_recursive(), + sd_bus_track_get_recursive(), sd_bus_track_count_name(), + sd_bus_track_count_sender(). They permit usage of sd_bus_track peer + tracking objects in a "recursive" mode, where a single client can be + counted multiple times, if it takes multiple references. + + * sd-bus gained new API calls sd_bus_set_exit_on_disconnect() and + sd_bus_get_exit_on_disconnect(). They may be used to to make a + process using sd-bus automatically exit if the bus connection is + severed. + + * Bus clients of the service manager may now "pin" loaded units into + memory, by taking an explicit reference on them. This is useful to + ensure the client can retrieve runtime data about the service even + after the service completed execution. Taking such a reference is + available only for privileged clients and should be helpful to watch + running services in a race-free manner, and in particular collect + information about exit statuses and results. + CHANGES WITH 231: * In service units the various ExecXYZ= settings have been extended