diff --git a/NEWS b/NEWS index 238ea4a032..3383357c41 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,15 @@ systemd System and Service Manager CHANGES WITH 243 in spe: + * This release enables unprivileged programs (i.e. requiring neither + setuid nor file capabilities) to send ICMP Echo (i.e. ping) requests + by turning on the net.ipv4.ping_group_range sysctl of the Linux + kernel for the whole UNIX group range, i.e. all processes. This + change should be reasonably safe, as the kernel support for it was + specifically implemented to allow safe access to ICMP Echo for + processes lacking any privileges. If this is not desirable, it can be + disabled again by setting the parameter to "1 0". + * Previously, filters defined with SystemCallFilter= would have the effect that an calling an offending system call would terminate the calling thread. This behaviour never made much sense, since killing @@ -345,6 +354,9 @@ CHANGES WITH 243 in spe: * "localectl list-locales" won't list non-UTF-8 locales anymore. It's 2019. (You can set non-UTF-8 locales though, if you know there name.) + * If variable assignments in sysctl.d/ files are prefixed with "-" any + failures to apply them are now ignored. + Contributions from: Aaron Barany, Adrian Bunk, Alan Jenkins, Andrej Valek, Anita Zhang, Arian van Putten, Balint Reczey, Bastien Nocera, Ben Boeckel, Benjamin Robin, camoz, Chen Qi, Chris Chiu, Chris Down, diff --git a/man/sysctl.d.xml b/man/sysctl.d.xml index 0a8eeb62a4..21ef6de97c 100644 --- a/man/sysctl.d.xml +++ b/man/sysctl.d.xml @@ -59,6 +59,10 @@ /proc/sys/net/ipv4/conf/enp3s0.200/forwarding. + If a variable assignment is prefixed with a single - character, any attempts to + set it that fail will be ignored (though are logged). Moreover, any access permission errors, and + attempts to write variables not defined on the local system are ignored (and logged) too. + The settings configured with sysctl.d files will be applied early on boot. The network interface-specific options will also be applied individually for diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index 9838701a3d..c67782ea8f 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -30,25 +30,71 @@ static PagerFlags arg_pager_flags = 0; STATIC_DESTRUCTOR_REGISTER(arg_prefixes, strv_freep); +typedef struct Option { + char *key; + char *value; + bool ignore_failure; +} Option; + +static Option *option_free(Option *o) { + if (!o) + return NULL; + + free(o->key); + free(o->value); + + return mfree(o); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(Option*, option_free); +DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(option_hash_ops, char, string_hash_func, string_compare_func, Option, option_free); + +static Option *option_new( + const char *key, + const char *value, + bool ignore_failure) { + + _cleanup_(option_freep) Option *o = NULL; + + assert(key); + assert(value); + + o = new(Option, 1); + if (!o) + return NULL; + + *o = (Option) { + .key = strdup(key), + .value = strdup(value), + .ignore_failure = ignore_failure, + }; + + if (!o->key || !o->value) + return NULL; + + return TAKE_PTR(o); +} + static int apply_all(OrderedHashmap *sysctl_options) { - char *property, *value; + Option *option; Iterator i; int r = 0; - ORDERED_HASHMAP_FOREACH_KEY(value, property, sysctl_options, i) { + ORDERED_HASHMAP_FOREACH(option, sysctl_options, i) { int k; - k = sysctl_write(property, value); + k = sysctl_write(option->key, option->value); if (k < 0) { - /* If the sysctl is not available in the kernel or we are running with reduced privileges and - * cannot write it, then log about the issue at LOG_NOTICE level, and proceed without - * failing. (EROFS is treated as a permission problem here, since that's how container managers - * usually protected their sysctls.) In all other cases log an error and make the tool fail. */ + /* If the sysctl is not available in the kernel or we are running with reduced + * privileges and cannot write it, then log about the issue at LOG_NOTICE level, and + * proceed without failing. (EROFS is treated as a permission problem here, since + * that's how container managers usually protected their sysctls.) In all other cases + * log an error and make the tool fail. */ - if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT)) - log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", value, property); + if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT) || option->ignore_failure) + log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", option->value, option->key); else { - log_error_errno(k, "Couldn't write '%s' to '%s': %m", value, property); + log_error_errno(k, "Couldn't write '%s' to '%s': %m", option->value, option->key); if (r == 0) r = k; } @@ -94,9 +140,11 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign log_debug("Parsing %s", path); for (;;) { - char *p, *value, *new_value, *property, *existing; + _cleanup_(option_freep) Option *new_option = NULL; _cleanup_free_ char *l = NULL; - void *v; + bool ignore_failure; + Option *existing; + char *p, *value; int k; k = read_line(f, LONG_LINE_MAX, &l); @@ -116,8 +164,7 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign value = strchr(p, '='); if (!value) { - log_error("Line is not an assignment at '%s:%u': %s", path, c, p); - + log_syntax(NULL, LOG_WARNING, path, c, 0, "Line is not an assignment, ignoring: %s", p); if (r == 0) r = -EINVAL; continue; @@ -126,39 +173,37 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign *value = 0; value++; - p = sysctl_normalize(strstrip(p)); + p = strstrip(p); + ignore_failure = p[0] == '-'; + if (ignore_failure) + p++; + + p = sysctl_normalize(p); value = strstrip(value); if (!test_prefix(p)) continue; - existing = ordered_hashmap_get2(sysctl_options, p, &v); + existing = ordered_hashmap_get(sysctl_options, p); if (existing) { - if (streq(value, existing)) + if (streq(value, existing->value)) { + existing->ignore_failure = existing->ignore_failure || ignore_failure; continue; + } log_debug("Overwriting earlier assignment of %s at '%s:%u'.", p, path, c); - free(ordered_hashmap_remove(sysctl_options, p)); - free(v); + option_free(ordered_hashmap_remove(sysctl_options, p)); } - property = strdup(p); - if (!property) + new_option = option_new(p, value, ignore_failure); + if (!new_option) return log_oom(); - new_value = strdup(value); - if (!new_value) { - free(property); - return log_oom(); - } + k = ordered_hashmap_put(sysctl_options, new_option->key, new_option); + if (k < 0) + return log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", p); - k = ordered_hashmap_put(sysctl_options, property, new_value); - if (k < 0) { - log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", property); - free(property); - free(new_value); - return k; - } + TAKE_PTR(new_option); } return r; @@ -265,7 +310,7 @@ static int parse_argv(int argc, char *argv[]) { } static int run(int argc, char *argv[]) { - _cleanup_(ordered_hashmap_free_free_freep) OrderedHashmap *sysctl_options = NULL; + _cleanup_(ordered_hashmap_freep) OrderedHashmap *sysctl_options = NULL; int r, k; r = parse_argv(argc, argv); @@ -276,15 +321,15 @@ static int run(int argc, char *argv[]) { umask(0022); - sysctl_options = ordered_hashmap_new(&path_hash_ops); + sysctl_options = ordered_hashmap_new(&option_hash_ops); if (!sysctl_options) return log_oom(); - r = 0; - if (argc > optind) { int i; + r = 0; + for (i = optind; i < argc; i++) { k = parse_file(sysctl_options, argv[i], false); if (k < 0 && r == 0) diff --git a/sysctl.d/50-default.conf b/sysctl.d/50-default.conf index 27084f6242..41bd1f9183 100644 --- a/sysctl.d/50-default.conf +++ b/sysctl.d/50-default.conf @@ -30,6 +30,14 @@ net.ipv4.conf.all.accept_source_route = 0 # Promote secondary addresses when the primary address is removed net.ipv4.conf.all.promote_secondaries = 1 +# ping(8) without CAP_NET_ADMIN and CAP_NET_RAW +# The upper limit is set to 2^31-1. Values greater than that get rejected by +# the kernel because of this definition in linux/include/net/ping.h: +# #define GID_T_MAX (((gid_t)~0U) >> 1) +# That's not so bad because values between 2^31 and 2^32-1 are reserved on +# systemd-based systems anyway: https://systemd.io/UIDS-GIDS.html#summary +-net.ipv4.ping_group_range = 0 2147483647 + # Fair Queue CoDel packet scheduler to fight bufferbloat net.core.default_qdisc = fq_codel