From 8a65188437d4b70b15e52db02e210844bd45d57d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Jul 2019 09:03:06 +0200 Subject: [PATCH 1/7] sysctl: switch to log_syntax() With @keszybz' recent work this will give us clickable links in the journalctl output. --- src/sysctl/sysctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index 9838701a3d..eeefc8b8b0 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -116,8 +116,7 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign value = strchr(p, '='); if (!value) { - log_error("Line is not an assignment at '%s:%u': %s", path, c, p); - + log_syntax(NULL, LOG_WARNING, path, c, 0, "Line is not an assignment, ignoring: %s", p); if (r == 0) r = -EINVAL; continue; From 2de30233f7a6b582aa25e186daa9007c4ad8ef8d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Jul 2019 09:04:15 +0200 Subject: [PATCH 2/7] sysctl: reset 'r' only where needed --- src/sysctl/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index eeefc8b8b0..f601ae5fd5 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -279,11 +279,11 @@ static int run(int argc, char *argv[]) { if (!sysctl_options) return log_oom(); - r = 0; - if (argc > optind) { int i; + r = 0; + for (i = optind; i < argc; i++) { k = parse_file(sysctl_options, argv[i], false); if (k < 0 && r == 0) From dec02d6e1993d420a0a94c7fec294605df55e88e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Jul 2019 09:17:01 +0200 Subject: [PATCH 3/7] sysctl: if options are prefixed with "-" ignore write errors --- src/sysctl/sysctl.c | 112 +++++++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 33 deletions(-) diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index f601ae5fd5..c67782ea8f 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -30,25 +30,71 @@ static PagerFlags arg_pager_flags = 0; STATIC_DESTRUCTOR_REGISTER(arg_prefixes, strv_freep); +typedef struct Option { + char *key; + char *value; + bool ignore_failure; +} Option; + +static Option *option_free(Option *o) { + if (!o) + return NULL; + + free(o->key); + free(o->value); + + return mfree(o); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(Option*, option_free); +DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(option_hash_ops, char, string_hash_func, string_compare_func, Option, option_free); + +static Option *option_new( + const char *key, + const char *value, + bool ignore_failure) { + + _cleanup_(option_freep) Option *o = NULL; + + assert(key); + assert(value); + + o = new(Option, 1); + if (!o) + return NULL; + + *o = (Option) { + .key = strdup(key), + .value = strdup(value), + .ignore_failure = ignore_failure, + }; + + if (!o->key || !o->value) + return NULL; + + return TAKE_PTR(o); +} + static int apply_all(OrderedHashmap *sysctl_options) { - char *property, *value; + Option *option; Iterator i; int r = 0; - ORDERED_HASHMAP_FOREACH_KEY(value, property, sysctl_options, i) { + ORDERED_HASHMAP_FOREACH(option, sysctl_options, i) { int k; - k = sysctl_write(property, value); + k = sysctl_write(option->key, option->value); if (k < 0) { - /* If the sysctl is not available in the kernel or we are running with reduced privileges and - * cannot write it, then log about the issue at LOG_NOTICE level, and proceed without - * failing. (EROFS is treated as a permission problem here, since that's how container managers - * usually protected their sysctls.) In all other cases log an error and make the tool fail. */ + /* If the sysctl is not available in the kernel or we are running with reduced + * privileges and cannot write it, then log about the issue at LOG_NOTICE level, and + * proceed without failing. (EROFS is treated as a permission problem here, since + * that's how container managers usually protected their sysctls.) In all other cases + * log an error and make the tool fail. */ - if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT)) - log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", value, property); + if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT) || option->ignore_failure) + log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", option->value, option->key); else { - log_error_errno(k, "Couldn't write '%s' to '%s': %m", value, property); + log_error_errno(k, "Couldn't write '%s' to '%s': %m", option->value, option->key); if (r == 0) r = k; } @@ -94,9 +140,11 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign log_debug("Parsing %s", path); for (;;) { - char *p, *value, *new_value, *property, *existing; + _cleanup_(option_freep) Option *new_option = NULL; _cleanup_free_ char *l = NULL; - void *v; + bool ignore_failure; + Option *existing; + char *p, *value; int k; k = read_line(f, LONG_LINE_MAX, &l); @@ -125,39 +173,37 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign *value = 0; value++; - p = sysctl_normalize(strstrip(p)); + p = strstrip(p); + ignore_failure = p[0] == '-'; + if (ignore_failure) + p++; + + p = sysctl_normalize(p); value = strstrip(value); if (!test_prefix(p)) continue; - existing = ordered_hashmap_get2(sysctl_options, p, &v); + existing = ordered_hashmap_get(sysctl_options, p); if (existing) { - if (streq(value, existing)) + if (streq(value, existing->value)) { + existing->ignore_failure = existing->ignore_failure || ignore_failure; continue; + } log_debug("Overwriting earlier assignment of %s at '%s:%u'.", p, path, c); - free(ordered_hashmap_remove(sysctl_options, p)); - free(v); + option_free(ordered_hashmap_remove(sysctl_options, p)); } - property = strdup(p); - if (!property) + new_option = option_new(p, value, ignore_failure); + if (!new_option) return log_oom(); - new_value = strdup(value); - if (!new_value) { - free(property); - return log_oom(); - } + k = ordered_hashmap_put(sysctl_options, new_option->key, new_option); + if (k < 0) + return log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", p); - k = ordered_hashmap_put(sysctl_options, property, new_value); - if (k < 0) { - log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", property); - free(property); - free(new_value); - return k; - } + TAKE_PTR(new_option); } return r; @@ -264,7 +310,7 @@ static int parse_argv(int argc, char *argv[]) { } static int run(int argc, char *argv[]) { - _cleanup_(ordered_hashmap_free_free_freep) OrderedHashmap *sysctl_options = NULL; + _cleanup_(ordered_hashmap_freep) OrderedHashmap *sysctl_options = NULL; int r, k; r = parse_argv(argc, argv); @@ -275,7 +321,7 @@ static int run(int argc, char *argv[]) { umask(0022); - sysctl_options = ordered_hashmap_new(&path_hash_ops); + sysctl_options = ordered_hashmap_new(&option_hash_ops); if (!sysctl_options) return log_oom(); From e08be64937293e3aa8adb08048497520d58445c6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Jul 2019 09:24:11 +0200 Subject: [PATCH 4/7] man: document the new sysctl.d/ - prefix --- man/sysctl.d.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/man/sysctl.d.xml b/man/sysctl.d.xml index 0a8eeb62a4..21ef6de97c 100644 --- a/man/sysctl.d.xml +++ b/man/sysctl.d.xml @@ -59,6 +59,10 @@ /proc/sys/net/ipv4/conf/enp3s0.200/forwarding. + If a variable assignment is prefixed with a single - character, any attempts to + set it that fail will be ignored (though are logged). Moreover, any access permission errors, and + attempts to write variables not defined on the local system are ignored (and logged) too. + The settings configured with sysctl.d files will be applied early on boot. The network interface-specific options will also be applied individually for From 0338934f4bcda6a96a5342449ae96b003de3378d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Jul 2019 09:25:09 +0200 Subject: [PATCH 5/7] Revert "Revert "sysctl: Enable ping(8) inside rootless Podman containers"" This reverts commit be74f51605b4c7cb74fec3a50cd13b67598a8ac1. Let's add this again. With the new sysctl "-" thing we can make this work. --- NEWS | 9 +++++++++ sysctl.d/50-default.conf | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/NEWS b/NEWS index 2a79a2cded..0049be76ca 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,15 @@ systemd System and Service Manager CHANGES WITH 243 in spe: + * This release enables unprivileged programs (i.e. requiring neither + setuid nor file capabilities) to send ICMP Echo (i.e. ping) requests + by turning on the net.ipv4.ping_group_range sysctl of the Linux + kernel for the whole UNIX group range, i.e. all processes. This + change should be reasonably safe, as the kernel support for it was + specifically implemented to allow safe access to ICMP Echo for + processes lacking any privileges. If this is not desirable, it can be + disabled again by setting the parameter to "1 0". + * Previously, filters defined with SystemCallFilter= would have the effect that an calling an offending system call would terminate the calling thread. This behaviour never made much sense, since killing diff --git a/sysctl.d/50-default.conf b/sysctl.d/50-default.conf index 27084f6242..f0b4f610f8 100644 --- a/sysctl.d/50-default.conf +++ b/sysctl.d/50-default.conf @@ -30,6 +30,14 @@ net.ipv4.conf.all.accept_source_route = 0 # Promote secondary addresses when the primary address is removed net.ipv4.conf.all.promote_secondaries = 1 +# ping(8) without CAP_NET_ADMIN and CAP_NET_RAW +# The upper limit is set to 2^31-1. Values greater than that get rejected by +# the kernel because of this definition in linux/include/net/ping.h: +# #define GID_T_MAX (((gid_t)~0U) >> 1) +# That's not so bad because values between 2^31 and 2^32-1 are reserved on +# systemd-based systems anyway: https://systemd.io/UIDS-GIDS.html#summary +net.ipv4.ping_group_range = 0 2147483647 + # Fair Queue CoDel packet scheduler to fight bufferbloat net.core.default_qdisc = fq_codel From 000500c9d6347e0e2cdb92ec48fa10c0bb3ceca8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Jul 2019 09:26:07 +0200 Subject: [PATCH 6/7] sysctl: prefix ping port range setting with a dash Fixes: #13177 --- sysctl.d/50-default.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sysctl.d/50-default.conf b/sysctl.d/50-default.conf index f0b4f610f8..41bd1f9183 100644 --- a/sysctl.d/50-default.conf +++ b/sysctl.d/50-default.conf @@ -36,7 +36,7 @@ net.ipv4.conf.all.promote_secondaries = 1 # #define GID_T_MAX (((gid_t)~0U) >> 1) # That's not so bad because values between 2^31 and 2^32-1 are reserved on # systemd-based systems anyway: https://systemd.io/UIDS-GIDS.html#summary -net.ipv4.ping_group_range = 0 2147483647 +-net.ipv4.ping_group_range = 0 2147483647 # Fair Queue CoDel packet scheduler to fight bufferbloat net.core.default_qdisc = fq_codel From b64c47c03896ce90dba1246da5d56389535d4961 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 26 Jul 2019 09:28:43 +0200 Subject: [PATCH 7/7] NEWS: mention the new sysctl.d/ - prefix --- NEWS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS b/NEWS index 0049be76ca..ac9d0aa7a5 100644 --- a/NEWS +++ b/NEWS @@ -348,6 +348,9 @@ CHANGES WITH 243 in spe: * "localectl list-locales" won't list non-UTF-8 locales anymore. It's 2019. (You can set non-UTF-8 locales though, if you know there name.) + * If variable assignments in sysctl.d/ files are prefixed with "-" any + failures to apply them are now ignored. + Contributions from: Aaron Barany, Adrian Bunk, Alan Jenkins, Andrej Valek, Anita Zhang, Arian van Putten, Balint Reczey, Bastien Nocera, Ben Boeckel, Benjamin Robin, camoz, Chen Qi, Chris Chiu, Chris Down,