From 00df39a56a247a9936ca0c0a79c17cd0dda31daa Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 10 Sep 2020 16:36:35 +0200 Subject: [PATCH 1/3] timesyncd: don't attempt to call IP_TOS sockopt on IPv6 sockets --- src/timesync/timesyncd-manager.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/timesync/timesyncd-manager.c b/src/timesync/timesyncd-manager.c index 637a3b81c6..0fab37d783 100644 --- a/src/timesync/timesyncd-manager.c +++ b/src/timesync/timesyncd-manager.c @@ -647,7 +647,8 @@ static int manager_listen_setup(Manager *m) { if (r < 0) return r; - (void) setsockopt_int(m->server_socket, IPPROTO_IP, IP_TOS, IPTOS_LOWDELAY); + if (addr.sa.sa_family == AF_INET) + (void) setsockopt_int(m->server_socket, IPPROTO_IP, IP_TOS, IPTOS_LOWDELAY); return sd_event_add_io(m->event, &m->event_receive, m->server_socket, EPOLLIN, manager_receive_response, m); } From 5d0fe4233b98301a2f8644824dd1dec0c5fc3403 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 10 Sep 2020 16:31:31 +0200 Subject: [PATCH 2/3] tree-wide: add helper for IPv4/IPv6 sockopts A variety of sockopts exist both for IPv4 and IPv6 but require a different pair of sockopt level/option number. Let's add helpers for these that internally determine the right sockopt to call. This should shorten code that generically wants to support both ipv4 + ipv6 and for the first time adds correct support for some cases where we only called the ipv4 versions, and not the ipv6 options. --- src/basic/missing_network.h | 5 + src/basic/socket-label.c | 8 +- src/basic/socket-util.c | 157 +++++++++++++++++++++++++++++- src/basic/socket-util.h | 9 +- src/core/socket.c | 21 ++-- src/resolve/resolved-dns-scope.c | 51 +++------- src/resolve/resolved-dns-stream.c | 14 +-- src/resolve/resolved-dns-stub.c | 27 ++--- 8 files changed, 202 insertions(+), 90 deletions(-) diff --git a/src/basic/missing_network.h b/src/basic/missing_network.h index 257879405c..a25a1480f0 100644 --- a/src/basic/missing_network.h +++ b/src/basic/missing_network.h @@ -6,6 +6,11 @@ #define IPV6_UNICAST_IF 76 #endif +/* linux/in6.h or netinet/in.h */ +#ifndef IPV6_TRANSPARENT +#define IPV6_TRANSPARENT 75 +#endif + /* Not exposed but defined at include/net/ip.h */ #ifndef IPV4_MIN_MTU #define IPV4_MIN_MTU 68 diff --git a/src/basic/socket-label.c b/src/basic/socket-label.c index ec52d81653..dd69eaaac2 100644 --- a/src/basic/socket-label.c +++ b/src/basic/socket-label.c @@ -80,15 +80,15 @@ int socket_address_listen( } if (free_bind) { - r = setsockopt_int(fd, IPPROTO_IP, IP_FREEBIND, true); + r = socket_set_freebind(fd, socket_address_family(a), true); if (r < 0) - log_warning_errno(r, "IP_FREEBIND failed: %m"); + log_warning_errno(r, "IP_FREEBIND/IPV6_FREEBIND failed: %m"); } if (transparent) { - r = setsockopt_int(fd, IPPROTO_IP, IP_TRANSPARENT, true); + r = socket_set_transparent(fd, socket_address_family(a), true); if (r < 0) - log_warning_errno(r, "IP_TRANSPARENT failed: %m"); + log_warning_errno(r, "IP_TRANSPARENT/IPV6_TRANSPARENT failed: %m"); } } diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c index 7a3299672a..a6552b2687 100644 --- a/src/basic/socket-util.c +++ b/src/basic/socket-util.c @@ -26,6 +26,7 @@ #include "macro.h" #include "memory-util.h" #include "missing_socket.h" +#include "missing_network.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -1204,13 +1205,28 @@ ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags) { return n; } -int socket_pass_pktinfo(int fd, bool b) { +int socket_get_family(int fd, int *ret) { int af; socklen_t sl = sizeof(af); if (getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &af, &sl) < 0) return -errno; + if (sl != sizeof(af)) + return -EINVAL; + + return af; +} + +int socket_set_recvpktinfo(int fd, int af, bool b) { + int r; + + if (af == AF_UNSPEC) { + r = socket_get_family(fd, &af); + if (r < 0) + return r; + } + switch (af) { case AF_INET: @@ -1226,3 +1242,142 @@ int socket_pass_pktinfo(int fd, bool b) { return -EAFNOSUPPORT; } } + +int socket_set_recverr(int fd, int af, bool b) { + int r; + + if (af == AF_UNSPEC) { + r = socket_get_family(fd, &af); + if (r < 0) + return r; + } + + switch (af) { + + case AF_INET: + return setsockopt_int(fd, IPPROTO_IP, IP_RECVERR, b); + + case AF_INET6: + return setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVERR, b); + + default: + return -EAFNOSUPPORT; + } +} + +int socket_set_recvttl(int fd, int af, bool b) { + int r; + + if (af == AF_UNSPEC) { + r = socket_get_family(fd, &af); + if (r < 0) + return r; + } + + switch (af) { + + case AF_INET: + return setsockopt_int(fd, IPPROTO_IP, IP_RECVTTL, b); + + case AF_INET6: + return setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, b); + + default: + return -EAFNOSUPPORT; + } +} + +int socket_set_ttl(int fd, int af, int ttl) { + int r; + + if (af == AF_UNSPEC) { + r = socket_get_family(fd, &af); + if (r < 0) + return r; + } + + switch (af) { + + case AF_INET: + return setsockopt_int(fd, IPPROTO_IP, IP_TTL, ttl); + + case AF_INET6: + return setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, ttl); + + default: + return -EAFNOSUPPORT; + } +} + +int socket_set_unicast_if(int fd, int af, int ifi) { + be32_t ifindex_be = htobe32(ifi); + int r; + + if (af == AF_UNSPEC) { + r = socket_get_family(fd, &af); + if (r < 0) + return r; + } + + switch (af) { + + case AF_INET: + if (setsockopt(fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex_be, sizeof(ifindex_be)) < 0) + return -errno; + + return 0; + + case AF_INET6: + if (setsockopt(fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex_be, sizeof(ifindex_be)) < 0) + return -errno; + + return 0; + + default: + return -EAFNOSUPPORT; + } +} + +int socket_set_freebind(int fd, int af, bool b) { + int r; + + if (af == AF_UNSPEC) { + r = socket_get_family(fd, &af); + if (r < 0) + return r; + } + + switch (af) { + + case AF_INET: + return setsockopt_int(fd, IPPROTO_IP, IP_FREEBIND, b); + + case AF_INET6: + return setsockopt_int(fd, IPPROTO_IPV6, IPV6_FREEBIND, b); + + default: + return -EAFNOSUPPORT; + } +} + +int socket_set_transparent(int fd, int af, bool b) { + int r; + + if (af == AF_UNSPEC) { + r = socket_get_family(fd, &af); + if (r < 0) + return r; + } + + switch (af) { + + case AF_INET: + return setsockopt_int(fd, IPPROTO_IP, IP_TRANSPARENT, b); + + case AF_INET6: + return setsockopt_int(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, b); + + default: + return -EAFNOSUPPORT; + } +} diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h index fee9055cec..c36f90f75f 100644 --- a/src/basic/socket-util.h +++ b/src/basic/socket-util.h @@ -261,4 +261,11 @@ int socket_bind_to_ifindex(int fd, int ifindex); ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags); -int socket_pass_pktinfo(int fd, bool b); +int socket_get_family(int fd, int *ret); +int socket_set_recvpktinfo(int fd, int af, bool b); +int socket_set_recverr(int fd, int af, bool b); +int socket_set_recvttl(int fd, int af, bool b); +int socket_set_ttl(int fd, int af, int ttl); +int socket_set_unicast_if(int fd, int af, int ifi); +int socket_set_freebind(int fd, int af, bool b); +int socket_set_transparent(int fd, int af, bool b); diff --git a/src/core/socket.c b/src/core/socket.c index 1da5f304aa..04177ea7a5 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -979,10 +979,11 @@ static void socket_close_fds(Socket *s) { (void) unlink(*i); } -static void socket_apply_socket_options(Socket *s, int fd) { +static void socket_apply_socket_options(Socket *s, SocketPort *p, int fd) { int r; assert(s); + assert(p); assert(fd >= 0); if (s->keep_alive) { @@ -1046,7 +1047,7 @@ static void socket_apply_socket_options(Socket *s, int fd) { } if (s->pass_pktinfo) { - r = socket_pass_pktinfo(fd, true); + r = socket_set_recvpktinfo(fd, socket_address_family(&p->address), true); if (r < 0) log_unit_warning_errno(UNIT(s), r, "Failed to enable packet info socket option: %m"); } @@ -1082,16 +1083,8 @@ static void socket_apply_socket_options(Socket *s, int fd) { } if (s->ip_ttl >= 0) { - int x; - - r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, s->ip_ttl); - - if (socket_ipv6_is_supported()) - x = setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, s->ip_ttl); - else - x = -EAFNOSUPPORT; - - if (r < 0 && x < 0) + r = socket_set_ttl(fd, socket_address_family(&p->address), s->ip_ttl); + if (r < 0) log_unit_warning_errno(UNIT(s), r, "IP_TTL/IPV6_UNICAST_HOPS failed: %m"); } @@ -1664,7 +1657,7 @@ static int socket_open_fds(Socket *_s) { if (p->fd < 0) return p->fd; - socket_apply_socket_options(s, p->fd); + socket_apply_socket_options(s, p, p->fd); socket_symlink(s); break; @@ -3004,7 +2997,7 @@ static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, if (cfd < 0) goto fail; - socket_apply_socket_options(p->socket, cfd); + socket_apply_socket_options(p->socket, p, cfd); } socket_enter_running(p->socket, cfd); diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c index e69ba3c758..2ad4544002 100644 --- a/src/resolve/resolved-dns-scope.c +++ b/src/resolve/resolved-dns-scope.c @@ -386,54 +386,27 @@ static int dns_scope_socket( } if (s->link) { - be32_t ifindex_be = htobe32(ifindex); - - if (sa.sa.sa_family == AF_INET) { - r = setsockopt(fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex_be, sizeof(ifindex_be)); - if (r < 0) - return -errno; - } else if (sa.sa.sa_family == AF_INET6) { - r = setsockopt(fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex_be, sizeof(ifindex_be)); - if (r < 0) - return -errno; - } + r = socket_set_unicast_if(fd, sa.sa.sa_family, ifindex); + if (r < 0) + return r; } if (s->protocol == DNS_PROTOCOL_LLMNR) { /* RFC 4795, section 2.5 requires the TTL to be set to 1 */ - - if (sa.sa.sa_family == AF_INET) { - r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, 1); - if (r < 0) - return r; - } else if (sa.sa.sa_family == AF_INET6) { - r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1); - if (r < 0) - return r; - } + r = socket_set_ttl(fd, sa.sa.sa_family, 1); + if (r < 0) + return r; } if (type == SOCK_DGRAM) { /* Set IP_RECVERR or IPV6_RECVERR to get ICMP error feedback. See discussion in #10345. */ + r = socket_set_recverr(fd, sa.sa.sa_family, true); + if (r < 0) + return r; - if (sa.sa.sa_family == AF_INET) { - r = setsockopt_int(fd, IPPROTO_IP, IP_RECVERR, true); - if (r < 0) - return r; - - r = setsockopt_int(fd, IPPROTO_IP, IP_PKTINFO, true); - if (r < 0) - return r; - - } else if (sa.sa.sa_family == AF_INET6) { - r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVERR, true); - if (r < 0) - return r; - - r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, true); - if (r < 0) - return r; - } + r = socket_set_recvpktinfo(fd, sa.sa.sa_family, true); + if (r < 0) + return r; } if (ret_socket_address) diff --git a/src/resolve/resolved-dns-stream.c b/src/resolve/resolved-dns-stream.c index a814de6a1f..e6f72f00b4 100644 --- a/src/resolve/resolved-dns-stream.c +++ b/src/resolve/resolved-dns-stream.c @@ -190,18 +190,10 @@ static int dns_stream_identify(DnsStream *s) { s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, s->local.sa.sa_family == AF_INET ? (union in_addr_union*) &s->local.in.sin_addr : (union in_addr_union*) &s->local.in6.sin6_addr); if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) { - be32_t ifindex = htobe32(s->ifindex); - /* Make sure all packets for this connection are sent on the same interface */ - if (s->local.sa.sa_family == AF_INET) { - r = setsockopt(s->fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex)); - if (r < 0) - log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF: %m"); - } else if (s->local.sa.sa_family == AF_INET6) { - r = setsockopt(s->fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex, sizeof(ifindex)); - if (r < 0) - log_debug_errno(errno, "Failed to invoke IPV6_UNICAST_IF: %m"); - } + r = socket_set_unicast_if(s->fd, s->local.sa.sa_family, s->ifindex); + if (r < 0) + log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m"); } s->identified = true; diff --git a/src/resolve/resolved-dns-stub.c b/src/resolve/resolved-dns-stub.c index 572be26c2d..1d23a199e9 100644 --- a/src/resolve/resolved-dns-stub.c +++ b/src/resolve/resolved-dns-stub.c @@ -543,23 +543,13 @@ static int set_dns_stub_common_socket_options(int fd, int family) { if (r < 0) return r; - if (family == AF_INET) { - r = setsockopt_int(fd, IPPROTO_IP, IP_PKTINFO, true); - if (r < 0) - return r; + r = socket_set_recvpktinfo(fd, family, true); + if (r < 0) + return r; - r = setsockopt_int(fd, IPPROTO_IP, IP_RECVTTL, true); - if (r < 0) - return r; - } else { - r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, true); - if (r < 0) - return r; - - r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, true); - if (r < 0) - return r; - } + r = socket_set_recvttl(fd, family, true); + if (r < 0) + return r; return 0; } @@ -661,10 +651,7 @@ static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int ty /* Do not set IP_TTL for extra DNS stub listners, as the address may not be local and in that case * people may want ttl > 1. */ - if (l->family == AF_INET) - r = setsockopt_int(fd, IPPROTO_IP, IP_FREEBIND, true); - else - r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_FREEBIND, true); + r = socket_set_freebind(fd, l->family, true); if (r < 0) goto fail; From c6a7924513e6aa1ff8fbd2d4c644a8d14d4d4149 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 10 Sep 2020 16:35:31 +0200 Subject: [PATCH 3/3] man: always document both the ipv4 and the ipv6 sockopt --- man/daemon.xml | 50 +++++++++++++++++------------------------- man/systemd.socket.xml | 45 +++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 55 deletions(-) diff --git a/man/daemon.xml b/man/daemon.xml index 072529eeec..b5ae08473d 100644 --- a/man/daemon.xml +++ b/man/daemon.xml @@ -448,37 +448,27 @@ Other Forms of Activation - Other forms of activation have been suggested and - implemented in some systems. However, there are often simpler or - better alternatives, or they can be put together of combinations - of the schemes above. Example: Sometimes, it appears useful to - start daemons or .socket units when a - specific IP address is configured on a network interface, - because network sockets shall be bound to the address. However, - an alternative to implement this is by utilizing the Linux - IP_FREEBIND socket option, as accessible - via FreeBind=yes in systemd socket files (see - systemd.socket5 - for details). This option, when enabled, allows sockets to be - bound to a non-local, not configured IP address, and hence - allows bindings to a particular IP address before it actually - becomes available, making such an explicit dependency to the - configured address redundant. Another often suggested trigger - for service activation is low system load. However, here too, a - more convincing approach might be to make proper use of features - of the operating system, in particular, the CPU or I/O scheduler - of Linux. Instead of scheduling jobs from userspace based on - monitoring the OS scheduler, it is advisable to leave the - scheduling of processes to the OS scheduler itself. systemd - provides fine-grained access to the CPU and I/O schedulers. If a - process executed by the init system shall not negatively impact - the amount of CPU or I/O bandwidth available to other processes, - it should be configured with + Other forms of activation have been suggested and implemented in some systems. However, there are + often simpler or better alternatives, or they can be put together of combinations of the schemes + above. Example: Sometimes, it appears useful to start daemons or .socket units + when a specific IP address is configured on a network interface, because network sockets shall be bound + to the address. However, an alternative to implement this is by utilizing the Linux + IP_FREEBIND/IPV6_FREEBIND socket option, as accessible via + FreeBind=yes in systemd socket files (see + systemd.socket5 for + details). This option, when enabled, allows sockets to be bound to a non-local, not configured IP + address, and hence allows bindings to a particular IP address before it actually becomes available, + making such an explicit dependency to the configured address redundant. Another often suggested trigger + for service activation is low system load. However, here too, a more convincing approach might be to + make proper use of features of the operating system, in particular, the CPU or I/O scheduler of + Linux. Instead of scheduling jobs from userspace based on monitoring the OS scheduler, it is advisable + to leave the scheduling of processes to the OS scheduler itself. systemd provides fine-grained access + to the CPU and I/O schedulers. If a process executed by the init system shall not negatively impact the + amount of CPU or I/O bandwidth available to other processes, it should be configured with CPUSchedulingPolicy=idle and/or - IOSchedulingClass=idle. Optionally, this may - be combined with timer-based activation to schedule background - jobs during runtime and with minimal impact on the system, and - remove it from the boot phase itself. + IOSchedulingClass=idle. Optionally, this may be combined with timer-based activation + to schedule background jobs during runtime and with minimal impact on the system, and remove it from + the boot phase itself. diff --git a/man/systemd.socket.xml b/man/systemd.socket.xml index 253da8a6d4..3aff453185 100644 --- a/man/systemd.socket.xml +++ b/man/systemd.socket.xml @@ -568,26 +568,23 @@ IPTOS= - Takes an integer argument controlling the IP - Type-Of-Service field for packets generated from this socket. - This controls the IP_TOS socket option (see - ip7 - for details.). Either a numeric string or one of - , , - or may - be specified. + Takes an integer argument controlling the IP Type-Of-Service field for packets + generated from this socket. This controls the IP_TOS socket option (see + ip7 for + details.). Either a numeric string or one of , , + or may be specified. IPTTL= - Takes an integer argument controlling the IPv4 - Time-To-Live/IPv6 Hop-Count field for packets generated from - this socket. This sets the IP_TTL/IPV6_UNICAST_HOPS socket - options (see - ip7 - and - ipv67 - for details.) + Takes an integer argument controlling the IPv4 Time-To-Live/IPv6 Hop-Count field for + packets generated from this socket. This sets the + IP_TTL/IPV6_UNICAST_HOPS socket options (see ip7 and + ipv67 for + details.) @@ -662,20 +659,18 @@ FreeBind= - Takes a boolean value. Controls whether the - socket can be bound to non-local IP addresses. This is useful - to configure sockets listening on specific IP addresses before - those IP addresses are successfully configured on a network - interface. This sets the IP_FREEBIND socket option. For - robustness reasons it is recommended to use this option - whenever you bind a socket to a specific IP address. Defaults - to . + Takes a boolean value. Controls whether the socket can be bound to non-local IP + addresses. This is useful to configure sockets listening on specific IP addresses before those IP + addresses are successfully configured on a network interface. This sets the + IP_FREEBIND/IPV6_FREEBIND socket option. For robustness + reasons it is recommended to use this option whenever you bind a socket to a specific IP + address. Defaults to . Transparent= Takes a boolean value. Controls the - IP_TRANSPARENT socket option. Defaults to + IP_TRANSPARENT/IPV6_TRANSPARENT socket option. Defaults to .