From d301c52383ca7cfa6b7cda87d7a4209c234a532c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 5 Nov 2020 18:22:38 +0100 Subject: [PATCH] resolved: bind socket to interface during connect() Apparently, IP_UNICAST_IF does not influence the routing decisions done during connect(). But SO_BINDTODEVICE/SO_BINDTOINDEX does, which however brings a lot of other semantics with it that we are not interested in (i.e. it doesn't allow packets from any other iface to reach us, even if routing otherwise allows it). Hence, let's bind to the ifindex immediately before the connect() and unbind right after again, so that we get the semantics we want, but not the ones we don't. Fixes: #11935 Replaces: #12004 --- src/resolve/resolved-dns-scope.c | 27 +++++++++++++++++++++++++++ src/resolve/resolved.c | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c index 3e2fc16f10..f23d0b6579 100644 --- a/src/resolve/resolved-dns-scope.c +++ b/src/resolve/resolved-dns-scope.c @@ -412,9 +412,36 @@ static int dns_scope_socket( if (ret_socket_address) *ret_socket_address = sa; else { + bool bound = false; + + /* Let's temporarily bind the socket to the specified ifindex. The kernel currently takes + * only the SO_BINDTODEVICE/SO_BINDTOINDEX ifindex into account when making routing decisions + * in connect() — and not IP_UNICAST_IF. We don't really want any of the other semantics of + * SO_BINDTODEVICE/SO_BINDTOINDEX, hence we immediately unbind the socket after the fact + * again. + * + * As a special exception we don't do this if we notice that the specified IP address is on + * the local host. SO_BINDTODEVICE in combination with destination addresses on the local + * host result in EHOSTUNREACH, since Linux won't send the packets out of the specified + * interface, but delivers them directly to the local socket. 
*/ + if (s->link && + !manager_find_link_address(s->manager, sa.sa.sa_family, sockaddr_in_addr(&sa.sa))) { + r = socket_bind_to_ifindex(fd, ifindex); + if (r < 0) + return r; + + bound = true; + } + r = connect(fd, &sa.sa, salen); if (r < 0 && errno != EINPROGRESS) return -errno; + + if (bound) { + r = socket_bind_to_ifindex(fd, 0); + if (r < 0) + return r; + } } return TAKE_FD(fd); diff --git a/src/resolve/resolved.c b/src/resolve/resolved.c index fd9be30dcf..4c42b6591d 100644 --- a/src/resolve/resolved.c +++ b/src/resolve/resolved.c @@ -58,7 +58,7 @@ static int run(int argc, char *argv[]) { if (r < 0) return log_error_errno(r, "Could not create runtime directory: %m"); - /* Drop privileges, but keep three caps. Note that we drop those too, later on (see below) */ + /* Drop privileges, but keep three caps. Note that we drop two of those too, later on (see below) */ r = drop_privileges(uid, gid, (UINT64_C(1) << CAP_NET_RAW)| /* needed for SO_BINDTODEVICE */ (UINT64_C(1) << CAP_NET_BIND_SERVICE)| /* needed to bind on port 53 */ @@ -83,7 +83,7 @@ static int run(int argc, char *argv[]) { (void) manager_check_resolv_conf(m); /* Let's drop the remaining caps now */ - r = capability_bounding_set_drop(0, true); + r = capability_bounding_set_drop((UINT64_C(1) << CAP_NET_RAW), true); if (r < 0) return log_error_errno(r, "Failed to drop remaining caps: %m");