Systemd/src/resolve/resolved-dns-stream.c

600 lines
21 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: LGPL-2.1+ */
#include <netinet/tcp.h>
2019-10-31 03:07:23 +01:00
#include <unistd.h>
#include "alloc-util.h"
#include "fd-util.h"
#include "io-util.h"
2019-10-31 03:07:23 +01:00
#include "missing_network.h"
#include "resolved-dns-stream.h"
2020-05-06 19:10:59 +02:00
#include "resolved-manager.h"
#define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
#define DNS_STREAMS_MAX 128
#define DNS_QUERIES_PER_STREAM 32
/* Tears down the I/O side of a stream: drops both event sources, closes the
 * socket and detaches the stream from its server object. The DnsStream
 * object itself is not freed here. */
static void dns_stream_stop(DnsStream *s) {
        assert(s);

        /* Drop the event sources before the file descriptor goes away */
        s->timeout_event_source = sd_event_source_unref(s->timeout_event_source);
        s->io_event_source = sd_event_source_unref(s->io_event_source);

        s->fd = safe_close(s->fd);

        /* The stream is unusable from here on, hence stop being the server's stream */
        dns_stream_detach(s);
}
/* Recomputes the epoll event mask for the stream's I/O event source from the
 * current read/write state, dequeuing the next packet to write if the current
 * one is finished. Returns whatever sd_event_source_set_io_events() returns. */
static int dns_stream_update_io(DnsStream *s) {
        bool write_pending, read_pending;
        int events = 0;

        assert(s);

        /* Is the current outgoing packet (2 byte size prefix + payload) still incomplete? */
        write_pending =
                s->write_packet &&
                s->n_written < sizeof(s->write_size) + s->write_packet->size;

        if (!write_pending && !ordered_set_isempty(s->write_queue)) {
                /* Current packet is done (or there is none): move on to the next queued one */
                dns_packet_unref(s->write_packet);
                s->write_packet = ordered_set_steal_first(s->write_queue);
                s->write_size = htobe16(s->write_packet->size);
                s->n_written = 0;
                write_pending = true;
        }

        if (write_pending)
                events |= EPOLLOUT;

        /* Only ask for input while no complete packet is buffered, and only as long as the
         * per-connection limit of parallel queries has not been reached. */
        read_pending =
                !s->read_packet ||
                s->n_read < sizeof(s->read_size) + s->read_packet->size;

        if (read_pending && set_size(s->queries) < DNS_QUERIES_PER_STREAM)
                events |= EPOLLIN;

#if ENABLE_DNS_OVER_TLS
        /* TLS may need a different event set for handshake and clean shutdown; it wins if set */
        if (s->dnstls_events != 0)
                events = s->dnstls_events;
#endif

        return sd_event_source_set_io_events(s->io_event_source, events);
}
/* Finishes a stream: shuts it down, detaches it from its server and notifies
 * the owner.
 *
 * error is > 0 (an errno value) when the connection failed for some reason in
 * the network stack. It is == 0 if we sent and received exactly one packet
 * each (in the LLMNR client case).
 *
 * If a completion callback is installed it is invoked with the error;
 * otherwise the stream's reference is dropped. Always returns 0. */
static int dns_stream_complete(DnsStream *s, int error) {
        _cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */

        assert(s);
        assert(error >= 0);

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted) {
                int r;

                /* On -EAGAIN the TLS shutdown is still in flight, so the socket and
                 * event sources are kept for now. */
                r = dnstls_stream_shutdown(s, error);
                if (r != -EAGAIN)
                        dns_stream_stop(s);
        } else
#endif
                dns_stream_stop(s);

        /* dns_stream_stop() already detaches, but it is skipped in the -EAGAIN case above */
        dns_stream_detach(s);

        if (s->complete)
                s->complete(s, error);
        else /* the default action if no completion function is set is to close the stream */
                dns_stream_unref(s);

        return 0;
}
/* Determines the local and peer addresses, the interface index and the TTL/hop
 * limit of the stream's socket and stores them in the stream. For LLMNR
 * streams with a known interface it additionally pins outgoing packets to
 * that interface.
 *
 * Does nothing if the stream was identified before. Returns 0 on success or a
 * negative errno-style value if querying the socket fails. */
static int dns_stream_identify(DnsStream *s) {
        CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
                         + CMSG_SPACE(int) + /* for the TTL */
                         + EXTRA_CMSG_SPACE /* kernel appears to require extra space */) control;
        struct msghdr mh = {};
        struct cmsghdr *cmsg;
        socklen_t sl;
        int r;

        assert(s);

        if (s->identified)
                return 0;

        /* Query the local side */
        s->local_salen = sizeof(s->local);
        r = getsockname(s->fd, &s->local.sa, &s->local_salen);
        if (r < 0)
                return -errno;
        if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
                s->ifindex = s->local.in6.sin6_scope_id;

        /* Query the remote side */
        s->peer_salen = sizeof(s->peer);
        r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
        if (r < 0)
                return -errno;
        if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
                s->ifindex = s->peer.in6.sin6_scope_id;

        /* Check consistency */
        assert(s->peer.sa.sa_family == s->local.sa.sa_family);
        assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));

        /* Query connection meta information */
        sl = sizeof(control);
        if (s->peer.sa.sa_family == AF_INET) {
                r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
                if (r < 0)
                        return -errno;
        } else if (s->peer.sa.sa_family == AF_INET6) {
                r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
                if (r < 0)
                        return -errno;
        } else
                return -EAFNOSUPPORT;

        /* Wrap the returned option buffer in a msghdr so it can be walked like
         * regular ancillary data. */
        mh.msg_control = &control;
        mh.msg_controllen = sl;

        CMSG_FOREACH(cmsg, &mh) {

                if (cmsg->cmsg_level == IPPROTO_IPV6) {
                        assert(s->peer.sa.sa_family == AF_INET6);

                        switch (cmsg->cmsg_type) {

                        case IPV6_PKTINFO: {
                                struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);

                                if (s->ifindex <= 0)
                                        s->ifindex = i->ipi6_ifindex;
                                break;
                        }

                        case IPV6_HOPLIMIT:
                                s->ttl = *(int *) CMSG_DATA(cmsg);
                                break;
                        }

                } else if (cmsg->cmsg_level == IPPROTO_IP) {
                        assert(s->peer.sa.sa_family == AF_INET);

                        switch (cmsg->cmsg_type) {

                        case IP_PKTINFO: {
                                struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);

                                if (s->ifindex <= 0)
                                        s->ifindex = i->ipi_ifindex;
                                break;
                        }

                        case IP_TTL:
                                s->ttl = *(int *) CMSG_DATA(cmsg);
                                break;
                        }
                }
        }

        /* The Linux kernel sets the interface index to the loopback
         * device if the connection came from the local host since it
         * avoids the routing table in such a case. Let's unset the
         * interface index in such a case. */
        if (s->ifindex == LOOPBACK_IFINDEX)
                s->ifindex = 0;

        /* If we don't know the interface index still, we look for the
         * first local interface with a matching address. Yuck! */
        if (s->ifindex <= 0)
                s->ifindex = manager_find_ifindex(
                                s->manager,
                                s->local.sa.sa_family,
                                s->local.sa.sa_family == AF_INET ?
                                        (union in_addr_union*) &s->local.in.sin_addr :
                                        (union in_addr_union*) &s->local.in6.sin6_addr);

        if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
                /* The *_UNICAST_IF options are passed the index in network byte order */
                be32_t ifindex = htobe32(s->ifindex);

                /* Make sure all packets for this connection are sent on the same interface.
                 * Failure is only logged: this is best effort. */
                if (s->local.sa.sa_family == AF_INET) {
                        r = setsockopt(s->fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex));
                        if (r < 0)
                                log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF: %m");
                } else if (s->local.sa.sa_family == AF_INET6) {
                        r = setsockopt(s->fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex, sizeof(ifindex));
                        if (r < 0)
                                log_debug_errno(errno, "Failed to invoke IPV6_UNICAST_IF: %m");
                }
        }

        s->identified = true;

        return 0;
}
/* Writes the given I/O vector to the stream.
 *
 * For encrypted streams the data is passed through the TLS layer, unless
 * DNS_STREAM_WRITE_TLS_DATA is set in flags, in which case the data goes to
 * the socket directly. While a TCP Fast Open address is still pending
 * (s->tfo_salen > 0) the data is sent with MSG_FASTOPEN, which also
 * establishes the connection.
 *
 * Returns the number of bytes written (possibly fewer than requested), or a
 * negative errno-style value. -EAGAIN means "nothing written yet, try again
 * once the socket becomes writable". */
ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
        ssize_t m;

        assert(s);
        assert(iov);

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA)) {
                ssize_t ss;
                size_t i;

                m = 0;
                for (i = 0; i < iovcnt; i++) {
                        ss = dnstls_stream_write(s, iov[i].iov_base, iov[i].iov_len);
                        if (ss < 0)
                                return ss;

                        m += ss;

                        /* Short write: stop here. Sending the following vectors now would
                         * put their bytes on the wire ahead of the unsent remainder of this
                         * one, while the caller only accounts for m contiguous bytes. */
                        if (ss != (ssize_t) iov[i].iov_len)
                                break;
                }
        } else
#endif
        if (s->tfo_salen > 0) {
                struct msghdr hdr = {
                        .msg_iov = (struct iovec*) iov,
                        .msg_iovlen = iovcnt,
                        .msg_name = &s->tfo_address.sa,
                        .msg_namelen = s->tfo_salen
                };

                m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
                if (m < 0) {
                        if (errno == EOPNOTSUPP) {
                                /* No TCP Fast Open available: fall back to a regular connect().
                                 * Remember the address length before clearing the "TFO
                                 * pending" marker, connect() needs the real length. */
                                socklen_t salen = s->tfo_salen;

                                s->tfo_salen = 0;

                                /* EINPROGRESS only means the connection attempt has been
                                 * started; handle it like the sendmsg() case below. */
                                if (connect(s->fd, &s->tfo_address.sa, salen) < 0 && errno != EINPROGRESS)
                                        return -errno;

                                return -EAGAIN;
                        }
                        if (errno == EINPROGRESS)
                                return -EAGAIN;

                        return -errno;
                } else
                        s->tfo_salen = 0; /* connection is made */
        } else {
                m = writev(s->fd, iov, iovcnt);
                if (m < 0)
                        return -errno;
        }

        return m;
}
/* Reads up to count bytes from the stream into buf, going through the TLS
 * layer for encrypted streams and straight to the socket otherwise.
 *
 * Returns the number of bytes read. For the plain socket path, 0 is what
 * read() returns at end of stream and failures are returned as a negative
 * errno value; for TLS the return value of dnstls_stream_read() is passed
 * through unchanged. */
static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
        ssize_t ss;

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted)
                ss = dnstls_stream_read(s, buf, count);
        else
#endif
        {
                ss = read(s->fd, buf, count);
                if (ss < 0)
                        return -errno;
        }

        return ss;
}
/* Timer callback of the stream's timeout event source: completes the stream
 * given as userdata with ETIMEDOUT. */
static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
        DnsStream *stream = userdata;

        assert(stream);

        return dns_stream_complete(stream, ETIMEDOUT);
}
/* I/O callback of the stream's event source. Drives the whole state machine:
 *
 *   1. lets the TLS layer process the event first (encrypted streams only),
 *   2. identifies the connection once it is established,
 *   3. on EPOLLOUT, continues writing the current packet (2 byte big-endian
 *      size prefix followed by the payload),
 *   4. on EPOLLIN/EPOLLHUP/EPOLLRDHUP, continues reading the size prefix and
 *      then the packet body, invoking s->on_packet once a packet is complete,
 *   5. completes DNS_STREAM_LLMNR_SEND streams after one packet was written
 *      and one was read,
 *   6. restarts the timeout if progress was made.
 *
 * Fatal stream errors are reported through dns_stream_complete(). */
static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
        _cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
        bool progressed = false;
        int r;

        assert(s);

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted) {
                r = dnstls_stream_on_io(s, revents);
                if (r == DNSTLS_STREAM_CLOSED)
                        return 0;
                if (r == -EAGAIN)
                        return dns_stream_update_io(s);
                if (r < 0)
                        return dns_stream_complete(s, -r);

                r = dns_stream_update_io(s);
                if (r < 0)
                        return r;
        }
#endif

        /* only identify after connecting */
        if (s->tfo_salen == 0) {
                r = dns_stream_identify(s);
                if (r < 0)
                        return dns_stream_complete(s, -r);
        }

        if ((revents & EPOLLOUT) &&
            s->write_packet &&
            s->n_written < sizeof(s->write_size) + s->write_packet->size) {

                struct iovec iov[2];
                ssize_t ss;

                /* Size prefix and payload, advanced past what was already sent */
                iov[0] = IOVEC_MAKE(&s->write_size, sizeof(s->write_size));
                iov[1] = IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size);

                IOVEC_INCREMENT(iov, 2, s->n_written);

                ss = dns_stream_writev(s, iov, 2, 0);
                if (ss < 0) {
                        if (!IN_SET(-ss, EINTR, EAGAIN))
                                return dns_stream_complete(s, -ss);
                } else {
                        progressed = true;
                        s->n_written += ss;
                }

                /* Are we done? If so, disable the event source for EPOLLOUT */
                if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
                        r = dns_stream_update_io(s);
                        if (r < 0)
                                return dns_stream_complete(s, -r);
                }
        }

        if ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
            (!s->read_packet ||
             s->n_read < sizeof(s->read_size) + s->read_packet->size)) {

                /* First, the size prefix */
                if (s->n_read < sizeof(s->read_size)) {
                        ssize_t ss;

                        ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
                        if (ss < 0) {
                                if (!IN_SET(-ss, EINTR, EAGAIN))
                                        return dns_stream_complete(s, -ss);
                        } else if (ss == 0)
                                return dns_stream_complete(s, ECONNRESET);
                        else {
                                progressed = true;
                                s->n_read += ss;
                        }
                }

                /* Then, the packet body */
                if (s->n_read >= sizeof(s->read_size)) {

                        /* Refuse packets that cannot even hold a DNS header */
                        if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
                                return dns_stream_complete(s, EBADMSG);

                        if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
                                ssize_t ss;

                                if (!s->read_packet) {
                                        r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
                                        if (r < 0)
                                                return dns_stream_complete(s, -r);

                                        /* Stamp the new packet with the connection's metadata */
                                        s->read_packet->size = be16toh(s->read_size);
                                        s->read_packet->ipproto = IPPROTO_TCP;
                                        s->read_packet->family = s->peer.sa.sa_family;
                                        s->read_packet->ttl = s->ttl;
                                        s->read_packet->ifindex = s->ifindex;

                                        if (s->read_packet->family == AF_INET) {
                                                s->read_packet->sender.in = s->peer.in.sin_addr;
                                                s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
                                                s->read_packet->destination.in = s->local.in.sin_addr;
                                                s->read_packet->destination_port = be16toh(s->local.in.sin_port);
                                        } else {
                                                assert(s->read_packet->family == AF_INET6);
                                                s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
                                                s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
                                                s->read_packet->destination.in6 = s->local.in6.sin6_addr;
                                                s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);

                                                if (s->read_packet->ifindex == 0)
                                                        s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
                                                if (s->read_packet->ifindex == 0)
                                                        s->read_packet->ifindex = s->local.in6.sin6_scope_id;
                                        }
                                }

                                ss = dns_stream_read(s,
                                                     (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
                                                     sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
                                if (ss < 0) {
                                        if (!IN_SET(-ss, EINTR, EAGAIN))
                                                return dns_stream_complete(s, -ss);
                                } else if (ss == 0)
                                        return dns_stream_complete(s, ECONNRESET);
                                else
                                        /* NOTE(review): unlike the size-prefix read above, body
                                         * bytes do not set "progressed" and hence do not restart
                                         * the timeout. Confirm whether that is intended. */
                                        s->n_read += ss;
                        }

                        /* Are we done? If so, disable the event source for EPOLLIN */
                        if (s->n_read >= sizeof(s->read_size) + be16toh(s->read_size)) {
                                /* If there's a packet handler
                                 * installed, call that. Note that
                                 * this is optional... */
                                if (s->on_packet) {
                                        r = s->on_packet(s);
                                        if (r < 0)
                                                return r;
                                }

                                r = dns_stream_update_io(s);
                                if (r < 0)
                                        return dns_stream_complete(s, -r);
                        }
                }
        }

        /* Call "complete" callback if finished reading and writing one packet, and there's nothing else left
         * to write. */
        if (s->type == DNS_STREAM_LLMNR_SEND &&
            (s->write_packet && s->n_written >= sizeof(s->write_size) + s->write_packet->size) &&
            ordered_set_isempty(s->write_queue) &&
            (s->read_packet && s->n_read >= sizeof(s->read_size) + s->read_packet->size))
                return dns_stream_complete(s, 0);

        /* If we did something, let's restart the timeout event source */
        if (progressed && s->timeout_event_source) {
                r = sd_event_source_set_time_relative(s->timeout_event_source, DNS_STREAM_TIMEOUT_USEC);
                if (r < 0)
                        /* The failure reason is in r; errno is not what this call reports through */
                        log_warning_errno(r, "Couldn't restart TCP connection timeout, ignoring: %m");
        }

        return 0;
}
/* Destructor for a DnsStream: stops the stream, unlinks it from its manager
 * (if it was ever linked), releases TLS state, drops all queued and in-flight
 * packets and the server reference, and frees the object.
 *
 * Returns the result of mfree(), so callers can assign it back to their
 * pointer. */
static DnsStream *dns_stream_free(DnsStream *s) {
        DnsPacket *p;
        Iterator i;

        assert(s);

        dns_stream_stop(s);

        /* s->manager is only set once dns_stream_new() has linked the stream in */
        if (s->manager) {
                LIST_REMOVE(streams, s->manager->dns_streams, s);
                s->manager->n_dns_streams[s->type]--;
        }

#if ENABLE_DNS_OVER_TLS
        if (s->encrypted)
                dnstls_stream_free(s);
#endif

        /* The write queue holds a reference on each queued packet, release them all */
        ORDERED_SET_FOREACH(p, s->write_queue, i)
                dns_packet_unref(ordered_set_remove(s->write_queue, p));

        dns_packet_unref(s->write_packet);
        dns_packet_unref(s->read_packet);
        dns_server_unref(s->server);

        ordered_set_free(s->write_queue);

        return mfree(s);
}
/* Instantiates the reference counting helpers for DnsStream, with
 * dns_stream_free() as the destructor. Presumably this is what provides the
 * dns_stream_ref()/dns_stream_unref() functions used throughout this file;
 * the macro is defined elsewhere, confirm there. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
/* Allocates a new stream object for an existing socket and hooks it into the
 * manager's event loop.
 *
 *   m            manager that will own the stream
 *   ret          where to store the new stream (one reference, owned by the caller)
 *   type         stream type; also selects which per-type stream counter is used
 *   protocol     DNS protocol spoken on the stream
 *   fd           the socket; it is only stored in the stream once everything
 *                else succeeded, so on failure it is left untouched
 *   tfo_address  if non-NULL, peer address to use for TCP Fast Open on the
 *                first write
 *
 * Returns 0 on success, -EBUSY if DNS_STREAMS_MAX streams of this type already
 * exist, -ENOMEM on allocation failure, or another negative errno-style value
 * if setting up the queue or event sources fails. */
int dns_stream_new(
                Manager *m,
                DnsStream **ret,
                DnsStreamType type,
                DnsProtocol protocol,
                int fd,
                const union sockaddr_union *tfo_address) {

        _cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
        int r;

        assert(m);
        assert(ret);
        assert(type >= 0);
        assert(type < _DNS_STREAM_TYPE_MAX);
        assert(protocol >= 0);
        assert(protocol < _DNS_PROTOCOL_MAX);
        assert(fd >= 0);

        /* Refuse once the limit is reached, so that at most DNS_STREAMS_MAX streams per type exist */
        if (m->n_dns_streams[type] >= DNS_STREAMS_MAX)
                return -EBUSY;

        s = new(DnsStream, 1);
        if (!s)
                return -ENOMEM;

        /* fd starts out as -1: if we fail below, the cleanup handler must not close the
         * caller's socket. */
        *s = (DnsStream) {
                .n_ref = 1,
                .fd = -1,
                .protocol = protocol,
                .type = type,
        };

        r = ordered_set_ensure_allocated(&s->write_queue, &dns_packet_hash_ops);
        if (r < 0)
                return r;

        r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s->io_event_source, "dns-stream-io");

        r = sd_event_add_time_relative(
                        m->event,
                        &s->timeout_event_source,
                        clock_boottime_or_monotonic(),
                        DNS_STREAM_TIMEOUT_USEC, 0,
                        on_stream_timeout, s);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s->timeout_event_source, "dns-stream-timeout");

        /* Nothing can fail anymore: link into the manager and take over the socket */
        LIST_PREPEND(streams, m->dns_streams, s);
        m->n_dns_streams[type]++;
        s->manager = m;

        s->fd = fd;

        if (tfo_address) {
                s->tfo_address = *tfo_address;
                s->tfo_salen = tfo_address->sa.sa_family == AF_INET6 ? sizeof(tfo_address->in6) : sizeof(tfo_address->in);
        }

        *ret = TAKE_PTR(s);

        return 0;
}
/* Queues packet p for transmission on stream s and updates the stream's I/O
 * event mask accordingly. The queue takes its own reference on the packet.
 *
 * Returns a negative errno-style value if queuing fails, otherwise the result
 * of dns_stream_update_io(). */
int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
        int r;

        assert(s);
        assert(p);

        r = ordered_set_put(s->write_queue, p);
        if (r < 0)
                return r;

        /* NOTE(review): a reference is taken for every non-negative result. If
         * ordered_set_put() reports "already queued" with 0, this would leak a
         * reference; confirm against its contract. */
        dns_packet_ref(p);

        return dns_stream_update_io(s);
}
/* Hands the fully received packet over to the caller and resets the read
 * counter for the next one. Returns NULL if there is no packet or it has not
 * been read completely yet (size prefix plus announced payload length). */
DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
        assert(s);

        if (!s->read_packet ||
            s->n_read < sizeof(s->read_size) ||
            s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
                return NULL;

        s->n_read = 0;

        /* TAKE_PTR() is a project macro; presumably it also resets s->read_packet to NULL */
        return TAKE_PTR(s->read_packet);
}
/* Disconnects the stream from its server object, but only if the server
 * currently regards this very stream as its own. Does nothing for streams
 * without a server. */
void dns_stream_detach(DnsStream *s) {
        assert(s);

        if (s->server && s->server->stream == s)
                dns_server_unref_stream(s->server);
}