2014-07-16 00:26:02 +02:00
|
|
|
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
|
|
|
|
|
|
|
|
/***
|
|
|
|
This file is part of systemd.
|
|
|
|
|
|
|
|
Copyright 2014 Lennart Poettering
|
|
|
|
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
|
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***/
|
|
|
|
|
2015-10-27 03:01:06 +01:00
|
|
|
#include "alloc-util.h"
|
2014-07-16 00:26:02 +02:00
|
|
|
#include "resolved-dns-server.h"
|
2015-11-24 17:03:12 +01:00
|
|
|
#include "resolved-resolv-conf.h"
|
2015-10-27 03:01:06 +01:00
|
|
|
#include "siphash24.h"
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0]; follow-up patches
will implement the rest of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
#include "string-table.h"
|
2015-11-24 17:03:12 +01:00
|
|
|
#include "string-util.h"
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-07-28 02:32:24 +02:00
|
|
|
/* Lower and upper bound for the time after which classic DNS requests are repeated */
#define DNS_TIMEOUT_MIN_USEC (500 * USEC_PER_MSEC)
#define DNS_TIMEOUT_MAX_USEC (5 * USEC_PER_SEC)

/* Lower and upper bound for the time to wait before retrying with a full feature set */
#define DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC (5 * USEC_PER_MINUTE)
#define DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC (6 * USEC_PER_HOUR)

/* The number of times we will attempt a certain feature set before degrading */
#define DNS_SERVER_FEATURE_RETRY_ATTEMPTS 3
|
|
|
|
|
2014-07-16 00:26:02 +02:00
|
|
|
int dns_server_new(
|
|
|
|
Manager *m,
|
|
|
|
DnsServer **ret,
|
2014-08-01 16:04:12 +02:00
|
|
|
DnsServerType type,
|
2014-07-16 00:26:02 +02:00
|
|
|
Link *l,
|
2014-07-18 16:09:30 +02:00
|
|
|
int family,
|
2014-07-18 13:59:49 +02:00
|
|
|
const union in_addr_union *in_addr) {
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-11-24 21:39:14 +01:00
|
|
|
DnsServer *s;
|
2014-07-16 00:26:02 +02:00
|
|
|
|
|
|
|
assert(m);
|
2014-08-01 16:04:12 +02:00
|
|
|
assert((type == DNS_SERVER_LINK) == !!l);
|
2014-07-16 00:26:02 +02:00
|
|
|
assert(in_addr);
|
|
|
|
|
2015-11-24 21:39:14 +01:00
|
|
|
if (!IN_SET(family, AF_INET, AF_INET6))
|
|
|
|
return -EAFNOSUPPORT;
|
|
|
|
|
|
|
|
if (l) {
|
|
|
|
if (l->n_dns_servers >= LINK_DNS_SERVERS_MAX)
|
|
|
|
return -E2BIG;
|
|
|
|
} else {
|
|
|
|
if (m->n_dns_servers >= MANAGER_DNS_SERVERS_MAX)
|
|
|
|
return -E2BIG;
|
|
|
|
}
|
|
|
|
|
2014-07-16 00:26:02 +02:00
|
|
|
s = new0(DnsServer, 1);
|
|
|
|
if (!s)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-06-24 18:41:46 +02:00
|
|
|
s->n_ref = 1;
|
2015-11-24 20:50:37 +01:00
|
|
|
s->manager = m;
|
2015-12-27 01:35:00 +01:00
|
|
|
s->verified_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID;
|
|
|
|
s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_BEST;
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the reset of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the feature attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
s->features_grace_period_usec = DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC;
|
resolved: announce support for large UDP packets
This is often needed for proper DNSSEC support, and even to handle AAAA records
without falling back to TCP.
If the path between the client and server is fully compliant, this should always
work, however, that is not the case, and overlarge packets will get mysteriously
lost in some cases.
For that reason, we use a similar fallback mechanism as we do for palin EDNS0,
EDNS0+DO, etc.:
The large UDP size feature is different from the other supported feature, as we
cannot simply verify that it works based on receiving a reply (as the server
will usually send us much smaller packets than what we claim to support, so
simply receiving a reply does not mean much).
For that reason, we keep track of the largest UDP packet we ever received, as this
is the smallest known good size (defaulting to the standard 512 bytes). If
announcing the default large size of 4096 fails (in the same way as the other
features), we fall back to the known good size. The same logic of retrying after a
grace-period applies.
2015-07-06 16:48:24 +02:00
|
|
|
s->received_udp_packet_max = DNS_PACKET_UNICAST_SIZE_MAX;
|
2014-08-01 16:04:12 +02:00
|
|
|
s->type = type;
|
2014-07-16 00:26:02 +02:00
|
|
|
s->family = family;
|
|
|
|
s->address = *in_addr;
|
2015-07-28 02:32:24 +02:00
|
|
|
s->resend_timeout = DNS_TIMEOUT_MIN_USEC;
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-11-24 20:50:37 +01:00
|
|
|
switch (type) {
|
|
|
|
|
|
|
|
case DNS_SERVER_LINK:
|
|
|
|
s->link = l;
|
2015-11-24 21:39:14 +01:00
|
|
|
LIST_APPEND(servers, l->dns_servers, s);
|
|
|
|
l->n_dns_servers++;
|
2015-11-24 20:50:37 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DNS_SERVER_SYSTEM:
|
2015-11-24 21:39:14 +01:00
|
|
|
LIST_APPEND(servers, m->dns_servers, s);
|
|
|
|
m->n_dns_servers++;
|
2015-11-24 20:50:37 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DNS_SERVER_FALLBACK:
|
2015-11-24 21:39:14 +01:00
|
|
|
LIST_APPEND(servers, m->fallback_dns_servers, s);
|
|
|
|
m->n_dns_servers++;
|
2015-11-24 20:50:37 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2014-08-01 16:04:12 +02:00
|
|
|
assert_not_reached("Unknown server type");
|
2015-11-24 20:50:37 +01:00
|
|
|
}
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2015-11-24 17:59:40 +01:00
|
|
|
s->linked = true;
|
2014-07-16 00:26:02 +02:00
|
|
|
|
2014-08-01 16:04:12 +02:00
|
|
|
/* A new DNS server that isn't fallback is added and the one
|
|
|
|
* we used so far was a fallback one? Then let's try to pick
|
|
|
|
* the new one */
|
|
|
|
if (type != DNS_SERVER_FALLBACK &&
|
2014-08-12 19:32:55 +02:00
|
|
|
m->current_dns_server &&
|
|
|
|
m->current_dns_server->type == DNS_SERVER_FALLBACK)
|
|
|
|
manager_set_dns_server(m, NULL);
|
2014-08-01 16:04:12 +02:00
|
|
|
|
2014-07-16 00:26:02 +02:00
|
|
|
if (ret)
|
|
|
|
*ret = s;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-24 18:41:46 +02:00
|
|
|
/* Takes an additional reference on the server and returns it. A NULL
 * argument is accepted and returned unchanged. */
DnsServer* dns_server_ref(DnsServer *s) {
        if (s) {
                assert(s->n_ref > 0);
                s->n_ref++;
        }

        return s;
}
|
|
|
|
|
2015-11-24 17:59:40 +01:00
|
|
|
/* Drops one reference from the server. When no references remain, the cached
 * server string and the object itself are freed. A NULL argument is
 * accepted. Always returns NULL. */
DnsServer* dns_server_unref(DnsServer *s) {
        if (!s)
                return NULL;

        assert(s->n_ref > 0);
        s->n_ref--;

        /* Somebody else still holds a reference, keep the object around */
        if (s->n_ref > 0)
                return NULL;

        free(s->server_string);
        free(s);

        return NULL;
}
|
2014-08-12 12:21:10 +02:00
|
|
|
|
2015-11-24 17:59:40 +01:00
|
|
|
/* Removes the specified server from the linked list of servers it belongs to
 * (per-link, system or fallback), decrements the matching server counter and
 * drops one reference. The server might still stay around if it has further
 * refs, for example from an ongoing transaction.
 *
 * If the server was the current server of its link or of the manager, the
 * respective current server is reset. Does nothing if the server is not
 * linked. */
void dns_server_unlink(DnsServer *s) {
        assert(s);
        assert(s->manager);

        if (!s->linked)
                return;

        switch (s->type) {

        case DNS_SERVER_LINK:
                assert(s->link);
                assert(s->link->n_dns_servers > 0);
                LIST_REMOVE(servers, s->link->dns_servers, s);
                /* Undo the increment done in dns_server_new(). Without this the
                 * per-link counter only ever grows and the LINK_DNS_SERVERS_MAX
                 * check eventually rejects new servers with -E2BIG. */
                s->link->n_dns_servers--;
                break;

        case DNS_SERVER_SYSTEM:
                assert(s->manager->n_dns_servers > 0);
                LIST_REMOVE(servers, s->manager->dns_servers, s);
                s->manager->n_dns_servers--;
                break;

        case DNS_SERVER_FALLBACK:
                assert(s->manager->n_dns_servers > 0);
                LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
                s->manager->n_dns_servers--;
                break;

        default:
                assert_not_reached("Unknown server type");
        }

        s->linked = false;

        /* Make sure nobody keeps using an unlinked server as the current one */
        if (s->link && s->link->current_dns_server == s)
                link_set_dns_server(s->link, NULL);

        if (s->manager->current_dns_server == s)
                manager_set_dns_server(s->manager, NULL);

        dns_server_unref(s);
}
|
|
|
|
|
2015-11-24 20:50:37 +01:00
|
|
|
/* Clears the "marked" flag of the server and, if the server is linked and
 * not already the last entry, moves it to the end of the list it belongs
 * to. Does nothing if the server is not marked. */
void dns_server_move_back_and_unmark(DnsServer *s) {
        DnsServer *last;

        assert(s);

        if (!s->marked)
                return;

        s->marked = false;

        /* Unlinked servers are in no list, and without a successor we are at the end already */
        if (!s->linked || !s->servers_next)
                return;

        /* Move us to the end of the list, so that the order is strictly kept.
         * The tail is the same regardless of which list head we belong to, so
         * look it up once. */
        LIST_FIND_TAIL(servers, s, last);

        switch (s->type) {

        case DNS_SERVER_LINK:
                assert(s->link);
                LIST_REMOVE(servers, s->link->dns_servers, s);
                LIST_INSERT_AFTER(servers, s->link->dns_servers, last, s);
                break;

        case DNS_SERVER_SYSTEM:
                LIST_REMOVE(servers, s->manager->dns_servers, s);
                LIST_INSERT_AFTER(servers, s->manager->dns_servers, last, s);
                break;

        case DNS_SERVER_FALLBACK:
                LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
                LIST_INSERT_AFTER(servers, s->manager->fallback_dns_servers, last, s);
                break;

        default:
                assert_not_reached("Unknown server type");
        }
}
|
|
|
|
|
2016-01-08 18:50:41 +01:00
|
|
|
/* Records that the server answered at the given feature level. The verified
 * level is only ever raised here, never lowered. Whenever the level is at
 * least the one verified so far, the verification timestamp is refreshed. */
static void dns_server_verified(DnsServer *s, DnsServerFeatureLevel level) {
        assert(s);

        /* A reply at a lower level than what we already verified tells us nothing new */
        if (level < s->verified_feature_level)
                return;

        if (level > s->verified_feature_level) {
                log_debug("Verified we get a response at feature level %s from DNS server %s.",
                          dns_server_feature_level_to_string(level),
                          dns_server_string(s));
                s->verified_feature_level = level;
        }

        assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0);
}
|
|
|
|
|
|
|
|
/* Accounts for a reply received from the server over the given protocol, for
 * a request sent at the given feature level.
 *
 * Resets the failure counter of the protocol if the reply was for the
 * currently attempted feature level, records the (possibly reduced) feature
 * level as verified, remembers the largest UDP packet size seen and adapts
 * the resend timeout if a new maximum round-trip time was observed. */
void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t rtt, size_t size) {
        assert(s);

        switch (protocol) {

        case IPPROTO_UDP:
                if (s->possible_feature_level == level)
                        s->n_failed_udp = 0;

                /* If the RRSIG data is missing, then we can only validate EDNS0 at max */
                if (s->packet_rrsig_missing && level >= DNS_SERVER_FEATURE_LEVEL_DO)
                        level = DNS_SERVER_FEATURE_LEVEL_DO - 1;

                /* If the OPT RR got lost, then we can only validate UDP at max */
                if (s->packet_bad_opt && level >= DNS_SERVER_FEATURE_LEVEL_EDNS0)
                        level = DNS_SERVER_FEATURE_LEVEL_EDNS0 - 1;

                /* A reply to a request that announced support for large packets
                 * does not prove that large packets actually get through. */
                if (level == DNS_SERVER_FEATURE_LEVEL_LARGE)
                        level = DNS_SERVER_FEATURE_LEVEL_LARGE - 1;

                break;

        case IPPROTO_TCP:
                if (s->possible_feature_level == level)
                        s->n_failed_tcp = 0;

                /* Successful TCP connections are only useful to verify the TCP feature level. */
                level = DNS_SERVER_FEATURE_LEVEL_TCP;
                break;

        default:
                break;
        }

        dns_server_verified(s, level);

        /* Keep track of the largest UDP packet we ever got from this server:
         * we can always announce support for packets of at least that size. */
        if (protocol == IPPROTO_UDP && size > s->received_udp_packet_max)
                s->received_udp_packet_max = size;

        /* On a new worst-case round-trip time, derive the resend timeout from it */
        if (rtt > s->max_rtt) {
                s->max_rtt = rtt;
                s->resend_timeout = CLAMP(s->max_rtt * 2, DNS_TIMEOUT_MIN_USEC, DNS_TIMEOUT_MAX_USEC);
        }
}
|
|
|
|
|
2016-01-08 18:50:41 +01:00
|
|
|
/* Accounts for a request to the server that got no reply. If the request was
 * sent at the currently attempted feature level, the failure counter of the
 * protocol used is bumped. If the time given in usec is at least the current
 * resend timeout, the resend timeout is doubled, capped at
 * DNS_TIMEOUT_MAX_USEC. */
void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t usec) {
        assert(s);
        assert(s->manager);

        if (s->possible_feature_level == level)
                switch (protocol) {

                case IPPROTO_UDP:
                        s->n_failed_udp++;
                        break;

                case IPPROTO_TCP:
                        s->n_failed_tcp++;
                        break;

                default:
                        break;
                }

        if (usec >= s->resend_timeout)
                s->resend_timeout = MIN(s->resend_timeout * 2, DNS_TIMEOUT_MAX_USEC);
}
|
|
|
|
|
2015-12-27 01:35:00 +01:00
|
|
|
/* Invoked whenever we get a FORMERR, SERVFAIL or NOTIMP rcode from a server.
 * Flags the failure, but only if it happened at the feature level we are
 * currently attempting. */
void dns_server_packet_failed(DnsServer *s, DnsServerFeatureLevel level) {
        assert(s);

        if (s->possible_feature_level == level)
                s->packet_failed = true;
}
|
|
|
|
|
|
|
|
/* Invoked whenever we get a packet with TC bit set. Flags the truncation,
 * but only if it happened at the feature level we are currently
 * attempting. */
void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level) {
        assert(s);

        if (s->possible_feature_level == level)
                s->packet_truncated = true;
}
|
|
|
|
|
2016-01-15 19:23:51 +01:00
|
|
|
/* Records that a reply lacked RRSIG RRs. Only relevant for requests sent at
 * feature level DO or higher; anything below is ignored. */
void dns_server_packet_rrsig_missing(DnsServer *s, DnsServerFeatureLevel level) {
        assert(s);

        if (level >= DNS_SERVER_FEATURE_LEVEL_DO) {
                /* If the RRSIG RRs are missing, we have to downgrade what we previously verified */
                if (s->verified_feature_level >= DNS_SERVER_FEATURE_LEVEL_DO)
                        s->verified_feature_level = DNS_SERVER_FEATURE_LEVEL_DO - 1;

                s->packet_rrsig_missing = true;
        }
}
|
|
|
|
|
|
|
|
/* Records that a reply had a missing or bad OPT RR. Only relevant for
 * requests sent at feature level EDNS0 or higher; anything below is
 * ignored. */
void dns_server_packet_bad_opt(DnsServer *s, DnsServerFeatureLevel level) {
        assert(s);

        if (level >= DNS_SERVER_FEATURE_LEVEL_EDNS0) {
                /* If the OPT RR got lost, we have to downgrade what we previously verified */
                if (s->verified_feature_level >= DNS_SERVER_FEATURE_LEVEL_EDNS0)
                        s->verified_feature_level = DNS_SERVER_FEATURE_LEVEL_EDNS0 - 1;

                s->packet_bad_opt = true;
        }
}
|
|
|
|
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0]; follow-up patches
will implement the rest of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
/* Tells whether the grace period that started when the current feature level was verified is over.
 *
 * Returns false if no verification timestamp is recorded (verified_usec == 0) or if the period has not
 * elapsed yet. Otherwise doubles the grace period for the next round (capped at
 * DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC) and returns true. */
static bool dns_server_grace_period_expired(DnsServer *s) {
        usec_t now_usec;

        assert(s);
        assert(s->manager);

        /* Nothing verified so far, hence no grace period is running */
        if (s->verified_usec == 0)
                return false;

        assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &now_usec) >= 0);

        /* Still within the grace period? */
        if (now_usec < s->verified_usec + s->features_grace_period_usec)
                return false;

        /* The period is over: back off exponentially, up to the configured maximum */
        s->features_grace_period_usec = MIN(s->features_grace_period_usec * 2, DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC);

        return true;
}
|
|
|
|
|
2016-01-11 18:57:59 +01:00
|
|
|
/* Clears all per-feature-level bookkeeping of the server: the UDP/TCP failure counters, the flags describing
 * what kind of problematic packets were seen, and the verification timestamp. */
static void dns_server_reset_counters(DnsServer *s) {
        assert(s);

        /* Failure counters */
        s->n_failed_udp = s->n_failed_tcp = 0;

        /* What we observed on received packets */
        s->packet_failed = false;
        s->packet_truncated = false;
        s->packet_bad_opt = false;
        s->packet_rrsig_missing = false;

        /* Forget when the level was last verified */
        s->verified_usec = 0;
}
|
|
|
|
|
2015-12-27 01:35:00 +01:00
|
|
|
/* Re-evaluates and returns the feature level to try for this server, storing it in s->possible_feature_level.
 *
 * Three cases are handled, in this order:
 *
 *   1. We are below the best level and the grace period expired: go back to the best level, reset the
 *      counters and log that.
 *   2. The possible level is not above the verified level: use the verified level.
 *   3. Otherwise: look at the failure counters and packet flags and possibly change the level (see the
 *      individual cases below). If the level changed, the counters are reset and a warning is logged. */
DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) {
        DnsServerFeatureLevel previous;

        assert(s);

        /* Case 1. Note that the grace period check is only evaluated if we are not at the best level anyway. */
        if (s->possible_feature_level != DNS_SERVER_FEATURE_LEVEL_BEST &&
            dns_server_grace_period_expired(s)) {

                s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_BEST;
                dns_server_reset_counters(s);

                log_info("Grace period over, resuming full feature set (%s) for DNS server %s.",
                         dns_server_feature_level_to_string(s->possible_feature_level),
                         dns_server_string(s));

                return s->possible_feature_level;
        }

        /* Case 2. */
        if (s->possible_feature_level <= s->verified_feature_level) {
                s->possible_feature_level = s->verified_feature_level;
                return s->possible_feature_level;
        }

        /* Case 3. Remember where we started, so that we can detect a change below. */
        previous = s->possible_feature_level;

        if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
            s->possible_feature_level == DNS_SERVER_FEATURE_LEVEL_TCP) {

                /* We are at the TCP (lowest) level, tried a couple of TCP connections, and none of them
                 * worked. Go back up to UDP. */

                log_debug("Reached maximum number of failed TCP connection attempts, trying UDP again...");
                s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_UDP;

        } else if (s->packet_bad_opt &&
                   s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_EDNS0) {

                /* A reply to one of our EDNS0 queries carried no valid OPT RR: drop below the EDNS0 levels.
                 * After all, some servers generate different responses with and without an OPT RR in the
                 * request. Example:
                 * https://open.nlnetlabs.nl/pipermail/dnssec-trigger/2014-November/000376.html */

                log_debug("Server doesn't support EDNS(0) properly, downgrading feature level...");
                s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_UDP;

        } else if (s->packet_rrsig_missing &&
                   s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_DO) {

                /* RRSIG data was missing on an EDNS0 packet with the DO bit set, i.e. the server does not
                 * augment responses with DNSSEC RRs. Better not ask for it anymore then, as some servers
                 * generate different replies depending on whether an OPT RR is in the query or not. */

                log_debug("Detected server responses lack RRSIG records, downgrading feature level...");
                s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_EDNS0;

        } else if (s->n_failed_udp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
                   s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_UDP) {

                /* Too many UDP packets were lost in a row while on the UDP level or above. Maybe the server
                 * cannot parse them, hence go one level down. This way we might end up all the way down at
                 * TCP. */

                log_debug("Lost too many UDP packets, downgrading feature level...");
                s->possible_feature_level--;

        } else if (s->packet_failed &&
                   s->possible_feature_level > DNS_SERVER_FEATURE_LEVEL_UDP) {

                /* We got a failure packet while above the UDP level. Here we go down no further than UDP,
                 * assuming a failure packet indicates incompatible packet contents rather than a transport
                 * problem. */

                log_debug("Got server failure, downgrading feature level...");
                s->possible_feature_level--;

        } else if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
                   s->packet_truncated &&
                   s->possible_feature_level > DNS_SERVER_FEATURE_LEVEL_UDP) {

                /* Too many TCP connection failures in a row, at least one truncated packet, and we are above
                 * the UDP level. By going down and dropping DNSSEC or EDNS0 data we hope to make the packet
                 * smaller, so that it works via UDP, given that TCP appears not to be usable as fallback. If
                 * we are at the lowest UDP level already we don't go further down, since that would be TCP,
                 * which failed too often after all. */

                log_debug("Got too many failed TCP connection failures and truncated UDP packets, downgrading feature level...");
                s->possible_feature_level--;
        }

        if (previous == s->possible_feature_level)
                return s->possible_feature_level;

        /* The feature level changed, start counting from scratch */
        dns_server_reset_counters(s);

        log_warning("Using degraded feature set (%s) for DNS server %s.",
                    dns_server_feature_level_to_string(s->possible_feature_level),
                    dns_server_string(s));

        return s->possible_feature_level;
}
|
|
|
|
|
2015-12-26 18:49:32 +01:00
|
|
|
/* Fixes the OPT field of the packet to match the specified feature level.
 *
 * dns_packet_truncate_opt() is always called first. Below the EDNS0 level nothing more is done. At EDNS0 or
 * above dns_packet_append_opt() is called with the DO flag set if the level is at least
 * DNS_SERVER_FEATURE_LEVEL_DO, and with a packet size of DNS_PACKET_UNICAST_SIZE_LARGE_MAX if the level is at
 * least DNS_SERVER_FEATURE_LEVEL_LARGE, or server->received_udp_packet_max otherwise.
 *
 * Returns 0 when no OPT is added, otherwise the result of dns_packet_append_opt(); a negative return value of
 * dns_packet_truncate_opt() is passed through. */
int dns_server_adjust_opt(DnsServer *server, DnsPacket *packet, DnsServerFeatureLevel level) {
        size_t advertised_size;
        int r;

        assert(server);
        assert(packet);
        assert(packet->protocol == DNS_PROTOCOL_DNS);

        r = dns_packet_truncate_opt(packet);
        if (r < 0)
                return r;

        /* No EDNS0, no OPT */
        if (level < DNS_SERVER_FEATURE_LEVEL_EDNS0)
                return 0;

        advertised_size = level >= DNS_SERVER_FEATURE_LEVEL_LARGE ?
                DNS_PACKET_UNICAST_SIZE_LARGE_MAX :
                server->received_udp_packet_max;

        return dns_packet_append_opt(packet, advertised_size, level >= DNS_SERVER_FEATURE_LEVEL_DO, NULL);
}
|
|
|
|
|
2016-01-08 20:59:03 +01:00
|
|
|
/* Returns a string for the server's address. The string is generated with in_addr_to_string() on first use and
 * cached in server->server_string. If no string is available afterwards, whatever strna() substitutes for NULL
 * is returned. */
const char *dns_server_string(DnsServer *server) {
        assert(server);

        /* Already formatted earlier? Then reuse that. */
        if (server->server_string)
                return server->server_string;

        /* Failure is deliberately ignored here, strna() below covers the NULL case */
        (void) in_addr_to_string(server->family, &server->address, &server->server_string);

        return strna(server->server_string);
}
|
|
|
|
|
2016-01-10 22:58:58 +01:00
|
|
|
/* Returns whether the server supports DNSSEC according to what we know about it. That is the case only if
 * all of the following hold:
 *
 *   - the possible feature level is at least DNS_SERVER_FEATURE_LEVEL_DO
 *   - no bad OPT RR was seen
 *   - no missing RRSIG was seen
 *   - TCP did not fail DNS_SERVER_FEATURE_RETRY_ATTEMPTS times or more (DNSSEC servers need to support TCP
 *     properly, see RFC5966; if they don't, we assume DNSSEC is borked too) */
bool dns_server_dnssec_supported(DnsServer *server) {
        assert(server);

        return server->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_DO &&
                !server->packet_bad_opt &&
                !server->packet_rrsig_missing &&
                server->n_failed_tcp < DNS_SERVER_FEATURE_RETRY_ATTEMPTS;
}
|
|
|
|
|
2015-10-04 00:22:41 +02:00
|
|
|
static void dns_server_hash_func(const void *p, struct siphash *state) {
|
2014-08-12 12:21:10 +02:00
|
|
|
const DnsServer *s = p;
|
|
|
|
|
2015-10-04 00:22:41 +02:00
|
|
|
assert(s);
|
2014-08-12 12:21:10 +02:00
|
|
|
|
2015-10-04 00:22:41 +02:00
|
|
|
siphash24_compress(&s->family, sizeof(s->family), state);
|
|
|
|
siphash24_compress(&s->address, FAMILY_ADDRESS_SIZE(s->family), state);
|
2014-08-12 12:21:10 +02:00
|
|
|
}
|
|
|
|
|
2014-08-13 01:00:18 +02:00
|
|
|
static int dns_server_compare_func(const void *a, const void *b) {
|
2014-08-12 12:21:10 +02:00
|
|
|
const DnsServer *x = a, *y = b;
|
|
|
|
|
|
|
|
if (x->family < y->family)
|
|
|
|
return -1;
|
|
|
|
if (x->family > y->family)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family));
|
|
|
|
}
|
2014-08-13 01:00:18 +02:00
|
|
|
|
|
|
|
const struct hash_ops dns_server_hash_ops = {
|
|
|
|
.hash = dns_server_hash_func,
|
|
|
|
.compare = dns_server_compare_func
|
|
|
|
};
|
2015-11-24 16:48:13 +01:00
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Calls dns_server_unlink() on every server of the list starting at 'first'. A NULL list is fine. */
void dns_server_unlink_all(DnsServer *first) {
        DnsServer *next;

        for (; first; first = next) {
                /* Pick up the successor before unlinking the current entry */
                next = first->servers_next;
                dns_server_unlink(first);
        }
}
|
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Calls dns_server_unlink() on every server of the list starting at 'first' that has its 'marked' flag set.
 * A NULL list is fine. */
void dns_server_unlink_marked(DnsServer *first) {
        DnsServer *next;

        for (; first; first = next) {
                /* Pick up the successor before possibly unlinking the current entry */
                next = first->servers_next;

                if (first->marked)
                        dns_server_unlink(first);
        }
}
|
2015-11-24 16:48:13 +01:00
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Sets the 'marked' flag on every server of the list starting at 'first'. A NULL list is fine. */
void dns_server_mark_all(DnsServer *first) {
        DnsServer *s;

        for (s = first; s; s = s->servers_next)
                s->marked = true;
}
|
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Searches the list starting at 'first' for a server with the specified address family and address. Returns
 * the first match, or NULL if there is none. */
DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr) {
        DnsServer *candidate;

        for (candidate = first; candidate; candidate = candidate->servers_next) {

                if (candidate->family != family)
                        continue;

                if (in_addr_equal(family, &candidate->address, in_addr) > 0)
                        return candidate;
        }

        return NULL;
}
|
2015-11-24 17:03:12 +01:00
|
|
|
|
2015-11-24 21:27:29 +01:00
|
|
|
/* Returns the head of the manager's server list for the specified type: m->dns_servers for DNS_SERVER_SYSTEM,
 * m->fallback_dns_servers for DNS_SERVER_FALLBACK, and NULL for any other type. */
DnsServer *manager_get_first_dns_server(Manager *m, DnsServerType t) {
        assert(m);

        if (t == DNS_SERVER_SYSTEM)
                return m->dns_servers;

        if (t == DNS_SERVER_FALLBACK)
                return m->fallback_dns_servers;

        return NULL;
}
|
|
|
|
|
|
|
|
/* Makes 's' (which may be NULL) the manager's current DNS server and returns 's'.
 *
 * If 's' is the current server already, nothing happens. Otherwise the switch is logged (for non-NULL 's'),
 * the reference on the previous server is dropped, a reference on the new one is taken, and the cache of the
 * unicast scope, if there is one, is flushed. */
DnsServer *manager_set_dns_server(Manager *m, DnsServer *s) {
        assert(m);

        /* Nothing to do if the server does not actually change */
        if (s == m->current_dns_server)
                return s;

        if (s)
                log_info("Switching to system DNS server %s.", dns_server_string(s));

        /* Swap the reference over to the new server */
        dns_server_unref(m->current_dns_server);
        m->current_dns_server = dns_server_ref(s);

        if (m->unicast_scope)
                dns_cache_flush(&m->unicast_scope->cache);

        return s;
}
|
|
|
|
|
|
|
|
/* Returns the manager's current DNS server, picking one if none is selected yet.
 *
 * The first entry of m->dns_servers is preferred. If that list is empty, the fallback servers are used, but
 * only if none of the links has DNS servers configured either. May return NULL. */
DnsServer *manager_get_dns_server(Manager *m) {
        Iterator i;
        Link *l;

        assert(m);

        /* Try to read an updated resolv.conf */
        manager_read_resolv_conf(m);

        /* If no DNS server was chosen so far, pick the first one */
        if (!m->current_dns_server)
                manager_set_dns_server(m, m->dns_servers);

        if (m->current_dns_server)
                return m->current_dns_server;

        /* No DNS servers configured. If any link has some, we stay without a current server. */
        HASHMAP_FOREACH(l, m->links, i)
                if (l->dns_servers)
                        return NULL;

        /* Nothing on the links either, hence use the fallback servers */
        manager_set_dns_server(m, m->fallback_dns_servers);

        return m->current_dns_server;
}
|
|
|
|
|
|
|
|
/* Advances the manager's current DNS server to the next one of its list, wrapping around to the start of the
 * list the current server's type belongs to. */
void manager_next_dns_server(Manager *m) {
        DnsServer *current;

        assert(m);

        /* If there's currently no DNS server set, then the next manager_get_dns_server() will find one */
        current = m->current_dns_server;
        if (!current)
                return;

        /* Move on to the successor, but follow the linked list only if the server is still linked */
        if (current->linked && current->servers_next) {
                manager_set_dns_server(m, current->servers_next);
                return;
        }

        /* There was no successor, hence start from the beginning of the matching list */
        manager_set_dns_server(m,
                               current->type == DNS_SERVER_FALLBACK ?
                               m->fallback_dns_servers :
                               m->dns_servers);
}
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the rest of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
|
|
|
|
static const char* const dns_server_feature_level_table[_DNS_SERVER_FEATURE_LEVEL_MAX] = {
|
|
|
|
[DNS_SERVER_FEATURE_LEVEL_TCP] = "TCP",
|
|
|
|
[DNS_SERVER_FEATURE_LEVEL_UDP] = "UDP",
|
2015-06-23 23:06:09 +02:00
|
|
|
[DNS_SERVER_FEATURE_LEVEL_EDNS0] = "UDP+EDNS0",
|
2015-06-24 15:08:40 +02:00
|
|
|
[DNS_SERVER_FEATURE_LEVEL_DO] = "UDP+EDNS0+DO",
|
resolved: announce support for large UDP packets
This is often needed for proper DNSSEC support, and even to handle AAAA records
without falling back to TCP.
If the path between the client and server is fully compliant, this should always
work, however, that is not the case, and overlarge packets will get mysteriously
lost in some cases.
For that reason, we use a similar fallback mechanism as we do for palin EDNS0,
EDNS0+DO, etc.:
The large UDP size feature is different from the other supported feature, as we
cannot simply verify that it works based on receiving a reply (as the server
will usually send us much smaller packets than what we claim to support, so
simply receiving a reply does not mean much).
For that reason, we keep track of the largest UDP packet we ever received, as this
is the smallest known good size (defaulting to the standard 512 bytes). If
announcing the default large size of 4096 fails (in the same way as the other
features), we fall back to the known good size. The same logic of retrying after a
grace-period applies.
2015-07-06 16:48:24 +02:00
|
|
|
[DNS_SERVER_FEATURE_LEVEL_LARGE] = "UDP+EDNS0+DO+LARGE",
|
resolved: fallback to TCP if UDP fails
This is inspired by the logic in BIND [0], follow-up patches
will implement the reset of that scheme.
If we get a server error back, or if after several attempts we don't
get a reply at all, we switch from UDP to TCP for the given
server for the current and all subsequent requests. However, if
we ever successfully received a reply over UDP, we never fall
back to TCP, and once a grace-period has passed, we try to upgrade
again to using UDP. The grace-period starts off at five minutes
after the current feature level was verified and then grows
exponentially to six hours. This is to mitigate problems due
to temporary lack of network connectivity, but at the same time
avoid flooding the network with retries when the feature attempted
feature level genuinely does not work.
Note that UDP is likely much more commonly supported than TCP,
but depending on the path between the client and the server, we
may have more luck with TCP in case something is wrong. We really
do prefer UDP though, as that is much more lightweight, that is
why TCP is only the last resort.
[0]: <https://kb.isc.org/article/AA-01219/0/Refinements-to-EDNS-fallback-behavior-can-cause-different-outcomes-in-Recursive-Servers.html>
2015-07-06 08:15:25 +02:00
|
|
|
};
|
|
|
|
DEFINE_STRING_TABLE_LOOKUP(dns_server_feature_level, DnsServerFeatureLevel);
|